diff --git a/src/gallium/drivers/panfrost/pan_blend_cso.h b/src/gallium/drivers/panfrost/pan_blend_cso.h index dd6c4a58c12..bf3c53d1afe 100644 --- a/src/gallium/drivers/panfrost/pan_blend_cso.h +++ b/src/gallium/drivers/panfrost/pan_blend_cso.h @@ -36,26 +36,26 @@ struct panfrost_bo; struct pan_blend_info { - unsigned constant_mask : 4; - bool fixed_function : 1; - bool enabled : 1; - bool load_dest : 1; - bool opaque : 1; - bool alpha_zero_nop : 1; - bool alpha_one_store : 1; + unsigned constant_mask : 4; + bool fixed_function : 1; + bool enabled : 1; + bool load_dest : 1; + bool opaque : 1; + bool alpha_zero_nop : 1; + bool alpha_one_store : 1; }; struct panfrost_blend_state { - struct pipe_blend_state base; - struct pan_blend_state pan; - struct pan_blend_info info[PIPE_MAX_COLOR_BUFS]; - uint32_t equation[PIPE_MAX_COLOR_BUFS]; + struct pipe_blend_state base; + struct pan_blend_state pan; + struct pan_blend_info info[PIPE_MAX_COLOR_BUFS]; + uint32_t equation[PIPE_MAX_COLOR_BUFS]; - /* info.load presented as a bitfield for draw call hot paths */ - unsigned load_dest_mask : PIPE_MAX_COLOR_BUFS; + /* info.load presented as a bitfield for draw call hot paths */ + unsigned load_dest_mask : PIPE_MAX_COLOR_BUFS; }; -mali_ptr -panfrost_get_blend(struct panfrost_batch *batch, unsigned rt, struct panfrost_bo **bo, unsigned *shader_offset); +mali_ptr panfrost_get_blend(struct panfrost_batch *batch, unsigned rt, + struct panfrost_bo **bo, unsigned *shader_offset); #endif diff --git a/src/gallium/drivers/panfrost/pan_blit.c b/src/gallium/drivers/panfrost/pan_blit.c index 7f059bd4aa4..190bab39574 100644 --- a/src/gallium/drivers/panfrost/pan_blit.c +++ b/src/gallium/drivers/panfrost/pan_blit.c @@ -27,59 +27,58 @@ * */ +#include "util/format/u_format.h" #include "pan_context.h" #include "pan_util.h" -#include "util/format/u_format.h" void panfrost_blitter_save(struct panfrost_context *ctx, bool render_cond) { - struct blitter_context *blitter = ctx->blitter; + struct blitter_context *blitter = ctx->blitter; - util_blitter_save_vertex_buffer_slot(blitter, ctx->vertex_buffers); - util_blitter_save_vertex_elements(blitter, ctx->vertex); - util_blitter_save_vertex_shader(blitter, ctx->uncompiled[PIPE_SHADER_VERTEX]); - util_blitter_save_rasterizer(blitter, ctx->rasterizer); - util_blitter_save_viewport(blitter, &ctx->pipe_viewport); - util_blitter_save_scissor(blitter, &ctx->scissor); - util_blitter_save_fragment_shader(blitter, ctx->uncompiled[PIPE_SHADER_FRAGMENT]); - util_blitter_save_blend(blitter, ctx->blend); - util_blitter_save_depth_stencil_alpha(blitter, ctx->depth_stencil); - util_blitter_save_stencil_ref(blitter, &ctx->stencil_ref); - util_blitter_save_so_targets(blitter, 0, NULL); - util_blitter_save_sample_mask(blitter, ctx->sample_mask, ctx->min_samples); + util_blitter_save_vertex_buffer_slot(blitter, ctx->vertex_buffers); + util_blitter_save_vertex_elements(blitter, ctx->vertex); + util_blitter_save_vertex_shader(blitter, + ctx->uncompiled[PIPE_SHADER_VERTEX]); + util_blitter_save_rasterizer(blitter, ctx->rasterizer); + util_blitter_save_viewport(blitter, &ctx->pipe_viewport); + util_blitter_save_scissor(blitter, &ctx->scissor); + util_blitter_save_fragment_shader(blitter, + ctx->uncompiled[PIPE_SHADER_FRAGMENT]); + util_blitter_save_blend(blitter, ctx->blend); + util_blitter_save_depth_stencil_alpha(blitter, ctx->depth_stencil); + util_blitter_save_stencil_ref(blitter, &ctx->stencil_ref); + util_blitter_save_so_targets(blitter, 0, NULL); + util_blitter_save_sample_mask(blitter, 
ctx->sample_mask, ctx->min_samples); - util_blitter_save_framebuffer(blitter, &ctx->pipe_framebuffer); - util_blitter_save_fragment_sampler_states(blitter, - ctx->sampler_count[PIPE_SHADER_FRAGMENT], - (void **)(&ctx->samplers[PIPE_SHADER_FRAGMENT])); - util_blitter_save_fragment_sampler_views(blitter, - ctx->sampler_view_count[PIPE_SHADER_FRAGMENT], - (struct pipe_sampler_view **)&ctx->sampler_views[PIPE_SHADER_FRAGMENT]); - util_blitter_save_fragment_constant_buffer_slot(blitter, - ctx->constant_buffer[PIPE_SHADER_FRAGMENT].cb); - - if (!render_cond) { - util_blitter_save_render_condition(blitter, - (struct pipe_query *) ctx->cond_query, - ctx->cond_cond, ctx->cond_mode); - } + util_blitter_save_framebuffer(blitter, &ctx->pipe_framebuffer); + util_blitter_save_fragment_sampler_states( + blitter, ctx->sampler_count[PIPE_SHADER_FRAGMENT], + (void **)(&ctx->samplers[PIPE_SHADER_FRAGMENT])); + util_blitter_save_fragment_sampler_views( + blitter, ctx->sampler_view_count[PIPE_SHADER_FRAGMENT], + (struct pipe_sampler_view **)&ctx->sampler_views[PIPE_SHADER_FRAGMENT]); + util_blitter_save_fragment_constant_buffer_slot( + blitter, ctx->constant_buffer[PIPE_SHADER_FRAGMENT].cb); + if (!render_cond) { + util_blitter_save_render_condition(blitter, + (struct pipe_query *)ctx->cond_query, + ctx->cond_cond, ctx->cond_mode); + } } void -panfrost_blit(struct pipe_context *pipe, - const struct pipe_blit_info *info) +panfrost_blit(struct pipe_context *pipe, const struct pipe_blit_info *info) { - struct panfrost_context *ctx = pan_context(pipe); + struct panfrost_context *ctx = pan_context(pipe); - if (info->render_condition_enable && - !panfrost_render_condition_check(ctx)) - return; + if (info->render_condition_enable && !panfrost_render_condition_check(ctx)) + return; - if (!util_blitter_is_blit_supported(ctx->blitter, info)) - unreachable("Unsupported blit\n"); + if (!util_blitter_is_blit_supported(ctx->blitter, info)) + unreachable("Unsupported blit\n"); - panfrost_blitter_save(ctx, info->render_condition_enable); - util_blitter_blit(ctx->blitter, info); + panfrost_blitter_save(ctx, info->render_condition_enable); + util_blitter_blit(ctx->blitter, info); } diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index c082687e969..53534de041b 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -23,158 +23,170 @@ * SOFTWARE. 
*/ -#include "util/macros.h" -#include "util/u_prim.h" -#include "util/u_vbuf.h" -#include "util/u_helpers.h" -#include "util/u_draw.h" -#include "util/u_memory.h" -#include "util/u_viewport.h" +#include "gallium/auxiliary/util/u_blend.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "gallium/auxiliary/util/u_blend.h" +#include "util/macros.h" +#include "util/u_draw.h" +#include "util/u_helpers.h" +#include "util/u_memory.h" +#include "util/u_prim.h" +#include "util/u_vbuf.h" +#include "util/u_viewport.h" #include "genxml/gen_macros.h" -#include "pan_pool.h" -#include "pan_bo.h" #include "pan_blend.h" +#include "pan_blitter.h" +#include "pan_bo.h" #include "pan_context.h" +#include "pan_indirect_dispatch.h" #include "pan_job.h" +#include "pan_pool.h" #include "pan_shader.h" #include "pan_texture.h" #include "pan_util.h" -#include "pan_indirect_dispatch.h" -#include "pan_blitter.h" #define PAN_GPU_INDIRECTS (PAN_ARCH == 7) struct panfrost_rasterizer { - struct pipe_rasterizer_state base; + struct pipe_rasterizer_state base; #if PAN_ARCH <= 7 - /* Partially packed RSD words */ - struct mali_multisample_misc_packed multisample; - struct mali_stencil_mask_misc_packed stencil_misc; + /* Partially packed RSD words */ + struct mali_multisample_misc_packed multisample; + struct mali_stencil_mask_misc_packed stencil_misc; #endif }; struct panfrost_zsa_state { - struct pipe_depth_stencil_alpha_state base; + struct pipe_depth_stencil_alpha_state base; - /* Is any depth, stencil, or alpha testing enabled? */ - bool enabled; + /* Is any depth, stencil, or alpha testing enabled? */ + bool enabled; - /* Does the depth and stencil tests always pass? This ignores write - * masks, we are only interested in whether pixels may be killed. - */ - bool zs_always_passes; + /* Does the depth and stencil tests always pass? This ignores write + * masks, we are only interested in whether pixels may be killed. + */ + bool zs_always_passes; - /* Are depth or stencil writes possible? */ - bool writes_zs; + /* Are depth or stencil writes possible? */ + bool writes_zs; #if PAN_ARCH <= 7 - /* Prepacked words from the RSD */ - struct mali_multisample_misc_packed rsd_depth; - struct mali_stencil_mask_misc_packed rsd_stencil; - struct mali_stencil_packed stencil_front, stencil_back; + /* Prepacked words from the RSD */ + struct mali_multisample_misc_packed rsd_depth; + struct mali_stencil_mask_misc_packed rsd_stencil; + struct mali_stencil_packed stencil_front, stencil_back; #else - /* Depth/stencil descriptor template */ - struct mali_depth_stencil_packed desc; + /* Depth/stencil descriptor template */ + struct mali_depth_stencil_packed desc; #endif }; struct panfrost_sampler_state { - struct pipe_sampler_state base; - struct mali_sampler_packed hw; + struct pipe_sampler_state base; + struct mali_sampler_packed hw; }; /* Misnomer: Sampler view corresponds to textures, not samplers */ struct panfrost_sampler_view { - struct pipe_sampler_view base; - struct panfrost_pool_ref state; - struct mali_texture_packed bifrost_descriptor; - mali_ptr texture_bo; - uint64_t modifier; + struct pipe_sampler_view base; + struct panfrost_pool_ref state; + struct mali_texture_packed bifrost_descriptor; + mali_ptr texture_bo; + uint64_t modifier; - /* Pool used to allocate the descriptor. If NULL, defaults to the global - * descriptor pool. Can be set for short lived descriptors, useful for - * shader images on Valhall. - */ - struct panfrost_pool *pool; + /* Pool used to allocate the descriptor. 
If NULL, defaults to the global + * descriptor pool. Can be set for short lived descriptors, useful for + * shader images on Valhall. + */ + struct panfrost_pool *pool; }; struct panfrost_vertex_state { - unsigned num_elements; - struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS]; + unsigned num_elements; + struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS]; #if PAN_ARCH >= 9 - /* Packed attribute descriptor. All fields are set at CSO create time - * except for stride, which must be ORed in at draw time - */ - struct mali_attribute_packed attributes[PIPE_MAX_ATTRIBS]; + /* Packed attribute descriptor. All fields are set at CSO create time + * except for stride, which must be ORed in at draw time + */ + struct mali_attribute_packed attributes[PIPE_MAX_ATTRIBS]; #else - /* buffers corresponds to attribute buffer, element_buffers corresponds - * to an index in buffers for each vertex element */ - struct pan_vertex_buffer buffers[PIPE_MAX_ATTRIBS]; - unsigned element_buffer[PIPE_MAX_ATTRIBS]; - unsigned nr_bufs; + /* buffers corresponds to attribute buffer, element_buffers corresponds + * to an index in buffers for each vertex element */ + struct pan_vertex_buffer buffers[PIPE_MAX_ATTRIBS]; + unsigned element_buffer[PIPE_MAX_ATTRIBS]; + unsigned nr_bufs; - unsigned formats[PIPE_MAX_ATTRIBS]; + unsigned formats[PIPE_MAX_ATTRIBS]; #endif }; /* Statically assert that PIPE_* enums match the hardware enums. * (As long as they match, we don't need to translate them.) */ -static_assert((int)PIPE_FUNC_NEVER == MALI_FUNC_NEVER, "must match"); -static_assert((int)PIPE_FUNC_LESS == MALI_FUNC_LESS, "must match"); -static_assert((int)PIPE_FUNC_EQUAL == MALI_FUNC_EQUAL, "must match"); -static_assert((int)PIPE_FUNC_LEQUAL == MALI_FUNC_LEQUAL, "must match"); -static_assert((int)PIPE_FUNC_GREATER == MALI_FUNC_GREATER, "must match"); +static_assert((int)PIPE_FUNC_NEVER == MALI_FUNC_NEVER, "must match"); +static_assert((int)PIPE_FUNC_LESS == MALI_FUNC_LESS, "must match"); +static_assert((int)PIPE_FUNC_EQUAL == MALI_FUNC_EQUAL, "must match"); +static_assert((int)PIPE_FUNC_LEQUAL == MALI_FUNC_LEQUAL, "must match"); +static_assert((int)PIPE_FUNC_GREATER == MALI_FUNC_GREATER, "must match"); static_assert((int)PIPE_FUNC_NOTEQUAL == MALI_FUNC_NOT_EQUAL, "must match"); -static_assert((int)PIPE_FUNC_GEQUAL == MALI_FUNC_GEQUAL, "must match"); -static_assert((int)PIPE_FUNC_ALWAYS == MALI_FUNC_ALWAYS, "must match"); +static_assert((int)PIPE_FUNC_GEQUAL == MALI_FUNC_GEQUAL, "must match"); +static_assert((int)PIPE_FUNC_ALWAYS == MALI_FUNC_ALWAYS, "must match"); static inline enum mali_sample_pattern panfrost_sample_pattern(unsigned samples) { - switch (samples) { - case 1: return MALI_SAMPLE_PATTERN_SINGLE_SAMPLED; - case 4: return MALI_SAMPLE_PATTERN_ROTATED_4X_GRID; - case 8: return MALI_SAMPLE_PATTERN_D3D_8X_GRID; - case 16: return MALI_SAMPLE_PATTERN_D3D_16X_GRID; - default: unreachable("Unsupported sample count"); - } + switch (samples) { + case 1: + return MALI_SAMPLE_PATTERN_SINGLE_SAMPLED; + case 4: + return MALI_SAMPLE_PATTERN_ROTATED_4X_GRID; + case 8: + return MALI_SAMPLE_PATTERN_D3D_8X_GRID; + case 16: + return MALI_SAMPLE_PATTERN_D3D_16X_GRID; + default: + unreachable("Unsupported sample count"); + } } static unsigned translate_tex_wrap(enum pipe_tex_wrap w, bool using_nearest) { - /* CLAMP is only supported on Midgard, where it is broken for nearest - * filtering. Use CLAMP_TO_EDGE in that case. - */ + /* CLAMP is only supported on Midgard, where it is broken for nearest + * filtering. 
Use CLAMP_TO_EDGE in that case. + */ - switch (w) { - case PIPE_TEX_WRAP_REPEAT: return MALI_WRAP_MODE_REPEAT; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return MALI_WRAP_MODE_CLAMP_TO_EDGE; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return MALI_WRAP_MODE_CLAMP_TO_BORDER; - case PIPE_TEX_WRAP_MIRROR_REPEAT: return MALI_WRAP_MODE_MIRRORED_REPEAT; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_BORDER; + switch (w) { + case PIPE_TEX_WRAP_REPEAT: + return MALI_WRAP_MODE_REPEAT; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return MALI_WRAP_MODE_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return MALI_WRAP_MODE_CLAMP_TO_BORDER; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return MALI_WRAP_MODE_MIRRORED_REPEAT; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_BORDER; #if PAN_ARCH <= 5 - case PIPE_TEX_WRAP_CLAMP: - return using_nearest ? MALI_WRAP_MODE_CLAMP_TO_EDGE : - MALI_WRAP_MODE_CLAMP; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - return using_nearest ? MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE : - MALI_WRAP_MODE_MIRRORED_CLAMP; + case PIPE_TEX_WRAP_CLAMP: + return using_nearest ? MALI_WRAP_MODE_CLAMP_TO_EDGE + : MALI_WRAP_MODE_CLAMP; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + return using_nearest ? MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE + : MALI_WRAP_MODE_MIRRORED_CLAMP; #endif - default: unreachable("Invalid wrap"); - } + default: + unreachable("Invalid wrap"); + } } /* The hardware compares in the wrong order order, so we have to flip before @@ -183,121 +195,123 @@ translate_tex_wrap(enum pipe_tex_wrap w, bool using_nearest) static enum mali_func panfrost_sampler_compare_func(const struct pipe_sampler_state *cso) { - return !cso->compare_mode ? MALI_FUNC_NEVER : - panfrost_flip_compare_func((enum mali_func) cso->compare_func); + return !cso->compare_mode + ? MALI_FUNC_NEVER + : panfrost_flip_compare_func((enum mali_func)cso->compare_func); } static enum mali_mipmap_mode pan_pipe_to_mipmode(enum pipe_tex_mipfilter f) { - switch (f) { - case PIPE_TEX_MIPFILTER_NEAREST: return MALI_MIPMAP_MODE_NEAREST; - case PIPE_TEX_MIPFILTER_LINEAR: return MALI_MIPMAP_MODE_TRILINEAR; + switch (f) { + case PIPE_TEX_MIPFILTER_NEAREST: + return MALI_MIPMAP_MODE_NEAREST; + case PIPE_TEX_MIPFILTER_LINEAR: + return MALI_MIPMAP_MODE_TRILINEAR; #if PAN_ARCH >= 6 - case PIPE_TEX_MIPFILTER_NONE: return MALI_MIPMAP_MODE_NONE; + case PIPE_TEX_MIPFILTER_NONE: + return MALI_MIPMAP_MODE_NONE; #else - case PIPE_TEX_MIPFILTER_NONE: return MALI_MIPMAP_MODE_NEAREST; + case PIPE_TEX_MIPFILTER_NONE: + return MALI_MIPMAP_MODE_NEAREST; #endif - default: unreachable("Invalid"); - } + default: + unreachable("Invalid"); + } } - static void * -panfrost_create_sampler_state( - struct pipe_context *pctx, - const struct pipe_sampler_state *cso) +panfrost_create_sampler_state(struct pipe_context *pctx, + const struct pipe_sampler_state *cso) { - struct panfrost_sampler_state *so = CALLOC_STRUCT(panfrost_sampler_state); - so->base = *cso; + struct panfrost_sampler_state *so = CALLOC_STRUCT(panfrost_sampler_state); + so->base = *cso; #if PAN_ARCH == 7 - /* On v7, pan_texture.c composes the API swizzle with a bijective - * swizzle derived from the format, to allow more formats than the - * hardware otherwise supports. When packing border colours, we need to - * undo this bijection, by swizzling with its inverse. 
- */ - unsigned mali_format = panfrost_pipe_format_v7[cso->border_color_format].hw; - enum mali_rgb_component_order order = mali_format & BITFIELD_MASK(12); + /* On v7, pan_texture.c composes the API swizzle with a bijective + * swizzle derived from the format, to allow more formats than the + * hardware otherwise supports. When packing border colours, we need to + * undo this bijection, by swizzling with its inverse. + */ + unsigned mali_format = panfrost_pipe_format_v7[cso->border_color_format].hw; + enum mali_rgb_component_order order = mali_format & BITFIELD_MASK(12); - unsigned char inverted_swizzle[4]; - panfrost_invert_swizzle(GENX(pan_decompose_swizzle)(order).post, - inverted_swizzle); + unsigned char inverted_swizzle[4]; + panfrost_invert_swizzle(GENX(pan_decompose_swizzle)(order).post, + inverted_swizzle); - util_format_apply_color_swizzle(&so->base.border_color, - &cso->border_color, - inverted_swizzle, - false /* is_integer (irrelevant) */); + util_format_apply_color_swizzle(&so->base.border_color, &cso->border_color, + inverted_swizzle, + false /* is_integer (irrelevant) */); #endif - bool using_nearest = cso->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST; + bool using_nearest = cso->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST; - pan_pack(&so->hw, SAMPLER, cfg) { - cfg.magnify_nearest = cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST; - cfg.minify_nearest = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST; + pan_pack(&so->hw, SAMPLER, cfg) { + cfg.magnify_nearest = cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST; + cfg.minify_nearest = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST; - cfg.normalized_coordinates = !cso->unnormalized_coords; - cfg.lod_bias = FIXED_16(cso->lod_bias, true); - cfg.minimum_lod = FIXED_16(cso->min_lod, false); - cfg.maximum_lod = FIXED_16(cso->max_lod, false); + cfg.normalized_coordinates = !cso->unnormalized_coords; + cfg.lod_bias = FIXED_16(cso->lod_bias, true); + cfg.minimum_lod = FIXED_16(cso->min_lod, false); + cfg.maximum_lod = FIXED_16(cso->max_lod, false); - cfg.wrap_mode_s = translate_tex_wrap(cso->wrap_s, using_nearest); - cfg.wrap_mode_t = translate_tex_wrap(cso->wrap_t, using_nearest); - cfg.wrap_mode_r = translate_tex_wrap(cso->wrap_r, using_nearest); + cfg.wrap_mode_s = translate_tex_wrap(cso->wrap_s, using_nearest); + cfg.wrap_mode_t = translate_tex_wrap(cso->wrap_t, using_nearest); + cfg.wrap_mode_r = translate_tex_wrap(cso->wrap_r, using_nearest); - cfg.mipmap_mode = pan_pipe_to_mipmode(cso->min_mip_filter); - cfg.compare_function = panfrost_sampler_compare_func(cso); - cfg.seamless_cube_map = cso->seamless_cube_map; + cfg.mipmap_mode = pan_pipe_to_mipmode(cso->min_mip_filter); + cfg.compare_function = panfrost_sampler_compare_func(cso); + cfg.seamless_cube_map = cso->seamless_cube_map; - cfg.border_color_r = so->base.border_color.ui[0]; - cfg.border_color_g = so->base.border_color.ui[1]; - cfg.border_color_b = so->base.border_color.ui[2]; - cfg.border_color_a = so->base.border_color.ui[3]; + cfg.border_color_r = so->base.border_color.ui[0]; + cfg.border_color_g = so->base.border_color.ui[1]; + cfg.border_color_b = so->base.border_color.ui[2]; + cfg.border_color_a = so->base.border_color.ui[3]; #if PAN_ARCH >= 6 - if (cso->max_anisotropy > 1) { - cfg.maximum_anisotropy = cso->max_anisotropy; - cfg.lod_algorithm = MALI_LOD_ALGORITHM_ANISOTROPIC; - } + if (cso->max_anisotropy > 1) { + cfg.maximum_anisotropy = cso->max_anisotropy; + cfg.lod_algorithm = MALI_LOD_ALGORITHM_ANISOTROPIC; + } #else - /* Emulate disabled mipmapping by clamping 
the LOD as tight as - * possible (from 0 to epsilon = 1/256) */ - if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) - cfg.maximum_lod = cfg.minimum_lod + 1; + /* Emulate disabled mipmapping by clamping the LOD as tight as + * possible (from 0 to epsilon = 1/256) */ + if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) + cfg.maximum_lod = cfg.minimum_lod + 1; #endif - } + } - return so; + return so; } static bool -panfrost_fs_required( - struct panfrost_compiled_shader *fs, - struct panfrost_blend_state *blend, - struct pipe_framebuffer_state *state, - const struct panfrost_zsa_state *zsa) +panfrost_fs_required(struct panfrost_compiled_shader *fs, + struct panfrost_blend_state *blend, + struct pipe_framebuffer_state *state, + const struct panfrost_zsa_state *zsa) { - /* If we generally have side effects. This inclues use of discard, - * which can affect the results of an occlusion query. */ - if (fs->info.fs.sidefx) - return true; + /* If we generally have side effects. This inclues use of discard, + * which can affect the results of an occlusion query. */ + if (fs->info.fs.sidefx) + return true; - /* Using an empty FS requires early-z to be enabled, but alpha test - * needs it disabled. Alpha test is only native on Midgard, so only - * check there. - */ - if (PAN_ARCH <= 5 && zsa->base.alpha_func != PIPE_FUNC_ALWAYS) - return true; + /* Using an empty FS requires early-z to be enabled, but alpha test + * needs it disabled. Alpha test is only native on Midgard, so only + * check there. + */ + if (PAN_ARCH <= 5 && zsa->base.alpha_func != PIPE_FUNC_ALWAYS) + return true; - /* If colour is written we need to execute */ - for (unsigned i = 0; i < state->nr_cbufs; ++i) { - if (state->cbufs[i] && blend->info[i].enabled) - return true; - } + /* If colour is written we need to execute */ + for (unsigned i = 0; i < state->nr_cbufs; ++i) { + if (state->cbufs[i] && blend->info[i].enabled) + return true; + } - /* If depth is written and not implied we need to execute. - * TODO: Predicate on Z/S writes being enabled */ - return (fs->info.fs.writes_depth || fs->info.fs.writes_stencil); + /* If depth is written and not implied we need to execute. + * TODO: Predicate on Z/S writes being enabled */ + return (fs->info.fs.writes_depth || fs->info.fs.writes_stencil); } /* Get pointers to the blend shaders bound to each active render target. 
Used @@ -308,34 +322,34 @@ static void panfrost_get_blend_shaders(struct panfrost_batch *batch, mali_ptr *blend_shaders) { - unsigned shader_offset = 0; - struct panfrost_bo *shader_bo = NULL; + unsigned shader_offset = 0; + struct panfrost_bo *shader_bo = NULL; - for (unsigned c = 0; c < batch->key.nr_cbufs; ++c) { - if (batch->key.cbufs[c]) { - blend_shaders[c] = panfrost_get_blend(batch, - c, &shader_bo, &shader_offset); - } - } + for (unsigned c = 0; c < batch->key.nr_cbufs; ++c) { + if (batch->key.cbufs[c]) { + blend_shaders[c] = + panfrost_get_blend(batch, c, &shader_bo, &shader_offset); + } + } - if (shader_bo) - perf_debug_ctx(batch->ctx, "Blend shader use"); + if (shader_bo) + perf_debug_ctx(batch->ctx, "Blend shader use"); } #if PAN_ARCH >= 5 UNUSED static uint16_t pack_blend_constant(enum pipe_format format, float cons) { - const struct util_format_description *format_desc = - util_format_description(format); + const struct util_format_description *format_desc = + util_format_description(format); - unsigned chan_size = 0; + unsigned chan_size = 0; - for (unsigned i = 0; i < format_desc->nr_channels; i++) - chan_size = MAX2(format_desc->channel[0].size, chan_size); + for (unsigned i = 0; i < format_desc->nr_channels; i++) + chan_size = MAX2(format_desc->channel[0].size, chan_size); - uint16_t unorm = (cons * ((1 << chan_size) - 1)); - return unorm << (16 - chan_size); + uint16_t unorm = (cons * ((1 << chan_size) - 1)); + return unorm << (16 - chan_size); } /* @@ -349,163 +363,160 @@ pack_blend_constant(enum pipe_format format, float cons) static bool panfrost_overdraw_alpha(const struct panfrost_context *ctx, bool zero) { - const struct panfrost_blend_state *so = ctx->blend; + const struct panfrost_blend_state *so = ctx->blend; - for (unsigned i = 0; i < ctx->pipe_framebuffer.nr_cbufs; ++i) { - const struct pan_blend_info info = so->info[i]; + for (unsigned i = 0; i < ctx->pipe_framebuffer.nr_cbufs; ++i) { + const struct pan_blend_info info = so->info[i]; - bool enabled = ctx->pipe_framebuffer.cbufs[i] && !info.enabled; - bool flag = zero ? info.alpha_zero_nop : info.alpha_one_store; + bool enabled = ctx->pipe_framebuffer.cbufs[i] && !info.enabled; + bool flag = zero ? 
info.alpha_zero_nop : info.alpha_one_store; - if (enabled && !flag) - return false; - } + if (enabled && !flag) + return false; + } - return true; + return true; } static void -panfrost_emit_blend(struct panfrost_batch *batch, void *rts, mali_ptr *blend_shaders) +panfrost_emit_blend(struct panfrost_batch *batch, void *rts, + mali_ptr *blend_shaders) { - unsigned rt_count = batch->key.nr_cbufs; - struct panfrost_context *ctx = batch->ctx; - const struct panfrost_blend_state *so = ctx->blend; - bool dithered = so->base.dither; + unsigned rt_count = batch->key.nr_cbufs; + struct panfrost_context *ctx = batch->ctx; + const struct panfrost_blend_state *so = ctx->blend; + bool dithered = so->base.dither; - /* Always have at least one render target for depth-only passes */ - for (unsigned i = 0; i < MAX2(rt_count, 1); ++i) { - struct mali_blend_packed *packed = rts + (i * pan_size(BLEND)); + /* Always have at least one render target for depth-only passes */ + for (unsigned i = 0; i < MAX2(rt_count, 1); ++i) { + struct mali_blend_packed *packed = rts + (i * pan_size(BLEND)); - /* Disable blending for unbacked render targets */ - if (rt_count == 0 || !batch->key.cbufs[i] || !so->info[i].enabled) { - pan_pack(rts + i * pan_size(BLEND), BLEND, cfg) { - cfg.enable = false; + /* Disable blending for unbacked render targets */ + if (rt_count == 0 || !batch->key.cbufs[i] || !so->info[i].enabled) { + pan_pack(rts + i * pan_size(BLEND), BLEND, cfg) { + cfg.enable = false; #if PAN_ARCH >= 6 - cfg.internal.mode = MALI_BLEND_MODE_OFF; + cfg.internal.mode = MALI_BLEND_MODE_OFF; #endif - } + } - continue; - } + continue; + } - struct pan_blend_info info = so->info[i]; - enum pipe_format format = batch->key.cbufs[i]->format; - float cons = pan_blend_get_constant(info.constant_mask, - ctx->blend_color.color); + struct pan_blend_info info = so->info[i]; + enum pipe_format format = batch->key.cbufs[i]->format; + float cons = + pan_blend_get_constant(info.constant_mask, ctx->blend_color.color); - /* Word 0: Flags and constant */ - pan_pack(packed, BLEND, cfg) { - cfg.srgb = util_format_is_srgb(format); - cfg.load_destination = info.load_dest; - cfg.round_to_fb_precision = !dithered; - cfg.alpha_to_one = ctx->blend->base.alpha_to_one; + /* Word 0: Flags and constant */ + pan_pack(packed, BLEND, cfg) { + cfg.srgb = util_format_is_srgb(format); + cfg.load_destination = info.load_dest; + cfg.round_to_fb_precision = !dithered; + cfg.alpha_to_one = ctx->blend->base.alpha_to_one; #if PAN_ARCH >= 6 - if (!blend_shaders[i]) - cfg.constant = pack_blend_constant(format, cons); + if (!blend_shaders[i]) + cfg.constant = pack_blend_constant(format, cons); #else - cfg.blend_shader = (blend_shaders[i] != 0); + cfg.blend_shader = (blend_shaders[i] != 0); - if (blend_shaders[i]) - cfg.shader_pc = blend_shaders[i]; - else - cfg.constant = cons; + if (blend_shaders[i]) + cfg.shader_pc = blend_shaders[i]; + else + cfg.constant = cons; #endif - } + } - if (!blend_shaders[i]) { - /* Word 1: Blend Equation */ - STATIC_ASSERT(pan_size(BLEND_EQUATION) == 4); - packed->opaque[PAN_ARCH >= 6 ? 1 : 2] = so->equation[i]; - } + if (!blend_shaders[i]) { + /* Word 1: Blend Equation */ + STATIC_ASSERT(pan_size(BLEND_EQUATION) == 4); + packed->opaque[PAN_ARCH >= 6 ? 
1 : 2] = so->equation[i]; + } #if PAN_ARCH >= 6 - const struct panfrost_device *dev = pan_device(ctx->base.screen); - struct panfrost_compiled_shader *fs = - ctx->prog[PIPE_SHADER_FRAGMENT]; + const struct panfrost_device *dev = pan_device(ctx->base.screen); + struct panfrost_compiled_shader *fs = ctx->prog[PIPE_SHADER_FRAGMENT]; - /* Words 2 and 3: Internal blend */ - if (blend_shaders[i]) { - /* The blend shader's address needs to be at - * the same top 32 bit as the fragment shader. - * TODO: Ensure that's always the case. - */ - assert(!fs->bin.bo || - (blend_shaders[i] & (0xffffffffull << 32)) == - (fs->bin.gpu & (0xffffffffull << 32))); + /* Words 2 and 3: Internal blend */ + if (blend_shaders[i]) { + /* The blend shader's address needs to be at + * the same top 32 bit as the fragment shader. + * TODO: Ensure that's always the case. + */ + assert(!fs->bin.bo || (blend_shaders[i] & (0xffffffffull << 32)) == + (fs->bin.gpu & (0xffffffffull << 32))); - pan_pack(&packed->opaque[2], INTERNAL_BLEND, cfg) { - cfg.mode = MALI_BLEND_MODE_SHADER; - cfg.shader.pc = (u32) blend_shaders[i]; + pan_pack(&packed->opaque[2], INTERNAL_BLEND, cfg) { + cfg.mode = MALI_BLEND_MODE_SHADER; + cfg.shader.pc = (u32)blend_shaders[i]; #if PAN_ARCH <= 7 - unsigned ret_offset = fs->info.bifrost.blend[i].return_offset; - assert(!(ret_offset & 0x7)); + unsigned ret_offset = fs->info.bifrost.blend[i].return_offset; + assert(!(ret_offset & 0x7)); - cfg.shader.return_value = ret_offset ? - fs->bin.gpu + ret_offset : 0; + cfg.shader.return_value = ret_offset ? fs->bin.gpu + ret_offset : 0; #endif - } - } else { - pan_pack(&packed->opaque[2], INTERNAL_BLEND, cfg) { - cfg.mode = info.opaque ? - MALI_BLEND_MODE_OPAQUE : - MALI_BLEND_MODE_FIXED_FUNCTION; + } + } else { + pan_pack(&packed->opaque[2], INTERNAL_BLEND, cfg) { + cfg.mode = info.opaque ? 
MALI_BLEND_MODE_OPAQUE + : MALI_BLEND_MODE_FIXED_FUNCTION; - /* If we want the conversion to work properly, - * num_comps must be set to 4 - */ - cfg.fixed_function.num_comps = 4; - cfg.fixed_function.conversion.memory_format = - panfrost_format_to_bifrost_blend(dev, format, dithered); - cfg.fixed_function.rt = i; + /* If we want the conversion to work properly, + * num_comps must be set to 4 + */ + cfg.fixed_function.num_comps = 4; + cfg.fixed_function.conversion.memory_format = + panfrost_format_to_bifrost_blend(dev, format, dithered); + cfg.fixed_function.rt = i; #if PAN_ARCH <= 7 - if (!info.opaque) { - cfg.fixed_function.alpha_zero_nop = info.alpha_zero_nop; - cfg.fixed_function.alpha_one_store = info.alpha_one_store; - } + if (!info.opaque) { + cfg.fixed_function.alpha_zero_nop = info.alpha_zero_nop; + cfg.fixed_function.alpha_one_store = info.alpha_one_store; + } - if (fs->info.fs.untyped_color_outputs) { - cfg.fixed_function.conversion.register_format = - GENX(pan_fixup_blend_type)(fs->info.bifrost.blend[i].type, format); - } else { - cfg.fixed_function.conversion.register_format = - fs->info.bifrost.blend[i].format; - } + if (fs->info.fs.untyped_color_outputs) { + cfg.fixed_function.conversion.register_format = GENX( + pan_fixup_blend_type)(fs->info.bifrost.blend[i].type, format); + } else { + cfg.fixed_function.conversion.register_format = + fs->info.bifrost.blend[i].format; + } #endif - } - } + } + } #endif - } + } } #endif static inline bool -pan_allow_forward_pixel_to_kill(struct panfrost_context *ctx, struct panfrost_compiled_shader *fs) +pan_allow_forward_pixel_to_kill(struct panfrost_context *ctx, + struct panfrost_compiled_shader *fs) { - /* Track if any colour buffer is reused across draws, either - * from reading it directly, or from failing to write it - */ - unsigned rt_mask = ctx->fb_rt_mask; - uint64_t rt_written = (fs->info.outputs_written >> FRAG_RESULT_DATA0); - bool blend_reads_dest = (ctx->blend->load_dest_mask & rt_mask); - bool alpha_to_coverage = ctx->blend->base.alpha_to_coverage; + /* Track if any colour buffer is reused across draws, either + * from reading it directly, or from failing to write it + */ + unsigned rt_mask = ctx->fb_rt_mask; + uint64_t rt_written = (fs->info.outputs_written >> FRAG_RESULT_DATA0); + bool blend_reads_dest = (ctx->blend->load_dest_mask & rt_mask); + bool alpha_to_coverage = ctx->blend->base.alpha_to_coverage; - return fs->info.fs.can_fpk && - !(rt_mask & ~rt_written) && - !alpha_to_coverage && - !blend_reads_dest; + return fs->info.fs.can_fpk && !(rt_mask & ~rt_written) && + !alpha_to_coverage && !blend_reads_dest; } static mali_ptr -panfrost_emit_compute_shader_meta(struct panfrost_batch *batch, enum pipe_shader_type stage) +panfrost_emit_compute_shader_meta(struct panfrost_batch *batch, + enum pipe_shader_type stage) { - struct panfrost_compiled_shader *ss = batch->ctx->prog[stage]; + struct panfrost_compiled_shader *ss = batch->ctx->prog[stage]; - panfrost_batch_add_bo(batch, ss->bin.bo, PIPE_SHADER_VERTEX); - panfrost_batch_add_bo(batch, ss->state.bo, PIPE_SHADER_VERTEX); + panfrost_batch_add_bo(batch, ss->bin.bo, PIPE_SHADER_VERTEX); + panfrost_batch_add_bo(batch, ss->state.bo, PIPE_SHADER_VERTEX); - return ss->state.gpu; + return ss->state.gpu; } #if PAN_ARCH <= 7 @@ -515,161 +526,159 @@ panfrost_emit_compute_shader_meta(struct panfrost_batch *batch, enum pipe_shader static void pan_merge_empty_fs(struct mali_renderer_state_packed *rsd) { - struct mali_renderer_state_packed empty_rsd; + struct mali_renderer_state_packed 
empty_rsd; - pan_pack(&empty_rsd, RENDERER_STATE, cfg) { + pan_pack(&empty_rsd, RENDERER_STATE, cfg) { #if PAN_ARCH >= 6 - cfg.properties.shader_modifies_coverage = true; - cfg.properties.allow_forward_pixel_to_kill = true; - cfg.properties.allow_forward_pixel_to_be_killed = true; - cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY; + cfg.properties.shader_modifies_coverage = true; + cfg.properties.allow_forward_pixel_to_kill = true; + cfg.properties.allow_forward_pixel_to_be_killed = true; + cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY; - /* Alpha isn't written so these are vacuous */ - cfg.multisample_misc.overdraw_alpha0 = true; - cfg.multisample_misc.overdraw_alpha1 = true; + /* Alpha isn't written so these are vacuous */ + cfg.multisample_misc.overdraw_alpha0 = true; + cfg.multisample_misc.overdraw_alpha1 = true; #else - cfg.shader.shader = 0x1; - cfg.properties.work_register_count = 1; - cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION; - cfg.properties.force_early_z = true; + cfg.shader.shader = 0x1; + cfg.properties.work_register_count = 1; + cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION; + cfg.properties.force_early_z = true; #endif - } + } - pan_merge((*rsd), empty_rsd, RENDERER_STATE); + pan_merge((*rsd), empty_rsd, RENDERER_STATE); } static void -panfrost_prepare_fs_state(struct panfrost_context *ctx, - mali_ptr *blend_shaders, +panfrost_prepare_fs_state(struct panfrost_context *ctx, mali_ptr *blend_shaders, struct mali_renderer_state_packed *rsd) { - struct pipe_rasterizer_state *rast = &ctx->rasterizer->base; - const struct panfrost_zsa_state *zsa = ctx->depth_stencil; - struct panfrost_compiled_shader *fs = ctx->prog[PIPE_SHADER_FRAGMENT]; - struct panfrost_blend_state *so = ctx->blend; - bool alpha_to_coverage = ctx->blend->base.alpha_to_coverage; - bool msaa = rast->multisample; + struct pipe_rasterizer_state *rast = &ctx->rasterizer->base; + const struct panfrost_zsa_state *zsa = ctx->depth_stencil; + struct panfrost_compiled_shader *fs = ctx->prog[PIPE_SHADER_FRAGMENT]; + struct panfrost_blend_state *so = ctx->blend; + bool alpha_to_coverage = ctx->blend->base.alpha_to_coverage; + bool msaa = rast->multisample; - unsigned rt_count = ctx->pipe_framebuffer.nr_cbufs; + unsigned rt_count = ctx->pipe_framebuffer.nr_cbufs; - bool has_blend_shader = false; + bool has_blend_shader = false; - for (unsigned c = 0; c < rt_count; ++c) - has_blend_shader |= (blend_shaders[c] != 0); + for (unsigned c = 0; c < rt_count; ++c) + has_blend_shader |= (blend_shaders[c] != 0); - bool has_oq = ctx->occlusion_query && ctx->active_queries; + bool has_oq = ctx->occlusion_query && ctx->active_queries; - pan_pack(rsd, RENDERER_STATE, cfg) { - if (panfrost_fs_required(fs, so, &ctx->pipe_framebuffer, zsa)) { + pan_pack(rsd, RENDERER_STATE, cfg) { + if (panfrost_fs_required(fs, so, &ctx->pipe_framebuffer, zsa)) { #if PAN_ARCH >= 6 - struct pan_earlyzs_state earlyzs = - pan_earlyzs_get(fs->earlyzs, - ctx->depth_stencil->writes_zs || - has_oq, - ctx->blend->base.alpha_to_coverage, - ctx->depth_stencil->zs_always_passes); + struct pan_earlyzs_state earlyzs = pan_earlyzs_get( + fs->earlyzs, ctx->depth_stencil->writes_zs || has_oq, + ctx->blend->base.alpha_to_coverage, + ctx->depth_stencil->zs_always_passes); - cfg.properties.pixel_kill_operation = earlyzs.kill; - cfg.properties.zs_update_operation = earlyzs.update; + cfg.properties.pixel_kill_operation = earlyzs.kill; + cfg.properties.zs_update_operation = earlyzs.update; - 
cfg.properties.allow_forward_pixel_to_kill = - pan_allow_forward_pixel_to_kill(ctx, fs); + cfg.properties.allow_forward_pixel_to_kill = + pan_allow_forward_pixel_to_kill(ctx, fs); #else - cfg.properties.force_early_z = - fs->info.fs.can_early_z && !alpha_to_coverage && - ((enum mali_func) zsa->base.alpha_func == MALI_FUNC_ALWAYS); + cfg.properties.force_early_z = + fs->info.fs.can_early_z && !alpha_to_coverage && + ((enum mali_func)zsa->base.alpha_func == MALI_FUNC_ALWAYS); - /* TODO: Reduce this limit? */ - if (has_blend_shader) - cfg.properties.work_register_count = MAX2(fs->info.work_reg_count, 8); - else - cfg.properties.work_register_count = fs->info.work_reg_count; + /* TODO: Reduce this limit? */ + if (has_blend_shader) + cfg.properties.work_register_count = + MAX2(fs->info.work_reg_count, 8); + else + cfg.properties.work_register_count = fs->info.work_reg_count; - /* Hardware quirks around early-zs forcing without a - * depth buffer. Note this breaks occlusion queries. */ - bool force_ez_with_discard = !zsa->enabled && !has_oq; + /* Hardware quirks around early-zs forcing without a + * depth buffer. Note this breaks occlusion queries. */ + bool force_ez_with_discard = !zsa->enabled && !has_oq; - cfg.properties.shader_reads_tilebuffer = - force_ez_with_discard && fs->info.fs.can_discard; - cfg.properties.shader_contains_discard = - !force_ez_with_discard && fs->info.fs.can_discard; + cfg.properties.shader_reads_tilebuffer = + force_ez_with_discard && fs->info.fs.can_discard; + cfg.properties.shader_contains_discard = + !force_ez_with_discard && fs->info.fs.can_discard; #endif - } + } #if PAN_ARCH == 4 - if (rt_count > 0) { - cfg.multisample_misc.load_destination = so->info[0].load_dest; - cfg.multisample_misc.blend_shader = (blend_shaders[0] != 0); - cfg.stencil_mask_misc.write_enable = so->info[0].enabled; - cfg.stencil_mask_misc.srgb = util_format_is_srgb(ctx->pipe_framebuffer.cbufs[0]->format); - cfg.stencil_mask_misc.dither_disable = !so->base.dither; - cfg.stencil_mask_misc.alpha_to_one = so->base.alpha_to_one; + if (rt_count > 0) { + cfg.multisample_misc.load_destination = so->info[0].load_dest; + cfg.multisample_misc.blend_shader = (blend_shaders[0] != 0); + cfg.stencil_mask_misc.write_enable = so->info[0].enabled; + cfg.stencil_mask_misc.srgb = + util_format_is_srgb(ctx->pipe_framebuffer.cbufs[0]->format); + cfg.stencil_mask_misc.dither_disable = !so->base.dither; + cfg.stencil_mask_misc.alpha_to_one = so->base.alpha_to_one; - if (blend_shaders[0]) { - cfg.blend_shader = blend_shaders[0]; - } else { - cfg.blend_constant = pan_blend_get_constant( - so->info[0].constant_mask, - ctx->blend_color.color); - } - } else { - /* If there is no colour buffer, leaving fields default is - * fine, except for blending which is nonnullable */ - cfg.blend_equation.color_mask = 0xf; - cfg.blend_equation.rgb.a = MALI_BLEND_OPERAND_A_SRC; - cfg.blend_equation.rgb.b = MALI_BLEND_OPERAND_B_SRC; - cfg.blend_equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO; - cfg.blend_equation.alpha.a = MALI_BLEND_OPERAND_A_SRC; - cfg.blend_equation.alpha.b = MALI_BLEND_OPERAND_B_SRC; - cfg.blend_equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO; - } + if (blend_shaders[0]) { + cfg.blend_shader = blend_shaders[0]; + } else { + cfg.blend_constant = pan_blend_get_constant( + so->info[0].constant_mask, ctx->blend_color.color); + } + } else { + /* If there is no colour buffer, leaving fields default is + * fine, except for blending which is nonnullable */ + cfg.blend_equation.color_mask = 0xf; + cfg.blend_equation.rgb.a = 
MALI_BLEND_OPERAND_A_SRC; + cfg.blend_equation.rgb.b = MALI_BLEND_OPERAND_B_SRC; + cfg.blend_equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO; + cfg.blend_equation.alpha.a = MALI_BLEND_OPERAND_A_SRC; + cfg.blend_equation.alpha.b = MALI_BLEND_OPERAND_B_SRC; + cfg.blend_equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO; + } #elif PAN_ARCH == 5 - /* Workaround */ - cfg.legacy_blend_shader = panfrost_last_nonnull(blend_shaders, rt_count); + /* Workaround */ + cfg.legacy_blend_shader = panfrost_last_nonnull(blend_shaders, rt_count); #endif - cfg.multisample_misc.sample_mask = msaa ? ctx->sample_mask : 0xFFFF; + cfg.multisample_misc.sample_mask = msaa ? ctx->sample_mask : 0xFFFF; - cfg.multisample_misc.evaluate_per_sample = - msaa && (ctx->min_samples > 1); + cfg.multisample_misc.evaluate_per_sample = msaa && (ctx->min_samples > 1); #if PAN_ARCH >= 6 - /* MSAA blend shaders need to pass their sample ID to - * LD_TILE/ST_TILE, so we must preload it. Additionally, we - * need per-sample shading for the blend shader, accomplished - * by forcing per-sample shading for the whole program. */ + /* MSAA blend shaders need to pass their sample ID to + * LD_TILE/ST_TILE, so we must preload it. Additionally, we + * need per-sample shading for the blend shader, accomplished + * by forcing per-sample shading for the whole program. */ - if (msaa && has_blend_shader) { - cfg.multisample_misc.evaluate_per_sample = true; - cfg.preload.fragment.sample_mask_id = true; - } + if (msaa && has_blend_shader) { + cfg.multisample_misc.evaluate_per_sample = true; + cfg.preload.fragment.sample_mask_id = true; + } - /* Bifrost does not have native point sprites. Point sprites are - * lowered in the driver to gl_PointCoord reads. This field - * actually controls the orientation of gl_PointCoord. Both - * orientations are controlled with sprite_coord_mode in - * Gallium. - */ - cfg.properties.point_sprite_coord_origin_max_y = - (rast->sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT); + /* Bifrost does not have native point sprites. Point sprites are + * lowered in the driver to gl_PointCoord reads. This field + * actually controls the orientation of gl_PointCoord. Both + * orientations are controlled with sprite_coord_mode in + * Gallium. + */ + cfg.properties.point_sprite_coord_origin_max_y = + (rast->sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT); - cfg.multisample_misc.overdraw_alpha0 = panfrost_overdraw_alpha(ctx, 0); - cfg.multisample_misc.overdraw_alpha1 = panfrost_overdraw_alpha(ctx, 1); + cfg.multisample_misc.overdraw_alpha0 = panfrost_overdraw_alpha(ctx, 0); + cfg.multisample_misc.overdraw_alpha1 = panfrost_overdraw_alpha(ctx, 1); #endif - cfg.stencil_mask_misc.alpha_to_coverage = alpha_to_coverage; - cfg.depth_units = rast->offset_units * 2.0f; - cfg.depth_factor = rast->offset_scale; + cfg.stencil_mask_misc.alpha_to_coverage = alpha_to_coverage; + cfg.depth_units = rast->offset_units * 2.0f; + cfg.depth_factor = rast->offset_scale; - bool back_enab = zsa->base.stencil[1].enabled; - cfg.stencil_front.reference_value = ctx->stencil_ref.ref_value[0]; - cfg.stencil_back.reference_value = ctx->stencil_ref.ref_value[back_enab ? 1 : 0]; + bool back_enab = zsa->base.stencil[1].enabled; + cfg.stencil_front.reference_value = ctx->stencil_ref.ref_value[0]; + cfg.stencil_back.reference_value = + ctx->stencil_ref.ref_value[back_enab ? 
1 : 0]; #if PAN_ARCH <= 5 - /* v6+ fits register preload here, no alpha testing */ - cfg.alpha_reference = zsa->base.alpha_ref_value; + /* v6+ fits register preload here, no alpha testing */ + cfg.alpha_reference = zsa->base.alpha_ref_value; #endif - } + } } static void @@ -677,153 +686,152 @@ panfrost_emit_frag_shader(struct panfrost_context *ctx, struct mali_renderer_state_packed *fragmeta, mali_ptr *blend_shaders) { - const struct panfrost_zsa_state *zsa = ctx->depth_stencil; - const struct panfrost_rasterizer *rast = ctx->rasterizer; - struct panfrost_compiled_shader *fs = - ctx->prog[PIPE_SHADER_FRAGMENT]; + const struct panfrost_zsa_state *zsa = ctx->depth_stencil; + const struct panfrost_rasterizer *rast = ctx->rasterizer; + struct panfrost_compiled_shader *fs = ctx->prog[PIPE_SHADER_FRAGMENT]; - /* We need to merge several several partial renderer state descriptors, - * so stage to temporary storage rather than reading back write-combine - * memory, which will trash performance. */ - struct mali_renderer_state_packed rsd; - panfrost_prepare_fs_state(ctx, blend_shaders, &rsd); + /* We need to merge several several partial renderer state descriptors, + * so stage to temporary storage rather than reading back write-combine + * memory, which will trash performance. */ + struct mali_renderer_state_packed rsd; + panfrost_prepare_fs_state(ctx, blend_shaders, &rsd); #if PAN_ARCH == 4 - if (ctx->pipe_framebuffer.nr_cbufs > 0 && !blend_shaders[0]) { - /* Word 14: SFBD Blend Equation */ - STATIC_ASSERT(pan_size(BLEND_EQUATION) == 4); - rsd.opaque[14] = ctx->blend->equation[0]; - } + if (ctx->pipe_framebuffer.nr_cbufs > 0 && !blend_shaders[0]) { + /* Word 14: SFBD Blend Equation */ + STATIC_ASSERT(pan_size(BLEND_EQUATION) == 4); + rsd.opaque[14] = ctx->blend->equation[0]; + } #endif - /* Merge with CSO state and upload */ - if (panfrost_fs_required(fs, ctx->blend, &ctx->pipe_framebuffer, zsa)) { - struct mali_renderer_state_packed *partial_rsd = - (struct mali_renderer_state_packed *)&fs->partial_rsd; - STATIC_ASSERT(sizeof(fs->partial_rsd) == sizeof(*partial_rsd)); - pan_merge(rsd, *partial_rsd, RENDERER_STATE); - } else { - pan_merge_empty_fs(&rsd); - } + /* Merge with CSO state and upload */ + if (panfrost_fs_required(fs, ctx->blend, &ctx->pipe_framebuffer, zsa)) { + struct mali_renderer_state_packed *partial_rsd = + (struct mali_renderer_state_packed *)&fs->partial_rsd; + STATIC_ASSERT(sizeof(fs->partial_rsd) == sizeof(*partial_rsd)); + pan_merge(rsd, *partial_rsd, RENDERER_STATE); + } else { + pan_merge_empty_fs(&rsd); + } - /* Word 8, 9 Misc state */ - rsd.opaque[8] |= zsa->rsd_depth.opaque[0] - | rast->multisample.opaque[0]; + /* Word 8, 9 Misc state */ + rsd.opaque[8] |= zsa->rsd_depth.opaque[0] | rast->multisample.opaque[0]; - rsd.opaque[9] |= zsa->rsd_stencil.opaque[0] - | rast->stencil_misc.opaque[0]; + rsd.opaque[9] |= zsa->rsd_stencil.opaque[0] | rast->stencil_misc.opaque[0]; - /* Word 10, 11 Stencil Front and Back */ - rsd.opaque[10] |= zsa->stencil_front.opaque[0]; - rsd.opaque[11] |= zsa->stencil_back.opaque[0]; + /* Word 10, 11 Stencil Front and Back */ + rsd.opaque[10] |= zsa->stencil_front.opaque[0]; + rsd.opaque[11] |= zsa->stencil_back.opaque[0]; - memcpy(fragmeta, &rsd, sizeof(rsd)); + memcpy(fragmeta, &rsd, sizeof(rsd)); } static mali_ptr panfrost_emit_frag_shader_meta(struct panfrost_batch *batch) { - struct panfrost_context *ctx = batch->ctx; - struct panfrost_compiled_shader *ss = ctx->prog[PIPE_SHADER_FRAGMENT]; + struct panfrost_context *ctx = batch->ctx; + struct 
panfrost_compiled_shader *ss = ctx->prog[PIPE_SHADER_FRAGMENT]; - panfrost_batch_add_bo(batch, ss->bin.bo, PIPE_SHADER_FRAGMENT); + panfrost_batch_add_bo(batch, ss->bin.bo, PIPE_SHADER_FRAGMENT); - struct panfrost_ptr xfer; + struct panfrost_ptr xfer; #if PAN_ARCH == 4 - xfer = pan_pool_alloc_desc(&batch->pool.base, RENDERER_STATE); + xfer = pan_pool_alloc_desc(&batch->pool.base, RENDERER_STATE); #else - unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1); + unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1); - xfer = pan_pool_alloc_desc_aggregate(&batch->pool.base, - PAN_DESC(RENDERER_STATE), - PAN_DESC_ARRAY(rt_count, BLEND)); + xfer = + pan_pool_alloc_desc_aggregate(&batch->pool.base, PAN_DESC(RENDERER_STATE), + PAN_DESC_ARRAY(rt_count, BLEND)); #endif - mali_ptr blend_shaders[PIPE_MAX_COLOR_BUFS] = { 0 }; - panfrost_get_blend_shaders(batch, blend_shaders); + mali_ptr blend_shaders[PIPE_MAX_COLOR_BUFS] = {0}; + panfrost_get_blend_shaders(batch, blend_shaders); - panfrost_emit_frag_shader(ctx, (struct mali_renderer_state_packed *) xfer.cpu, blend_shaders); + panfrost_emit_frag_shader(ctx, (struct mali_renderer_state_packed *)xfer.cpu, + blend_shaders); #if PAN_ARCH >= 5 - panfrost_emit_blend(batch, xfer.cpu + pan_size(RENDERER_STATE), blend_shaders); + panfrost_emit_blend(batch, xfer.cpu + pan_size(RENDERER_STATE), + blend_shaders); #endif - return xfer.gpu; + return xfer.gpu; } #endif static mali_ptr panfrost_emit_viewport(struct panfrost_batch *batch) { - struct panfrost_context *ctx = batch->ctx; - const struct pipe_viewport_state *vp = &ctx->pipe_viewport; - const struct pipe_scissor_state *ss = &ctx->scissor; - const struct pipe_rasterizer_state *rast = &ctx->rasterizer->base; + struct panfrost_context *ctx = batch->ctx; + const struct pipe_viewport_state *vp = &ctx->pipe_viewport; + const struct pipe_scissor_state *ss = &ctx->scissor; + const struct pipe_rasterizer_state *rast = &ctx->rasterizer->base; - /* Derive min/max from translate/scale. Note since |x| >= 0 by - * definition, we have that -|x| <= |x| hence translate - |scale| <= - * translate + |scale|, so the ordering is correct here. */ - float vp_minx = vp->translate[0] - fabsf(vp->scale[0]); - float vp_maxx = vp->translate[0] + fabsf(vp->scale[0]); - float vp_miny = vp->translate[1] - fabsf(vp->scale[1]); - float vp_maxy = vp->translate[1] + fabsf(vp->scale[1]); + /* Derive min/max from translate/scale. Note since |x| >= 0 by + * definition, we have that -|x| <= |x| hence translate - |scale| <= + * translate + |scale|, so the ordering is correct here. 
*/ + float vp_minx = vp->translate[0] - fabsf(vp->scale[0]); + float vp_maxx = vp->translate[0] + fabsf(vp->scale[0]); + float vp_miny = vp->translate[1] - fabsf(vp->scale[1]); + float vp_maxy = vp->translate[1] + fabsf(vp->scale[1]); - float minz, maxz; - util_viewport_zmin_zmax(vp, rast->clip_halfz, &minz, &maxz); + float minz, maxz; + util_viewport_zmin_zmax(vp, rast->clip_halfz, &minz, &maxz); - /* Scissor to the intersection of viewport and to the scissor, clamped - * to the framebuffer */ + /* Scissor to the intersection of viewport and to the scissor, clamped + * to the framebuffer */ - unsigned minx = MIN2(batch->key.width, MAX2((int) vp_minx, 0)); - unsigned maxx = MIN2(batch->key.width, MAX2((int) vp_maxx, 0)); - unsigned miny = MIN2(batch->key.height, MAX2((int) vp_miny, 0)); - unsigned maxy = MIN2(batch->key.height, MAX2((int) vp_maxy, 0)); + unsigned minx = MIN2(batch->key.width, MAX2((int)vp_minx, 0)); + unsigned maxx = MIN2(batch->key.width, MAX2((int)vp_maxx, 0)); + unsigned miny = MIN2(batch->key.height, MAX2((int)vp_miny, 0)); + unsigned maxy = MIN2(batch->key.height, MAX2((int)vp_maxy, 0)); - if (ss && rast->scissor) { - minx = MAX2(ss->minx, minx); - miny = MAX2(ss->miny, miny); - maxx = MIN2(ss->maxx, maxx); - maxy = MIN2(ss->maxy, maxy); - } + if (ss && rast->scissor) { + minx = MAX2(ss->minx, minx); + miny = MAX2(ss->miny, miny); + maxx = MIN2(ss->maxx, maxx); + maxy = MIN2(ss->maxy, maxy); + } - /* Set the range to [1, 1) so max values don't wrap round */ - if (maxx == 0 || maxy == 0) - maxx = maxy = minx = miny = 1; + /* Set the range to [1, 1) so max values don't wrap round */ + if (maxx == 0 || maxy == 0) + maxx = maxy = minx = miny = 1; - panfrost_batch_union_scissor(batch, minx, miny, maxx, maxy); - batch->scissor_culls_everything = (minx >= maxx || miny >= maxy); + panfrost_batch_union_scissor(batch, minx, miny, maxx, maxy); + batch->scissor_culls_everything = (minx >= maxx || miny >= maxy); - /* [minx, maxx) and [miny, maxy) are exclusive ranges in the hardware */ - maxx--; - maxy--; + /* [minx, maxx) and [miny, maxy) are exclusive ranges in the hardware */ + maxx--; + maxy--; - batch->minimum_z = rast->depth_clip_near ? minz : -INFINITY; - batch->maximum_z = rast->depth_clip_far ? maxz : +INFINITY; + batch->minimum_z = rast->depth_clip_near ? minz : -INFINITY; + batch->maximum_z = rast->depth_clip_far ? 
maxz : +INFINITY; #if PAN_ARCH <= 7 - struct panfrost_ptr T = pan_pool_alloc_desc(&batch->pool.base, VIEWPORT); + struct panfrost_ptr T = pan_pool_alloc_desc(&batch->pool.base, VIEWPORT); - pan_pack(T.cpu, VIEWPORT, cfg) { - cfg.scissor_minimum_x = minx; - cfg.scissor_minimum_y = miny; - cfg.scissor_maximum_x = maxx; - cfg.scissor_maximum_y = maxy; + pan_pack(T.cpu, VIEWPORT, cfg) { + cfg.scissor_minimum_x = minx; + cfg.scissor_minimum_y = miny; + cfg.scissor_maximum_x = maxx; + cfg.scissor_maximum_y = maxy; - cfg.minimum_z = batch->minimum_z; - cfg.maximum_z = batch->maximum_z; - } + cfg.minimum_z = batch->minimum_z; + cfg.maximum_z = batch->maximum_z; + } - return T.gpu; + return T.gpu; #else - pan_pack(&batch->scissor, SCISSOR, cfg) { - cfg.scissor_minimum_x = minx; - cfg.scissor_minimum_y = miny; - cfg.scissor_maximum_x = maxx; - cfg.scissor_maximum_y = maxy; - } + pan_pack(&batch->scissor, SCISSOR, cfg) { + cfg.scissor_minimum_x = minx; + cfg.scissor_minimum_y = miny; + cfg.scissor_maximum_x = maxx; + cfg.scissor_maximum_y = maxy; + } - return 0; + return 0; #endif } @@ -838,32 +846,33 @@ panfrost_emit_viewport(struct panfrost_batch *batch) static mali_ptr panfrost_emit_depth_stencil(struct panfrost_batch *batch) { - struct panfrost_context *ctx = batch->ctx; - const struct panfrost_zsa_state *zsa = ctx->depth_stencil; - struct panfrost_rasterizer *rast = ctx->rasterizer; - struct panfrost_compiled_shader *fs = ctx->prog[PIPE_SHADER_FRAGMENT]; - bool back_enab = zsa->base.stencil[1].enabled; + struct panfrost_context *ctx = batch->ctx; + const struct panfrost_zsa_state *zsa = ctx->depth_stencil; + struct panfrost_rasterizer *rast = ctx->rasterizer; + struct panfrost_compiled_shader *fs = ctx->prog[PIPE_SHADER_FRAGMENT]; + bool back_enab = zsa->base.stencil[1].enabled; - struct panfrost_ptr T = pan_pool_alloc_desc(&batch->pool.base, DEPTH_STENCIL); - struct mali_depth_stencil_packed dynamic; + struct panfrost_ptr T = + pan_pool_alloc_desc(&batch->pool.base, DEPTH_STENCIL); + struct mali_depth_stencil_packed dynamic; - pan_pack(&dynamic, DEPTH_STENCIL, cfg) { - cfg.front_reference_value = ctx->stencil_ref.ref_value[0]; - cfg.back_reference_value = ctx->stencil_ref.ref_value[back_enab ? 1 : 0]; + pan_pack(&dynamic, DEPTH_STENCIL, cfg) { + cfg.front_reference_value = ctx->stencil_ref.ref_value[0]; + cfg.back_reference_value = ctx->stencil_ref.ref_value[back_enab ? 
1 : 0]; - cfg.stencil_from_shader = fs->info.fs.writes_stencil; - cfg.depth_source = pan_depth_source(&fs->info); + cfg.stencil_from_shader = fs->info.fs.writes_stencil; + cfg.depth_source = pan_depth_source(&fs->info); - cfg.depth_bias_enable = rast->base.offset_tri; - cfg.depth_units = rast->base.offset_units * 2.0f; - cfg.depth_factor = rast->base.offset_scale; - cfg.depth_bias_clamp = rast->base.offset_clamp; - } + cfg.depth_bias_enable = rast->base.offset_tri; + cfg.depth_units = rast->base.offset_units * 2.0f; + cfg.depth_factor = rast->base.offset_scale; + cfg.depth_bias_clamp = rast->base.offset_clamp; + } - pan_merge(dynamic, zsa->desc, DEPTH_STENCIL); - memcpy(T.cpu, &dynamic, pan_size(DEPTH_STENCIL)); + pan_merge(dynamic, zsa->desc, DEPTH_STENCIL); + memcpy(T.cpu, &dynamic, pan_size(DEPTH_STENCIL)); - return T.gpu; + return T.gpu; } /** @@ -873,24 +882,25 @@ panfrost_emit_depth_stencil(struct panfrost_batch *batch) static mali_ptr panfrost_emit_blend_valhall(struct panfrost_batch *batch) { - unsigned rt_count = MAX2(batch->key.nr_cbufs, 1); + unsigned rt_count = MAX2(batch->key.nr_cbufs, 1); - struct panfrost_ptr T = pan_pool_alloc_desc_array(&batch->pool.base, rt_count, BLEND); + struct panfrost_ptr T = + pan_pool_alloc_desc_array(&batch->pool.base, rt_count, BLEND); - mali_ptr blend_shaders[PIPE_MAX_COLOR_BUFS] = { 0 }; - panfrost_get_blend_shaders(batch, blend_shaders); + mali_ptr blend_shaders[PIPE_MAX_COLOR_BUFS] = {0}; + panfrost_get_blend_shaders(batch, blend_shaders); - panfrost_emit_blend(batch, T.cpu, blend_shaders); + panfrost_emit_blend(batch, T.cpu, blend_shaders); - /* Precalculate for the per-draw path */ - bool has_blend_shader = false; + /* Precalculate for the per-draw path */ + bool has_blend_shader = false; - for (unsigned i = 0; i < rt_count; ++i) - has_blend_shader |= !!blend_shaders[i]; + for (unsigned i = 0; i < rt_count; ++i) + has_blend_shader |= !!blend_shaders[i]; - batch->ctx->valhall_has_blend_shader = has_blend_shader; + batch->ctx->valhall_has_blend_shader = has_blend_shader; - return T.gpu; + return T.gpu; } /** @@ -899,29 +909,28 @@ panfrost_emit_blend_valhall(struct panfrost_batch *batch) static mali_ptr panfrost_emit_vertex_buffers(struct panfrost_batch *batch) { - struct panfrost_context *ctx = batch->ctx; - unsigned buffer_count = util_last_bit(ctx->vb_mask); - struct panfrost_ptr T = pan_pool_alloc_desc_array(&batch->pool.base, - buffer_count, BUFFER); - struct mali_buffer_packed *buffers = T.cpu; + struct panfrost_context *ctx = batch->ctx; + unsigned buffer_count = util_last_bit(ctx->vb_mask); + struct panfrost_ptr T = + pan_pool_alloc_desc_array(&batch->pool.base, buffer_count, BUFFER); + struct mali_buffer_packed *buffers = T.cpu; - u_foreach_bit(i, ctx->vb_mask) { - struct pipe_vertex_buffer vb = ctx->vertex_buffers[i]; - struct pipe_resource *prsrc = vb.buffer.resource; - struct panfrost_resource *rsrc = pan_resource(prsrc); - assert(!vb.is_user_buffer); + u_foreach_bit(i, ctx->vb_mask) { + struct pipe_vertex_buffer vb = ctx->vertex_buffers[i]; + struct pipe_resource *prsrc = vb.buffer.resource; + struct panfrost_resource *rsrc = pan_resource(prsrc); + assert(!vb.is_user_buffer); - panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX); + panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX); - pan_pack(buffers + i, BUFFER, cfg) { - cfg.address = rsrc->image.data.bo->ptr.gpu + - vb.buffer_offset; + pan_pack(buffers + i, BUFFER, cfg) { + cfg.address = rsrc->image.data.bo->ptr.gpu + vb.buffer_offset; - cfg.size = prsrc->width0 - 
vb.buffer_offset; - } - } + cfg.size = prsrc->width0 - vb.buffer_offset; + } + } - return T.gpu; + return T.gpu; } /** @@ -933,26 +942,25 @@ panfrost_emit_vertex_buffers(struct panfrost_batch *batch) static mali_ptr panfrost_emit_vertex_data(struct panfrost_batch *batch) { - struct panfrost_context *ctx = batch->ctx; - struct panfrost_vertex_state *vtx = ctx->vertex; - struct panfrost_ptr T = pan_pool_alloc_desc_array(&batch->pool.base, - vtx->num_elements, - ATTRIBUTE); - struct mali_attribute_packed *attributes = T.cpu; + struct panfrost_context *ctx = batch->ctx; + struct panfrost_vertex_state *vtx = ctx->vertex; + struct panfrost_ptr T = pan_pool_alloc_desc_array( + &batch->pool.base, vtx->num_elements, ATTRIBUTE); + struct mali_attribute_packed *attributes = T.cpu; - for (unsigned i = 0; i < vtx->num_elements; ++i) { - struct mali_attribute_packed packed; - unsigned vbi = vtx->pipe[i].vertex_buffer_index; + for (unsigned i = 0; i < vtx->num_elements; ++i) { + struct mali_attribute_packed packed; + unsigned vbi = vtx->pipe[i].vertex_buffer_index; - pan_pack(&packed, ATTRIBUTE, cfg) { - cfg.stride = ctx->vertex_buffers[vbi].stride; - } + pan_pack(&packed, ATTRIBUTE, cfg) { + cfg.stride = ctx->vertex_buffers[vbi].stride; + } - pan_merge(packed, vtx->attributes[i], ATTRIBUTE); - attributes[i] = packed; - } + pan_merge(packed, vtx->attributes[i], ATTRIBUTE); + attributes[i] = packed; + } - return T.gpu; + return T.gpu; } /* @@ -964,83 +972,79 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch) static struct pipe_sampler_view panfrost_pipe_image_to_sampler_view(struct pipe_image_view *v) { - struct pipe_sampler_view out = { - .format = v->format, - .texture = v->resource, - .target = v->resource->target, - .swizzle_r = PIPE_SWIZZLE_X, - .swizzle_g = PIPE_SWIZZLE_Y, - .swizzle_b = PIPE_SWIZZLE_Z, - .swizzle_a = PIPE_SWIZZLE_W - }; + struct pipe_sampler_view out = {.format = v->format, + .texture = v->resource, + .target = v->resource->target, + .swizzle_r = PIPE_SWIZZLE_X, + .swizzle_g = PIPE_SWIZZLE_Y, + .swizzle_b = PIPE_SWIZZLE_Z, + .swizzle_a = PIPE_SWIZZLE_W}; - if (out.target == PIPE_BUFFER) { - out.u.buf.offset = v->u.buf.offset; - out.u.buf.size = v->u.buf.size; - } else { - out.u.tex.first_layer = v->u.tex.first_layer; - out.u.tex.last_layer = v->u.tex.last_layer; + if (out.target == PIPE_BUFFER) { + out.u.buf.offset = v->u.buf.offset; + out.u.buf.size = v->u.buf.size; + } else { + out.u.tex.first_layer = v->u.tex.first_layer; + out.u.tex.last_layer = v->u.tex.last_layer; - /* Single level only */ - out.u.tex.first_level = v->u.tex.level; - out.u.tex.last_level = v->u.tex.level; - } + /* Single level only */ + out.u.tex.first_level = v->u.tex.level; + out.u.tex.last_level = v->u.tex.level; + } - return out; + return out; } -static void -panfrost_update_sampler_view(struct panfrost_sampler_view *view, - struct pipe_context *pctx); +static void panfrost_update_sampler_view(struct panfrost_sampler_view *view, + struct pipe_context *pctx); static mali_ptr panfrost_emit_images(struct panfrost_batch *batch, enum pipe_shader_type stage) { - struct panfrost_context *ctx = batch->ctx; - unsigned last_bit = util_last_bit(ctx->image_mask[stage]); + struct panfrost_context *ctx = batch->ctx; + unsigned last_bit = util_last_bit(ctx->image_mask[stage]); - struct panfrost_ptr T = - pan_pool_alloc_desc_array(&batch->pool.base, last_bit, TEXTURE); + struct panfrost_ptr T = + pan_pool_alloc_desc_array(&batch->pool.base, last_bit, TEXTURE); - struct mali_texture_packed *out = (struct 
mali_texture_packed *) T.cpu; + struct mali_texture_packed *out = (struct mali_texture_packed *)T.cpu; - for (int i = 0; i < last_bit; ++i) { - struct pipe_image_view *image = &ctx->images[stage][i]; + for (int i = 0; i < last_bit; ++i) { + struct pipe_image_view *image = &ctx->images[stage][i]; - if (!(ctx->image_mask[stage] & BITFIELD_BIT(i))) { - memset(&out[i], 0, sizeof(out[i])); - continue; - } + if (!(ctx->image_mask[stage] & BITFIELD_BIT(i))) { + memset(&out[i], 0, sizeof(out[i])); + continue; + } - /* Construct a synthetic sampler view so we can use our usual - * sampler view code for the actual descriptor packing. - * - * Use the batch pool for a transient allocation, rather than - * allocating a long-lived descriptor. - */ - struct panfrost_sampler_view view = { - .base = panfrost_pipe_image_to_sampler_view(image), - .pool = &batch->pool - }; + /* Construct a synthetic sampler view so we can use our usual + * sampler view code for the actual descriptor packing. + * + * Use the batch pool for a transient allocation, rather than + * allocating a long-lived descriptor. + */ + struct panfrost_sampler_view view = { + .base = panfrost_pipe_image_to_sampler_view(image), + .pool = &batch->pool}; - /* If we specify a cube map, the hardware internally treat it as - * a 2D array. Since cube maps as images can confuse our common - * texturing code, explicitly use a 2D array. - * - * Similar concerns apply to 3D textures. - */ - if (view.base.target == PIPE_BUFFER) - view.base.target = PIPE_BUFFER; - else - view.base.target = PIPE_TEXTURE_2D_ARRAY; + /* If we specify a cube map, the hardware internally treat it as + * a 2D array. Since cube maps as images can confuse our common + * texturing code, explicitly use a 2D array. + * + * Similar concerns apply to 3D textures. 
+ */ + if (view.base.target == PIPE_BUFFER) + view.base.target = PIPE_BUFFER; + else + view.base.target = PIPE_TEXTURE_2D_ARRAY; - panfrost_update_sampler_view(&view, &ctx->base); - out[i] = view.bifrost_descriptor; + panfrost_update_sampler_view(&view, &ctx->base); + out[i] = view.bifrost_descriptor; - panfrost_track_image_access(batch, stage, image); - } + panfrost_track_image_access(batch, stage, image); + } - return T.gpu; + return T.gpu; } #endif @@ -1050,213 +1054,205 @@ panfrost_map_constant_buffer_gpu(struct panfrost_batch *batch, struct panfrost_constant_buffer *buf, unsigned index) { - struct pipe_constant_buffer *cb = &buf->cb[index]; - struct panfrost_resource *rsrc = pan_resource(cb->buffer); + struct pipe_constant_buffer *cb = &buf->cb[index]; + struct panfrost_resource *rsrc = pan_resource(cb->buffer); - if (rsrc) { - panfrost_batch_read_rsrc(batch, rsrc, st); + if (rsrc) { + panfrost_batch_read_rsrc(batch, rsrc, st); - /* Alignment gauranteed by - * PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */ - return rsrc->image.data.bo->ptr.gpu + cb->buffer_offset; - } else if (cb->user_buffer) { - return pan_pool_upload_aligned(&batch->pool.base, - cb->user_buffer + - cb->buffer_offset, - cb->buffer_size, 16); - } else { - unreachable("No constant buffer"); - } + /* Alignment gauranteed by + * PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */ + return rsrc->image.data.bo->ptr.gpu + cb->buffer_offset; + } else if (cb->user_buffer) { + return pan_pool_upload_aligned(&batch->pool.base, + cb->user_buffer + cb->buffer_offset, + cb->buffer_size, 16); + } else { + unreachable("No constant buffer"); + } } struct sysval_uniform { - union { - float f[4]; - int32_t i[4]; - uint32_t u[4]; - uint64_t du[2]; - }; + union { + float f[4]; + int32_t i[4]; + uint32_t u[4]; + uint64_t du[2]; + }; }; static void panfrost_upload_viewport_scale_sysval(struct panfrost_batch *batch, struct sysval_uniform *uniform) { - struct panfrost_context *ctx = batch->ctx; - const struct pipe_viewport_state *vp = &ctx->pipe_viewport; + struct panfrost_context *ctx = batch->ctx; + const struct pipe_viewport_state *vp = &ctx->pipe_viewport; - uniform->f[0] = vp->scale[0]; - uniform->f[1] = vp->scale[1]; - uniform->f[2] = vp->scale[2]; + uniform->f[0] = vp->scale[0]; + uniform->f[1] = vp->scale[1]; + uniform->f[2] = vp->scale[2]; } static void panfrost_upload_viewport_offset_sysval(struct panfrost_batch *batch, struct sysval_uniform *uniform) { - struct panfrost_context *ctx = batch->ctx; - const struct pipe_viewport_state *vp = &ctx->pipe_viewport; + struct panfrost_context *ctx = batch->ctx; + const struct pipe_viewport_state *vp = &ctx->pipe_viewport; - uniform->f[0] = vp->translate[0]; - uniform->f[1] = vp->translate[1]; - uniform->f[2] = vp->translate[2]; + uniform->f[0] = vp->translate[0]; + uniform->f[1] = vp->translate[1]; + uniform->f[2] = vp->translate[2]; } -static void panfrost_upload_txs_sysval(struct panfrost_batch *batch, - enum pipe_shader_type st, - unsigned int sysvalid, - struct sysval_uniform *uniform) +static void +panfrost_upload_txs_sysval(struct panfrost_batch *batch, + enum pipe_shader_type st, unsigned int sysvalid, + struct sysval_uniform *uniform) { - struct panfrost_context *ctx = batch->ctx; - unsigned texidx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid); - unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid); - bool is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid); - struct pipe_sampler_view *tex = &ctx->sampler_views[st][texidx]->base; + struct panfrost_context *ctx = batch->ctx; + unsigned texidx = 
PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid); + unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid); + bool is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid); + struct pipe_sampler_view *tex = &ctx->sampler_views[st][texidx]->base; - assert(dim); + assert(dim); - if (tex->target == PIPE_BUFFER) { - assert(dim == 1); - uniform->i[0] = - tex->u.buf.size / util_format_get_blocksize(tex->format); - return; - } + if (tex->target == PIPE_BUFFER) { + assert(dim == 1); + uniform->i[0] = tex->u.buf.size / util_format_get_blocksize(tex->format); + return; + } - uniform->i[0] = u_minify(tex->texture->width0, tex->u.tex.first_level); + uniform->i[0] = u_minify(tex->texture->width0, tex->u.tex.first_level); - if (dim > 1) - uniform->i[1] = u_minify(tex->texture->height0, - tex->u.tex.first_level); + if (dim > 1) + uniform->i[1] = u_minify(tex->texture->height0, tex->u.tex.first_level); - if (dim > 2) - uniform->i[2] = u_minify(tex->texture->depth0, - tex->u.tex.first_level); + if (dim > 2) + uniform->i[2] = u_minify(tex->texture->depth0, tex->u.tex.first_level); - if (is_array) { - unsigned size = tex->texture->array_size; + if (is_array) { + unsigned size = tex->texture->array_size; - /* Internally, we store the number of 2D images (faces * array - * size). Externally, we report the array size in terms of - * complete cubes. So divide by the # of faces per cube. - */ - if (tex->target == PIPE_TEXTURE_CUBE_ARRAY) - size /= 6; + /* Internally, we store the number of 2D images (faces * array + * size). Externally, we report the array size in terms of + * complete cubes. So divide by the # of faces per cube. + */ + if (tex->target == PIPE_TEXTURE_CUBE_ARRAY) + size /= 6; - uniform->i[dim] = size; - } + uniform->i[dim] = size; + } } -static void panfrost_upload_image_size_sysval(struct panfrost_batch *batch, - enum pipe_shader_type st, - unsigned int sysvalid, - struct sysval_uniform *uniform) +static void +panfrost_upload_image_size_sysval(struct panfrost_batch *batch, + enum pipe_shader_type st, + unsigned int sysvalid, + struct sysval_uniform *uniform) { - struct panfrost_context *ctx = batch->ctx; - unsigned idx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid); - unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid); - unsigned is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid); + struct panfrost_context *ctx = batch->ctx; + unsigned idx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid); + unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid); + unsigned is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid); - assert(dim && dim < 4); + assert(dim && dim < 4); - struct pipe_image_view *image = &ctx->images[st][idx]; + struct pipe_image_view *image = &ctx->images[st][idx]; - if (image->resource->target == PIPE_BUFFER) { - unsigned blocksize = util_format_get_blocksize(image->format); - uniform->i[0] = image->resource->width0 / blocksize; - return; - } + if (image->resource->target == PIPE_BUFFER) { + unsigned blocksize = util_format_get_blocksize(image->format); + uniform->i[0] = image->resource->width0 / blocksize; + return; + } - uniform->i[0] = u_minify(image->resource->width0, - image->u.tex.level); + uniform->i[0] = u_minify(image->resource->width0, image->u.tex.level); - if (dim > 1) - uniform->i[1] = u_minify(image->resource->height0, - image->u.tex.level); + if (dim > 1) + uniform->i[1] = u_minify(image->resource->height0, image->u.tex.level); - if (dim > 2) - uniform->i[2] = u_minify(image->resource->depth0, - image->u.tex.level); + if (dim > 2) + uniform->i[2] = u_minify(image->resource->depth0, image->u.tex.level); - if 
(is_array) - uniform->i[dim] = image->resource->array_size; + if (is_array) + uniform->i[dim] = image->resource->array_size; } static void panfrost_upload_ssbo_sysval(struct panfrost_batch *batch, - enum pipe_shader_type st, - unsigned ssbo_id, + enum pipe_shader_type st, unsigned ssbo_id, struct sysval_uniform *uniform) { - struct panfrost_context *ctx = batch->ctx; + struct panfrost_context *ctx = batch->ctx; - assert(ctx->ssbo_mask[st] & (1 << ssbo_id)); - struct pipe_shader_buffer sb = ctx->ssbo[st][ssbo_id]; + assert(ctx->ssbo_mask[st] & (1 << ssbo_id)); + struct pipe_shader_buffer sb = ctx->ssbo[st][ssbo_id]; - /* Compute address */ - struct panfrost_resource *rsrc = pan_resource(sb.buffer); - struct panfrost_bo *bo = rsrc->image.data.bo; + /* Compute address */ + struct panfrost_resource *rsrc = pan_resource(sb.buffer); + struct panfrost_bo *bo = rsrc->image.data.bo; - panfrost_batch_write_rsrc(batch, rsrc, st); + panfrost_batch_write_rsrc(batch, rsrc, st); - util_range_add(&rsrc->base, &rsrc->valid_buffer_range, - sb.buffer_offset, sb.buffer_size); + util_range_add(&rsrc->base, &rsrc->valid_buffer_range, sb.buffer_offset, + sb.buffer_size); - /* Upload address and size as sysval */ - uniform->du[0] = bo->ptr.gpu + sb.buffer_offset; - uniform->u[2] = sb.buffer_size; + /* Upload address and size as sysval */ + uniform->du[0] = bo->ptr.gpu + sb.buffer_offset; + uniform->u[2] = sb.buffer_size; } static void panfrost_upload_sampler_sysval(struct panfrost_batch *batch, - enum pipe_shader_type st, - unsigned samp_idx, + enum pipe_shader_type st, unsigned samp_idx, struct sysval_uniform *uniform) { - struct panfrost_context *ctx = batch->ctx; - struct pipe_sampler_state *sampl = &ctx->samplers[st][samp_idx]->base; + struct panfrost_context *ctx = batch->ctx; + struct pipe_sampler_state *sampl = &ctx->samplers[st][samp_idx]->base; - uniform->f[0] = sampl->min_lod; - uniform->f[1] = sampl->max_lod; - uniform->f[2] = sampl->lod_bias; + uniform->f[0] = sampl->min_lod; + uniform->f[1] = sampl->max_lod; + uniform->f[2] = sampl->lod_bias; - /* Even without any errata, Midgard represents "no mipmapping" as - * fixing the LOD with the clamps; keep behaviour consistent. c.f. - * panfrost_create_sampler_state which also explains our choice of - * epsilon value (again to keep behaviour consistent) */ + /* Even without any errata, Midgard represents "no mipmapping" as + * fixing the LOD with the clamps; keep behaviour consistent. c.f. 
+ * panfrost_create_sampler_state which also explains our choice of + * epsilon value (again to keep behaviour consistent) */ - if (sampl->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) - uniform->f[1] = uniform->f[0] + (1.0/256.0); + if (sampl->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) + uniform->f[1] = uniform->f[0] + (1.0 / 256.0); } static void panfrost_upload_num_work_groups_sysval(struct panfrost_batch *batch, struct sysval_uniform *uniform) { - struct panfrost_context *ctx = batch->ctx; + struct panfrost_context *ctx = batch->ctx; - uniform->u[0] = ctx->compute_grid->grid[0]; - uniform->u[1] = ctx->compute_grid->grid[1]; - uniform->u[2] = ctx->compute_grid->grid[2]; + uniform->u[0] = ctx->compute_grid->grid[0]; + uniform->u[1] = ctx->compute_grid->grid[1]; + uniform->u[2] = ctx->compute_grid->grid[2]; } static void panfrost_upload_local_group_size_sysval(struct panfrost_batch *batch, struct sysval_uniform *uniform) { - struct panfrost_context *ctx = batch->ctx; + struct panfrost_context *ctx = batch->ctx; - uniform->u[0] = ctx->compute_grid->block[0]; - uniform->u[1] = ctx->compute_grid->block[1]; - uniform->u[2] = ctx->compute_grid->block[2]; + uniform->u[0] = ctx->compute_grid->block[0]; + uniform->u[1] = ctx->compute_grid->block[1]; + uniform->u[2] = ctx->compute_grid->block[2]; } static void panfrost_upload_work_dim_sysval(struct panfrost_batch *batch, struct sysval_uniform *uniform) { - struct panfrost_context *ctx = batch->ctx; + struct panfrost_context *ctx = batch->ctx; - uniform->u[0] = ctx->compute_grid->work_dim; + uniform->u[0] = ctx->compute_grid->work_dim; } /* Sample positions are pushed in a Bifrost specific format on Bifrost. On @@ -1265,168 +1261,156 @@ panfrost_upload_work_dim_sysval(struct panfrost_batch *batch, static void panfrost_upload_sample_positions_sysval(struct panfrost_batch *batch, - struct sysval_uniform *uniform) + struct sysval_uniform *uniform) { - struct panfrost_context *ctx = batch->ctx; - struct panfrost_device *dev = pan_device(ctx->base.screen); + struct panfrost_context *ctx = batch->ctx; + struct panfrost_device *dev = pan_device(ctx->base.screen); - unsigned samples = util_framebuffer_get_num_samples(&batch->key); - uniform->du[0] = panfrost_sample_positions(dev, panfrost_sample_pattern(samples)); + unsigned samples = util_framebuffer_get_num_samples(&batch->key); + uniform->du[0] = + panfrost_sample_positions(dev, panfrost_sample_pattern(samples)); } static void panfrost_upload_multisampled_sysval(struct panfrost_batch *batch, - struct sysval_uniform *uniform) + struct sysval_uniform *uniform) { - unsigned samples = util_framebuffer_get_num_samples(&batch->key); - uniform->u[0] = samples > 1; + unsigned samples = util_framebuffer_get_num_samples(&batch->key); + uniform->u[0] = samples > 1; } #if PAN_ARCH >= 6 static void panfrost_upload_rt_conversion_sysval(struct panfrost_batch *batch, - unsigned size_and_rt, struct sysval_uniform *uniform) + unsigned size_and_rt, + struct sysval_uniform *uniform) { - struct panfrost_context *ctx = batch->ctx; - struct panfrost_device *dev = pan_device(ctx->base.screen); - unsigned rt = size_and_rt & 0xF; - unsigned size = size_and_rt >> 4; + struct panfrost_context *ctx = batch->ctx; + struct panfrost_device *dev = pan_device(ctx->base.screen); + unsigned rt = size_and_rt & 0xF; + unsigned size = size_and_rt >> 4; - if (rt < batch->key.nr_cbufs && batch->key.cbufs[rt]) { - enum pipe_format format = batch->key.cbufs[rt]->format; - uniform->u[0] = - GENX(pan_blend_get_internal_desc)(dev, format, rt, size, 
false) >> 32; - } else { - pan_pack(&uniform->u[0], INTERNAL_CONVERSION, cfg) - cfg.memory_format = dev->formats[PIPE_FORMAT_NONE].hw; - } + if (rt < batch->key.nr_cbufs && batch->key.cbufs[rt]) { + enum pipe_format format = batch->key.cbufs[rt]->format; + uniform->u[0] = + GENX(pan_blend_get_internal_desc)(dev, format, rt, size, false) >> 32; + } else { + pan_pack(&uniform->u[0], INTERNAL_CONVERSION, cfg) + cfg.memory_format = dev->formats[PIPE_FORMAT_NONE].hw; + } } #endif static unsigned panfrost_xfb_offset(unsigned stride, struct pipe_stream_output_target *target) { - return target->buffer_offset + (pan_so_target(target)->offset * stride); + return target->buffer_offset + (pan_so_target(target)->offset * stride); } static void -panfrost_upload_sysvals(struct panfrost_batch *batch, - void *ptr_cpu, - mali_ptr ptr_gpu, - struct panfrost_compiled_shader *ss, +panfrost_upload_sysvals(struct panfrost_batch *batch, void *ptr_cpu, + mali_ptr ptr_gpu, struct panfrost_compiled_shader *ss, enum pipe_shader_type st) { - struct sysval_uniform *uniforms = ptr_cpu; + struct sysval_uniform *uniforms = ptr_cpu; - for (unsigned i = 0; i < ss->info.sysvals.sysval_count; ++i) { - int sysval = ss->info.sysvals.sysvals[i]; + for (unsigned i = 0; i < ss->info.sysvals.sysval_count; ++i) { + int sysval = ss->info.sysvals.sysvals[i]; - switch (PAN_SYSVAL_TYPE(sysval)) { - case PAN_SYSVAL_VIEWPORT_SCALE: - panfrost_upload_viewport_scale_sysval(batch, - &uniforms[i]); - break; - case PAN_SYSVAL_VIEWPORT_OFFSET: - panfrost_upload_viewport_offset_sysval(batch, - &uniforms[i]); - break; - case PAN_SYSVAL_TEXTURE_SIZE: - panfrost_upload_txs_sysval(batch, st, - PAN_SYSVAL_ID(sysval), - &uniforms[i]); - break; - case PAN_SYSVAL_SSBO: - panfrost_upload_ssbo_sysval(batch, st, - PAN_SYSVAL_ID(sysval), - &uniforms[i]); - break; + switch (PAN_SYSVAL_TYPE(sysval)) { + case PAN_SYSVAL_VIEWPORT_SCALE: + panfrost_upload_viewport_scale_sysval(batch, &uniforms[i]); + break; + case PAN_SYSVAL_VIEWPORT_OFFSET: + panfrost_upload_viewport_offset_sysval(batch, &uniforms[i]); + break; + case PAN_SYSVAL_TEXTURE_SIZE: + panfrost_upload_txs_sysval(batch, st, PAN_SYSVAL_ID(sysval), + &uniforms[i]); + break; + case PAN_SYSVAL_SSBO: + panfrost_upload_ssbo_sysval(batch, st, PAN_SYSVAL_ID(sysval), + &uniforms[i]); + break; - case PAN_SYSVAL_XFB: - { - unsigned buf = PAN_SYSVAL_ID(sysval); - struct panfrost_compiled_shader *vs = - batch->ctx->prog[PIPE_SHADER_VERTEX]; - struct pipe_stream_output_info *so = &vs->stream_output; - unsigned stride = so->stride[buf] * 4; + case PAN_SYSVAL_XFB: { + unsigned buf = PAN_SYSVAL_ID(sysval); + struct panfrost_compiled_shader *vs = + batch->ctx->prog[PIPE_SHADER_VERTEX]; + struct pipe_stream_output_info *so = &vs->stream_output; + unsigned stride = so->stride[buf] * 4; - struct pipe_stream_output_target *target = NULL; - if (buf < batch->ctx->streamout.num_targets) - target = batch->ctx->streamout.targets[buf]; + struct pipe_stream_output_target *target = NULL; + if (buf < batch->ctx->streamout.num_targets) + target = batch->ctx->streamout.targets[buf]; - if (!target) { - /* Memory sink */ - uniforms[i].du[0] = 0x8ull << 60; - break; - } + if (!target) { + /* Memory sink */ + uniforms[i].du[0] = 0x8ull << 60; + break; + } - struct panfrost_resource *rsrc = pan_resource(target->buffer); - unsigned offset = panfrost_xfb_offset(stride, target); + struct panfrost_resource *rsrc = pan_resource(target->buffer); + unsigned offset = panfrost_xfb_offset(stride, target); - util_range_add(&rsrc->base, 
&rsrc->valid_buffer_range, - offset, target->buffer_size - offset); + util_range_add(&rsrc->base, &rsrc->valid_buffer_range, offset, + target->buffer_size - offset); - panfrost_batch_write_rsrc(batch, rsrc, PIPE_SHADER_VERTEX); + panfrost_batch_write_rsrc(batch, rsrc, PIPE_SHADER_VERTEX); - uniforms[i].du[0] = rsrc->image.data.bo->ptr.gpu + offset; - break; - } + uniforms[i].du[0] = rsrc->image.data.bo->ptr.gpu + offset; + break; + } - case PAN_SYSVAL_NUM_VERTICES: - uniforms[i].u[0] = batch->ctx->vertex_count; - break; + case PAN_SYSVAL_NUM_VERTICES: + uniforms[i].u[0] = batch->ctx->vertex_count; + break; - case PAN_SYSVAL_NUM_WORK_GROUPS: - for (unsigned j = 0; j < 3; j++) { - batch->num_wg_sysval[j] = - ptr_gpu + (i * sizeof(*uniforms)) + (j * 4); - } - panfrost_upload_num_work_groups_sysval(batch, - &uniforms[i]); - break; - case PAN_SYSVAL_LOCAL_GROUP_SIZE: - panfrost_upload_local_group_size_sysval(batch, - &uniforms[i]); - break; - case PAN_SYSVAL_WORK_DIM: - panfrost_upload_work_dim_sysval(batch, - &uniforms[i]); - break; - case PAN_SYSVAL_SAMPLER: - panfrost_upload_sampler_sysval(batch, st, - PAN_SYSVAL_ID(sysval), - &uniforms[i]); - break; - case PAN_SYSVAL_IMAGE_SIZE: - panfrost_upload_image_size_sysval(batch, st, - PAN_SYSVAL_ID(sysval), - &uniforms[i]); - break; - case PAN_SYSVAL_SAMPLE_POSITIONS: - panfrost_upload_sample_positions_sysval(batch, - &uniforms[i]); - break; - case PAN_SYSVAL_MULTISAMPLED: - panfrost_upload_multisampled_sysval(batch, - &uniforms[i]); - break; + case PAN_SYSVAL_NUM_WORK_GROUPS: + for (unsigned j = 0; j < 3; j++) { + batch->num_wg_sysval[j] = + ptr_gpu + (i * sizeof(*uniforms)) + (j * 4); + } + panfrost_upload_num_work_groups_sysval(batch, &uniforms[i]); + break; + case PAN_SYSVAL_LOCAL_GROUP_SIZE: + panfrost_upload_local_group_size_sysval(batch, &uniforms[i]); + break; + case PAN_SYSVAL_WORK_DIM: + panfrost_upload_work_dim_sysval(batch, &uniforms[i]); + break; + case PAN_SYSVAL_SAMPLER: + panfrost_upload_sampler_sysval(batch, st, PAN_SYSVAL_ID(sysval), + &uniforms[i]); + break; + case PAN_SYSVAL_IMAGE_SIZE: + panfrost_upload_image_size_sysval(batch, st, PAN_SYSVAL_ID(sysval), + &uniforms[i]); + break; + case PAN_SYSVAL_SAMPLE_POSITIONS: + panfrost_upload_sample_positions_sysval(batch, &uniforms[i]); + break; + case PAN_SYSVAL_MULTISAMPLED: + panfrost_upload_multisampled_sysval(batch, &uniforms[i]); + break; #if PAN_ARCH >= 6 - case PAN_SYSVAL_RT_CONVERSION: - panfrost_upload_rt_conversion_sysval(batch, - PAN_SYSVAL_ID(sysval), &uniforms[i]); - break; + case PAN_SYSVAL_RT_CONVERSION: + panfrost_upload_rt_conversion_sysval(batch, PAN_SYSVAL_ID(sysval), + &uniforms[i]); + break; #endif - case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS: - uniforms[i].u[0] = batch->ctx->offset_start; - uniforms[i].u[1] = batch->ctx->base_vertex; - uniforms[i].u[2] = batch->ctx->base_instance; - break; - case PAN_SYSVAL_DRAWID: - uniforms[i].u[0] = batch->ctx->drawid; - break; - default: - assert(0); - } - } + case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS: + uniforms[i].u[0] = batch->ctx->offset_start; + uniforms[i].u[1] = batch->ctx->base_vertex; + uniforms[i].u[2] = batch->ctx->base_instance; + break; + case PAN_SYSVAL_DRAWID: + uniforms[i].u[0] = batch->ctx->drawid; + break; + default: + assert(0); + } + } } static const void * @@ -1434,19 +1418,19 @@ panfrost_map_constant_buffer_cpu(struct panfrost_context *ctx, struct panfrost_constant_buffer *buf, unsigned index) { - struct pipe_constant_buffer *cb = &buf->cb[index]; - struct panfrost_resource *rsrc = pan_resource(cb->buffer); + 
struct pipe_constant_buffer *cb = &buf->cb[index]; + struct panfrost_resource *rsrc = pan_resource(cb->buffer); - if (rsrc) { - panfrost_bo_mmap(rsrc->image.data.bo); - panfrost_flush_writer(ctx, rsrc, "CPU constant buffer mapping"); - panfrost_bo_wait(rsrc->image.data.bo, INT64_MAX, false); + if (rsrc) { + panfrost_bo_mmap(rsrc->image.data.bo); + panfrost_flush_writer(ctx, rsrc, "CPU constant buffer mapping"); + panfrost_bo_wait(rsrc->image.data.bo, INT64_MAX, false); - return rsrc->image.data.bo->ptr.cpu + cb->buffer_offset; - } else if (cb->user_buffer) { - return cb->user_buffer + cb->buffer_offset; - } else - unreachable("No constant buffer"); + return rsrc->image.data.bo->ptr.cpu + cb->buffer_offset; + } else if (cb->user_buffer) { + return cb->user_buffer + cb->buffer_offset; + } else + unreachable("No constant buffer"); } /* Emit a single UBO record. On Valhall, UBOs are dumb buffers and are @@ -1458,125 +1442,121 @@ static void panfrost_emit_ubo(void *base, unsigned index, mali_ptr address, size_t size) { #if PAN_ARCH >= 9 - struct mali_buffer_packed *out = base; + struct mali_buffer_packed *out = base; - pan_pack(out + index, BUFFER, cfg) { - cfg.size = size; - cfg.address = address; - } + pan_pack(out + index, BUFFER, cfg) { + cfg.size = size; + cfg.address = address; + } #else - struct mali_uniform_buffer_packed *out = base; + struct mali_uniform_buffer_packed *out = base; - /* Issue (57) for the ARB_uniform_buffer_object spec says that - * the buffer can be larger than the uniform data inside it, - * so clamp ubo size to what hardware supports. */ + /* Issue (57) for the ARB_uniform_buffer_object spec says that + * the buffer can be larger than the uniform data inside it, + * so clamp ubo size to what hardware supports. */ - pan_pack(out + index, UNIFORM_BUFFER, cfg) { - cfg.entries = MIN2(DIV_ROUND_UP(size, 16), 1 << 12); - cfg.pointer = address; - } + pan_pack(out + index, UNIFORM_BUFFER, cfg) { + cfg.entries = MIN2(DIV_ROUND_UP(size, 16), 1 << 12); + cfg.pointer = address; + } #endif } static mali_ptr panfrost_emit_const_buf(struct panfrost_batch *batch, - enum pipe_shader_type stage, - unsigned *buffer_count, - mali_ptr *push_constants, - unsigned *pushed_words) + enum pipe_shader_type stage, unsigned *buffer_count, + mali_ptr *push_constants, unsigned *pushed_words) { - struct panfrost_context *ctx = batch->ctx; - struct panfrost_constant_buffer *buf = &ctx->constant_buffer[stage]; - struct panfrost_compiled_shader *ss = ctx->prog[stage]; + struct panfrost_context *ctx = batch->ctx; + struct panfrost_constant_buffer *buf = &ctx->constant_buffer[stage]; + struct panfrost_compiled_shader *ss = ctx->prog[stage]; - if (!ss) - return 0; + if (!ss) + return 0; - /* Allocate room for the sysval and the uniforms */ - size_t sys_size = sizeof(float) * 4 * ss->info.sysvals.sysval_count; - struct panfrost_ptr transfer = - pan_pool_alloc_aligned(&batch->pool.base, sys_size, 16); + /* Allocate room for the sysval and the uniforms */ + size_t sys_size = sizeof(float) * 4 * ss->info.sysvals.sysval_count; + struct panfrost_ptr transfer = + pan_pool_alloc_aligned(&batch->pool.base, sys_size, 16); - /* Upload sysvals requested by the shader */ - uint8_t *sysvals = alloca(sys_size); - panfrost_upload_sysvals(batch, sysvals, transfer.gpu, ss, stage); - memcpy(transfer.cpu, sysvals, sys_size); + /* Upload sysvals requested by the shader */ + uint8_t *sysvals = alloca(sys_size); + panfrost_upload_sysvals(batch, sysvals, transfer.gpu, ss, stage); + memcpy(transfer.cpu, sysvals, sys_size); - /* 
Next up, attach UBOs. UBO count includes gaps but no sysval UBO */ - struct panfrost_compiled_shader *shader = ctx->prog[stage]; - unsigned ubo_count = shader->info.ubo_count - (sys_size ? 1 : 0); - unsigned sysval_ubo = sys_size ? ubo_count : ~0; - struct panfrost_ptr ubos = { 0 }; + /* Next up, attach UBOs. UBO count includes gaps but no sysval UBO */ + struct panfrost_compiled_shader *shader = ctx->prog[stage]; + unsigned ubo_count = shader->info.ubo_count - (sys_size ? 1 : 0); + unsigned sysval_ubo = sys_size ? ubo_count : ~0; + struct panfrost_ptr ubos = {0}; #if PAN_ARCH >= 9 - ubos = pan_pool_alloc_desc_array(&batch->pool.base, - ubo_count + 1, - BUFFER); + ubos = pan_pool_alloc_desc_array(&batch->pool.base, ubo_count + 1, BUFFER); #else - ubos = pan_pool_alloc_desc_array(&batch->pool.base, - ubo_count + 1, - UNIFORM_BUFFER); + ubos = pan_pool_alloc_desc_array(&batch->pool.base, ubo_count + 1, + UNIFORM_BUFFER); #endif - if (buffer_count) - *buffer_count = ubo_count + (sys_size ? 1 : 0); + if (buffer_count) + *buffer_count = ubo_count + (sys_size ? 1 : 0); - /* Upload sysval as a final UBO */ + /* Upload sysval as a final UBO */ - if (sys_size) - panfrost_emit_ubo(ubos.cpu, ubo_count, transfer.gpu, sys_size); + if (sys_size) + panfrost_emit_ubo(ubos.cpu, ubo_count, transfer.gpu, sys_size); - /* The rest are honest-to-goodness UBOs */ + /* The rest are honest-to-goodness UBOs */ - u_foreach_bit(ubo, ss->info.ubo_mask & buf->enabled_mask) { - size_t usz = buf->cb[ubo].buffer_size; - mali_ptr address = 0; + u_foreach_bit(ubo, ss->info.ubo_mask & buf->enabled_mask) { + size_t usz = buf->cb[ubo].buffer_size; + mali_ptr address = 0; - if (usz > 0) { - address = panfrost_map_constant_buffer_gpu(batch, - stage, buf, ubo); - } + if (usz > 0) { + address = panfrost_map_constant_buffer_gpu(batch, stage, buf, ubo); + } - panfrost_emit_ubo(ubos.cpu, ubo, address, usz); - } + panfrost_emit_ubo(ubos.cpu, ubo, address, usz); + } - if (pushed_words) - *pushed_words = ss->info.push.count; + if (pushed_words) + *pushed_words = ss->info.push.count; - if (ss->info.push.count == 0) - return ubos.gpu; + if (ss->info.push.count == 0) + return ubos.gpu; - /* Copy push constants required by the shader */ - struct panfrost_ptr push_transfer = - pan_pool_alloc_aligned(&batch->pool.base, - ss->info.push.count * 4, 16); + /* Copy push constants required by the shader */ + struct panfrost_ptr push_transfer = + pan_pool_alloc_aligned(&batch->pool.base, ss->info.push.count * 4, 16); - uint32_t *push_cpu = (uint32_t *) push_transfer.cpu; - *push_constants = push_transfer.gpu; + uint32_t *push_cpu = (uint32_t *)push_transfer.cpu; + *push_constants = push_transfer.gpu; - for (unsigned i = 0; i < ss->info.push.count; ++i) { - struct panfrost_ubo_word src = ss->info.push.words[i]; + for (unsigned i = 0; i < ss->info.push.count; ++i) { + struct panfrost_ubo_word src = ss->info.push.words[i]; - if (src.ubo == sysval_ubo) { - unsigned sysval_idx = src.offset / 16; - unsigned sysval_comp = (src.offset % 16) / 4; - unsigned sysval_type = PAN_SYSVAL_TYPE(ss->info.sysvals.sysvals[sysval_idx]); - mali_ptr ptr = push_transfer.gpu + (4 * i); + if (src.ubo == sysval_ubo) { + unsigned sysval_idx = src.offset / 16; + unsigned sysval_comp = (src.offset % 16) / 4; + unsigned sysval_type = + PAN_SYSVAL_TYPE(ss->info.sysvals.sysvals[sysval_idx]); + mali_ptr ptr = push_transfer.gpu + (4 * i); - if (sysval_type == PAN_SYSVAL_NUM_WORK_GROUPS) - batch->num_wg_sysval[sysval_comp] = ptr; - } - /* Map the UBO, this should be cheap. 
For some buffers this may - * read from write-combine memory which is slow, though :-( - */ - const void *mapped_ubo = (src.ubo == sysval_ubo) ? sysvals : - panfrost_map_constant_buffer_cpu(ctx, buf, src.ubo); + if (sysval_type == PAN_SYSVAL_NUM_WORK_GROUPS) + batch->num_wg_sysval[sysval_comp] = ptr; + } + /* Map the UBO, this should be cheap. For some buffers this may + * read from write-combine memory which is slow, though :-( + */ + const void *mapped_ubo = + (src.ubo == sysval_ubo) + ? sysvals + : panfrost_map_constant_buffer_cpu(ctx, buf, src.ubo); - /* TODO: Is there any benefit to combining ranges */ - memcpy(push_cpu + i, (uint8_t *) mapped_ubo + src.offset, 4); - } + /* TODO: Is there any benefit to combining ranges */ + memcpy(push_cpu + i, (uint8_t *)mapped_ubo + src.offset, 4); + } - return ubos.gpu; + return ubos.gpu; } /* @@ -1592,71 +1572,66 @@ panfrost_emit_const_buf(struct panfrost_batch *batch, static unsigned panfrost_choose_wls_instance_count(const struct pipe_grid_info *grid) { - if (grid->indirect) { - /* May need tuning in the future, conservative guess */ - return 128; - } else { - return util_next_power_of_two(grid->grid[0]) * - util_next_power_of_two(grid->grid[1]) * - util_next_power_of_two(grid->grid[2]); - } + if (grid->indirect) { + /* May need tuning in the future, conservative guess */ + return 128; + } else { + return util_next_power_of_two(grid->grid[0]) * + util_next_power_of_two(grid->grid[1]) * + util_next_power_of_two(grid->grid[2]); + } } static mali_ptr panfrost_emit_shared_memory(struct panfrost_batch *batch, const struct pipe_grid_info *grid) { - struct panfrost_context *ctx = batch->ctx; - struct panfrost_device *dev = pan_device(ctx->base.screen); - struct panfrost_compiled_shader *ss = ctx->prog[PIPE_SHADER_COMPUTE]; - struct panfrost_ptr t = - pan_pool_alloc_desc(&batch->pool.base, LOCAL_STORAGE); + struct panfrost_context *ctx = batch->ctx; + struct panfrost_device *dev = pan_device(ctx->base.screen); + struct panfrost_compiled_shader *ss = ctx->prog[PIPE_SHADER_COMPUTE]; + struct panfrost_ptr t = + pan_pool_alloc_desc(&batch->pool.base, LOCAL_STORAGE); - struct pan_tls_info info = { - .tls.size = ss->info.tls_size, - .wls.size = ss->info.wls_size + grid->variable_shared_mem, - .wls.instances = panfrost_choose_wls_instance_count(grid), - }; + struct pan_tls_info info = { + .tls.size = ss->info.tls_size, + .wls.size = ss->info.wls_size + grid->variable_shared_mem, + .wls.instances = panfrost_choose_wls_instance_count(grid), + }; - if (ss->info.tls_size) { - struct panfrost_bo *bo = - panfrost_batch_get_scratchpad(batch, - ss->info.tls_size, - dev->thread_tls_alloc, - dev->core_id_range); - info.tls.ptr = bo->ptr.gpu; - } + if (ss->info.tls_size) { + struct panfrost_bo *bo = panfrost_batch_get_scratchpad( + batch, ss->info.tls_size, dev->thread_tls_alloc, dev->core_id_range); + info.tls.ptr = bo->ptr.gpu; + } - if (ss->info.wls_size) { - unsigned size = pan_wls_adjust_size(info.wls.size) * - info.wls.instances * dev->core_id_range; + if (ss->info.wls_size) { + unsigned size = pan_wls_adjust_size(info.wls.size) * info.wls.instances * + dev->core_id_range; - struct panfrost_bo *bo = - panfrost_batch_get_shared_memory(batch, size, 1); + struct panfrost_bo *bo = panfrost_batch_get_shared_memory(batch, size, 1); - info.wls.ptr = bo->ptr.gpu; - } + info.wls.ptr = bo->ptr.gpu; + } - GENX(pan_emit_tls)(&info, t.cpu); - return t.gpu; + GENX(pan_emit_tls)(&info, t.cpu); + return t.gpu; } #if PAN_ARCH <= 5 static mali_ptr -panfrost_get_tex_desc(struct 
panfrost_batch *batch, - enum pipe_shader_type st, +panfrost_get_tex_desc(struct panfrost_batch *batch, enum pipe_shader_type st, struct panfrost_sampler_view *view) { - if (!view) - return (mali_ptr) 0; + if (!view) + return (mali_ptr)0; - struct pipe_sampler_view *pview = &view->base; - struct panfrost_resource *rsrc = pan_resource(pview->texture); + struct pipe_sampler_view *pview = &view->base; + struct panfrost_resource *rsrc = pan_resource(pview->texture); - panfrost_batch_read_rsrc(batch, rsrc, st); - panfrost_batch_add_bo(batch, view->state.bo, st); + panfrost_batch_read_rsrc(batch, rsrc, st); + panfrost_batch_add_bo(batch, view->state.bo, st); - return view->state.gpu; + return view->state.gpu; } #endif @@ -1665,155 +1640,150 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so, struct pipe_context *pctx, struct pipe_resource *texture) { - struct panfrost_device *device = pan_device(pctx->screen); - struct panfrost_context *ctx = pan_context(pctx); - struct panfrost_resource *prsrc = (struct panfrost_resource *)texture; - enum pipe_format format = so->base.format; - assert(prsrc->image.data.bo); + struct panfrost_device *device = pan_device(pctx->screen); + struct panfrost_context *ctx = pan_context(pctx); + struct panfrost_resource *prsrc = (struct panfrost_resource *)texture; + enum pipe_format format = so->base.format; + assert(prsrc->image.data.bo); - /* Format to access the stencil/depth portion of a Z32_S8 texture */ - if (format == PIPE_FORMAT_X32_S8X24_UINT) { - assert(prsrc->separate_stencil); - texture = &prsrc->separate_stencil->base; - prsrc = (struct panfrost_resource *)texture; - format = texture->format; - } else if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) { - format = PIPE_FORMAT_Z32_FLOAT; - } + /* Format to access the stencil/depth portion of a Z32_S8 texture */ + if (format == PIPE_FORMAT_X32_S8X24_UINT) { + assert(prsrc->separate_stencil); + texture = &prsrc->separate_stencil->base; + prsrc = (struct panfrost_resource *)texture; + format = texture->format; + } else if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) { + format = PIPE_FORMAT_Z32_FLOAT; + } - so->texture_bo = prsrc->image.data.bo->ptr.gpu; - so->modifier = prsrc->image.layout.modifier; + so->texture_bo = prsrc->image.data.bo->ptr.gpu; + so->modifier = prsrc->image.layout.modifier; - /* MSAA only supported for 2D textures */ + /* MSAA only supported for 2D textures */ - assert(texture->nr_samples <= 1 || - so->base.target == PIPE_TEXTURE_2D || - so->base.target == PIPE_TEXTURE_2D_ARRAY); + assert(texture->nr_samples <= 1 || so->base.target == PIPE_TEXTURE_2D || + so->base.target == PIPE_TEXTURE_2D_ARRAY); - enum mali_texture_dimension type = - panfrost_translate_texture_dimension(so->base.target); + enum mali_texture_dimension type = + panfrost_translate_texture_dimension(so->base.target); - bool is_buffer = (so->base.target == PIPE_BUFFER); + bool is_buffer = (so->base.target == PIPE_BUFFER); - unsigned first_level = is_buffer ? 0 : so->base.u.tex.first_level; - unsigned last_level = is_buffer ? 0 : so->base.u.tex.last_level; - unsigned first_layer = is_buffer ? 0 : so->base.u.tex.first_layer; - unsigned last_layer = is_buffer ? 0 : so->base.u.tex.last_layer; - unsigned buf_offset = is_buffer ? so->base.u.buf.offset : 0; - unsigned buf_size = (is_buffer ? so->base.u.buf.size : 0) / - util_format_get_blocksize(format); + unsigned first_level = is_buffer ? 0 : so->base.u.tex.first_level; + unsigned last_level = is_buffer ? 
0 : so->base.u.tex.last_level; + unsigned first_layer = is_buffer ? 0 : so->base.u.tex.first_layer; + unsigned last_layer = is_buffer ? 0 : so->base.u.tex.last_layer; + unsigned buf_offset = is_buffer ? so->base.u.buf.offset : 0; + unsigned buf_size = + (is_buffer ? so->base.u.buf.size : 0) / util_format_get_blocksize(format); - if (so->base.target == PIPE_TEXTURE_3D) { - first_layer /= prsrc->image.layout.depth; - last_layer /= prsrc->image.layout.depth; - assert(!first_layer && !last_layer); - } + if (so->base.target == PIPE_TEXTURE_3D) { + first_layer /= prsrc->image.layout.depth; + last_layer /= prsrc->image.layout.depth; + assert(!first_layer && !last_layer); + } - struct pan_image_view iview = { - .format = format, - .dim = type, - .first_level = first_level, - .last_level = last_level, - .first_layer = first_layer, - .last_layer = last_layer, - .swizzle = { - so->base.swizzle_r, - so->base.swizzle_g, - so->base.swizzle_b, - so->base.swizzle_a, - }, - .image = &prsrc->image, + struct pan_image_view iview = { + .format = format, + .dim = type, + .first_level = first_level, + .last_level = last_level, + .first_layer = first_layer, + .last_layer = last_layer, + .swizzle = + { + so->base.swizzle_r, + so->base.swizzle_g, + so->base.swizzle_b, + so->base.swizzle_a, + }, + .image = &prsrc->image, - .buf.offset = buf_offset, - .buf.size = buf_size, - }; + .buf.offset = buf_offset, + .buf.size = buf_size, + }; - unsigned size = - (PAN_ARCH <= 5 ? pan_size(TEXTURE) : 0) + - GENX(panfrost_estimate_texture_payload_size)(&iview); + unsigned size = (PAN_ARCH <= 5 ? pan_size(TEXTURE) : 0) + + GENX(panfrost_estimate_texture_payload_size)(&iview); - struct panfrost_pool *pool = so->pool ?: &ctx->descs; - struct panfrost_ptr payload = pan_pool_alloc_aligned(&pool->base, size, 64); - so->state = panfrost_pool_take_ref(&ctx->descs, payload.gpu); + struct panfrost_pool *pool = so->pool ?: &ctx->descs; + struct panfrost_ptr payload = pan_pool_alloc_aligned(&pool->base, size, 64); + so->state = panfrost_pool_take_ref(&ctx->descs, payload.gpu); - void *tex = (PAN_ARCH >= 6) ? &so->bifrost_descriptor : payload.cpu; + void *tex = (PAN_ARCH >= 6) ? 
&so->bifrost_descriptor : payload.cpu; - if (PAN_ARCH <= 5) { - payload.cpu += pan_size(TEXTURE); - payload.gpu += pan_size(TEXTURE); - } + if (PAN_ARCH <= 5) { + payload.cpu += pan_size(TEXTURE); + payload.gpu += pan_size(TEXTURE); + } - GENX(panfrost_new_texture)(device, &iview, tex, &payload); + GENX(panfrost_new_texture)(device, &iview, tex, &payload); } static void panfrost_update_sampler_view(struct panfrost_sampler_view *view, struct pipe_context *pctx) { - struct panfrost_resource *rsrc = pan_resource(view->base.texture); - if (view->texture_bo != rsrc->image.data.bo->ptr.gpu || - view->modifier != rsrc->image.layout.modifier) { - panfrost_bo_unreference(view->state.bo); - panfrost_create_sampler_view_bo(view, pctx, &rsrc->base); - } + struct panfrost_resource *rsrc = pan_resource(view->base.texture); + if (view->texture_bo != rsrc->image.data.bo->ptr.gpu || + view->modifier != rsrc->image.layout.modifier) { + panfrost_bo_unreference(view->state.bo); + panfrost_create_sampler_view_bo(view, pctx, &rsrc->base); + } } static mali_ptr panfrost_emit_texture_descriptors(struct panfrost_batch *batch, enum pipe_shader_type stage) { - struct panfrost_context *ctx = batch->ctx; + struct panfrost_context *ctx = batch->ctx; - if (!ctx->sampler_view_count[stage]) - return 0; + if (!ctx->sampler_view_count[stage]) + return 0; #if PAN_ARCH >= 6 - struct panfrost_ptr T = - pan_pool_alloc_desc_array(&batch->pool.base, - ctx->sampler_view_count[stage], - TEXTURE); - struct mali_texture_packed *out = - (struct mali_texture_packed *) T.cpu; + struct panfrost_ptr T = pan_pool_alloc_desc_array( + &batch->pool.base, ctx->sampler_view_count[stage], TEXTURE); + struct mali_texture_packed *out = (struct mali_texture_packed *)T.cpu; - for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) { - struct panfrost_sampler_view *view = ctx->sampler_views[stage][i]; + for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) { + struct panfrost_sampler_view *view = ctx->sampler_views[stage][i]; - if (!view) { - memset(&out[i], 0, sizeof(out[i])); - continue; - } + if (!view) { + memset(&out[i], 0, sizeof(out[i])); + continue; + } - struct pipe_sampler_view *pview = &view->base; - struct panfrost_resource *rsrc = pan_resource(pview->texture); + struct pipe_sampler_view *pview = &view->base; + struct panfrost_resource *rsrc = pan_resource(pview->texture); - panfrost_update_sampler_view(view, &ctx->base); - out[i] = view->bifrost_descriptor; + panfrost_update_sampler_view(view, &ctx->base); + out[i] = view->bifrost_descriptor; - panfrost_batch_read_rsrc(batch, rsrc, stage); - panfrost_batch_add_bo(batch, view->state.bo, stage); - } + panfrost_batch_read_rsrc(batch, rsrc, stage); + panfrost_batch_add_bo(batch, view->state.bo, stage); + } - return T.gpu; + return T.gpu; #else - uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS]; + uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS]; - for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) { - struct panfrost_sampler_view *view = ctx->sampler_views[stage][i]; + for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) { + struct panfrost_sampler_view *view = ctx->sampler_views[stage][i]; - if (!view) { - trampolines[i] = 0; - continue; - } + if (!view) { + trampolines[i] = 0; + continue; + } - panfrost_update_sampler_view(view, &ctx->base); + panfrost_update_sampler_view(view, &ctx->base); - trampolines[i] = panfrost_get_tex_desc(batch, stage, view); - } + trampolines[i] = panfrost_get_tex_desc(batch, stage, view); + } - return 
pan_pool_upload_aligned(&batch->pool.base, trampolines, - sizeof(uint64_t) * - ctx->sampler_view_count[stage], - sizeof(uint64_t)); + return pan_pool_upload_aligned( + &batch->pool.base, trampolines, + sizeof(uint64_t) * ctx->sampler_view_count[stage], sizeof(uint64_t)); #endif } @@ -1821,60 +1791,59 @@ static mali_ptr panfrost_emit_sampler_descriptors(struct panfrost_batch *batch, enum pipe_shader_type stage) { - struct panfrost_context *ctx = batch->ctx; + struct panfrost_context *ctx = batch->ctx; - if (!ctx->sampler_count[stage]) - return 0; + if (!ctx->sampler_count[stage]) + return 0; - struct panfrost_ptr T = - pan_pool_alloc_desc_array(&batch->pool.base, - ctx->sampler_count[stage], - SAMPLER); - struct mali_sampler_packed *out = (struct mali_sampler_packed *) T.cpu; + struct panfrost_ptr T = pan_pool_alloc_desc_array( + &batch->pool.base, ctx->sampler_count[stage], SAMPLER); + struct mali_sampler_packed *out = (struct mali_sampler_packed *)T.cpu; - for (unsigned i = 0; i < ctx->sampler_count[stage]; ++i) { - struct panfrost_sampler_state *st = ctx->samplers[stage][i]; + for (unsigned i = 0; i < ctx->sampler_count[stage]; ++i) { + struct panfrost_sampler_state *st = ctx->samplers[stage][i]; - out[i] = st ? st->hw : (struct mali_sampler_packed){0}; - } + out[i] = st ? st->hw : (struct mali_sampler_packed){0}; + } - return T.gpu; + return T.gpu; } #if PAN_ARCH <= 7 /* Packs all image attribute descs and attribute buffer descs. - * `first_image_buf_index` must be the index of the first image attribute buffer descriptor. + * `first_image_buf_index` must be the index of the first image attribute buffer + * descriptor. */ static void emit_image_attribs(struct panfrost_context *ctx, enum pipe_shader_type shader, struct mali_attribute_packed *attribs, unsigned first_buf) { - struct panfrost_device *dev = pan_device(ctx->base.screen); - unsigned last_bit = util_last_bit(ctx->image_mask[shader]); + struct panfrost_device *dev = pan_device(ctx->base.screen); + unsigned last_bit = util_last_bit(ctx->image_mask[shader]); - for (unsigned i = 0; i < last_bit; ++i) { - enum pipe_format format = ctx->images[shader][i].format; + for (unsigned i = 0; i < last_bit; ++i) { + enum pipe_format format = ctx->images[shader][i].format; - pan_pack(attribs + i, ATTRIBUTE, cfg) { - /* Continuation record means 2 buffers per image */ - cfg.buffer_index = first_buf + (i * 2); - cfg.offset_enable = (PAN_ARCH <= 5); - cfg.format = dev->formats[format].hw; - } - } + pan_pack(attribs + i, ATTRIBUTE, cfg) { + /* Continuation record means 2 buffers per image */ + cfg.buffer_index = first_buf + (i * 2); + cfg.offset_enable = (PAN_ARCH <= 5); + cfg.format = dev->formats[format].hw; + } + } } static enum mali_attribute_type pan_modifier_to_attr_type(uint64_t modifier) { - switch (modifier) { - case DRM_FORMAT_MOD_LINEAR: - return MALI_ATTRIBUTE_TYPE_3D_LINEAR; - case DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED: - return MALI_ATTRIBUTE_TYPE_3D_INTERLEAVED; - default: - unreachable("Invalid modifier for attribute record"); - } + switch (modifier) { + case DRM_FORMAT_MOD_LINEAR: + return MALI_ATTRIBUTE_TYPE_3D_LINEAR; + case DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED: + return MALI_ATTRIBUTE_TYPE_3D_INTERLEAVED; + default: + unreachable("Invalid modifier for attribute record"); + } } static void @@ -1882,347 +1851,345 @@ emit_image_bufs(struct panfrost_batch *batch, enum pipe_shader_type shader, struct mali_attribute_buffer_packed *bufs, unsigned first_image_buf_index) { - struct panfrost_context *ctx = batch->ctx; - 
unsigned last_bit = util_last_bit(ctx->image_mask[shader]); + struct panfrost_context *ctx = batch->ctx; + unsigned last_bit = util_last_bit(ctx->image_mask[shader]); - for (unsigned i = 0; i < last_bit; ++i) { - struct pipe_image_view *image = &ctx->images[shader][i]; + for (unsigned i = 0; i < last_bit; ++i) { + struct pipe_image_view *image = &ctx->images[shader][i]; - if (!(ctx->image_mask[shader] & (1 << i)) || - !(image->shader_access & PIPE_IMAGE_ACCESS_READ_WRITE)) { - /* Unused image bindings */ - pan_pack(bufs + (i * 2), ATTRIBUTE_BUFFER, cfg); - pan_pack(bufs + (i * 2) + 1, ATTRIBUTE_BUFFER, cfg); - continue; - } + if (!(ctx->image_mask[shader] & (1 << i)) || + !(image->shader_access & PIPE_IMAGE_ACCESS_READ_WRITE)) { + /* Unused image bindings */ + pan_pack(bufs + (i * 2), ATTRIBUTE_BUFFER, cfg) + ; + pan_pack(bufs + (i * 2) + 1, ATTRIBUTE_BUFFER, cfg) + ; + continue; + } - struct panfrost_resource *rsrc = pan_resource(image->resource); + struct panfrost_resource *rsrc = pan_resource(image->resource); - /* TODO: MSAA */ - assert(image->resource->nr_samples <= 1 && "MSAA'd images not supported"); + /* TODO: MSAA */ + assert(image->resource->nr_samples <= 1 && "MSAA'd images not supported"); - bool is_3d = rsrc->base.target == PIPE_TEXTURE_3D; - bool is_buffer = rsrc->base.target == PIPE_BUFFER; + bool is_3d = rsrc->base.target == PIPE_TEXTURE_3D; + bool is_buffer = rsrc->base.target == PIPE_BUFFER; - unsigned offset = is_buffer ? image->u.buf.offset : - panfrost_texture_offset(&rsrc->image.layout, - image->u.tex.level, - is_3d ? 0 : image->u.tex.first_layer, - is_3d ? image->u.tex.first_layer : 0); + unsigned offset = is_buffer ? image->u.buf.offset + : panfrost_texture_offset( + &rsrc->image.layout, image->u.tex.level, + is_3d ? 0 : image->u.tex.first_layer, + is_3d ? image->u.tex.first_layer : 0); - panfrost_track_image_access(batch, shader, image); + panfrost_track_image_access(batch, shader, image); - pan_pack(bufs + (i * 2), ATTRIBUTE_BUFFER, cfg) { - cfg.type = pan_modifier_to_attr_type(rsrc->image.layout.modifier); - cfg.pointer = rsrc->image.data.bo->ptr.gpu + offset; - cfg.stride = util_format_get_blocksize(image->format); - cfg.size = rsrc->image.data.bo->size - offset; - } + pan_pack(bufs + (i * 2), ATTRIBUTE_BUFFER, cfg) { + cfg.type = pan_modifier_to_attr_type(rsrc->image.layout.modifier); + cfg.pointer = rsrc->image.data.bo->ptr.gpu + offset; + cfg.stride = util_format_get_blocksize(image->format); + cfg.size = rsrc->image.data.bo->size - offset; + } - if (is_buffer) { - pan_pack(bufs + (i * 2) + 1, ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) { - cfg.s_dimension = rsrc->base.width0 / - util_format_get_blocksize(image->format); - cfg.t_dimension = cfg.r_dimension = 1; - } + if (is_buffer) { + pan_pack(bufs + (i * 2) + 1, ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) { + cfg.s_dimension = + rsrc->base.width0 / util_format_get_blocksize(image->format); + cfg.t_dimension = cfg.r_dimension = 1; + } - continue; - } + continue; + } - pan_pack(bufs + (i * 2) + 1, ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) { - unsigned level = image->u.tex.level; + pan_pack(bufs + (i * 2) + 1, ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) { + unsigned level = image->u.tex.level; - cfg.s_dimension = u_minify(rsrc->base.width0, level); - cfg.t_dimension = u_minify(rsrc->base.height0, level); - cfg.r_dimension = is_3d ? 
- u_minify(rsrc->base.depth0, level) : - image->u.tex.last_layer - image->u.tex.first_layer + 1; + cfg.s_dimension = u_minify(rsrc->base.width0, level); + cfg.t_dimension = u_minify(rsrc->base.height0, level); + cfg.r_dimension = + is_3d ? u_minify(rsrc->base.depth0, level) + : image->u.tex.last_layer - image->u.tex.first_layer + 1; - cfg.row_stride = - rsrc->image.layout.slices[level].row_stride; + cfg.row_stride = rsrc->image.layout.slices[level].row_stride; - if (rsrc->base.target != PIPE_TEXTURE_2D) { - cfg.slice_stride = - panfrost_get_layer_stride(&rsrc->image.layout, - level); - } - } - } + if (rsrc->base.target != PIPE_TEXTURE_2D) { + cfg.slice_stride = + panfrost_get_layer_stride(&rsrc->image.layout, level); + } + } + } } static mali_ptr -panfrost_emit_image_attribs(struct panfrost_batch *batch, - mali_ptr *buffers, +panfrost_emit_image_attribs(struct panfrost_batch *batch, mali_ptr *buffers, enum pipe_shader_type type) { - struct panfrost_context *ctx = batch->ctx; - struct panfrost_compiled_shader *shader = ctx->prog[type]; + struct panfrost_context *ctx = batch->ctx; + struct panfrost_compiled_shader *shader = ctx->prog[type]; - if (!shader->info.attribute_count) { - *buffers = 0; - return 0; - } + if (!shader->info.attribute_count) { + *buffers = 0; + return 0; + } - /* Images always need a MALI_ATTRIBUTE_BUFFER_CONTINUATION_3D */ - unsigned attr_count = shader->info.attribute_count; - unsigned buf_count = (attr_count * 2) + (PAN_ARCH >= 6 ? 1 : 0); + /* Images always need a MALI_ATTRIBUTE_BUFFER_CONTINUATION_3D */ + unsigned attr_count = shader->info.attribute_count; + unsigned buf_count = (attr_count * 2) + (PAN_ARCH >= 6 ? 1 : 0); - struct panfrost_ptr bufs = - pan_pool_alloc_desc_array(&batch->pool.base, buf_count, ATTRIBUTE_BUFFER); + struct panfrost_ptr bufs = + pan_pool_alloc_desc_array(&batch->pool.base, buf_count, ATTRIBUTE_BUFFER); - struct panfrost_ptr attribs = - pan_pool_alloc_desc_array(&batch->pool.base, attr_count, ATTRIBUTE); + struct panfrost_ptr attribs = + pan_pool_alloc_desc_array(&batch->pool.base, attr_count, ATTRIBUTE); - emit_image_attribs(ctx, type, attribs.cpu, 0); - emit_image_bufs(batch, type, bufs.cpu, 0); + emit_image_attribs(ctx, type, attribs.cpu, 0); + emit_image_bufs(batch, type, bufs.cpu, 0); - /* We need an empty attrib buf to stop the prefetching on Bifrost */ + /* We need an empty attrib buf to stop the prefetching on Bifrost */ #if PAN_ARCH >= 6 - pan_pack(bufs.cpu + ((buf_count - 1) * pan_size(ATTRIBUTE_BUFFER)), - ATTRIBUTE_BUFFER, cfg); + pan_pack(bufs.cpu + ((buf_count - 1) * pan_size(ATTRIBUTE_BUFFER)), + ATTRIBUTE_BUFFER, cfg) + ; #endif - *buffers = bufs.gpu; - return attribs.gpu; + *buffers = bufs.gpu; + return attribs.gpu; } static mali_ptr -panfrost_emit_vertex_data(struct panfrost_batch *batch, - mali_ptr *buffers) +panfrost_emit_vertex_data(struct panfrost_batch *batch, mali_ptr *buffers) { - struct panfrost_context *ctx = batch->ctx; - struct panfrost_vertex_state *so = ctx->vertex; - struct panfrost_compiled_shader *vs = ctx->prog[PIPE_SHADER_VERTEX]; - bool instanced = ctx->instance_count > 1; - uint32_t image_mask = ctx->image_mask[PIPE_SHADER_VERTEX]; - unsigned nr_images = util_last_bit(image_mask); + struct panfrost_context *ctx = batch->ctx; + struct panfrost_vertex_state *so = ctx->vertex; + struct panfrost_compiled_shader *vs = ctx->prog[PIPE_SHADER_VERTEX]; + bool instanced = ctx->instance_count > 1; + uint32_t image_mask = ctx->image_mask[PIPE_SHADER_VERTEX]; + unsigned nr_images = util_last_bit(image_mask); - /* 
Worst case: everything is NPOT, which is only possible if instancing - * is enabled. Otherwise single record is gauranteed. - * Also, we allocate more memory than what's needed here if either instancing - * is enabled or images are present, this can be improved. */ - unsigned bufs_per_attrib = (instanced || nr_images > 0) ? 2 : 1; - unsigned nr_bufs = ((so->nr_bufs + nr_images) * bufs_per_attrib) + - (PAN_ARCH >= 6 ? 1 : 0); + /* Worst case: everything is NPOT, which is only possible if instancing + * is enabled. Otherwise single record is gauranteed. + * Also, we allocate more memory than what's needed here if either instancing + * is enabled or images are present, this can be improved. */ + unsigned bufs_per_attrib = (instanced || nr_images > 0) ? 2 : 1; + unsigned nr_bufs = + ((so->nr_bufs + nr_images) * bufs_per_attrib) + (PAN_ARCH >= 6 ? 1 : 0); - unsigned count = vs->info.attribute_count; + unsigned count = vs->info.attribute_count; - struct panfrost_compiled_shader *xfb = - ctx->uncompiled[PIPE_SHADER_VERTEX]->xfb; + struct panfrost_compiled_shader *xfb = + ctx->uncompiled[PIPE_SHADER_VERTEX]->xfb; - if (xfb) - count = MAX2(count, xfb->info.attribute_count); + if (xfb) + count = MAX2(count, xfb->info.attribute_count); #if PAN_ARCH <= 5 - /* Midgard needs vertexid/instanceid handled specially */ - bool special_vbufs = count >= PAN_VERTEX_ID; + /* Midgard needs vertexid/instanceid handled specially */ + bool special_vbufs = count >= PAN_VERTEX_ID; - if (special_vbufs) - nr_bufs += 2; + if (special_vbufs) + nr_bufs += 2; #endif - if (!nr_bufs) { - *buffers = 0; - return 0; - } + if (!nr_bufs) { + *buffers = 0; + return 0; + } - struct panfrost_ptr S = - pan_pool_alloc_desc_array(&batch->pool.base, nr_bufs, - ATTRIBUTE_BUFFER); - struct panfrost_ptr T = - pan_pool_alloc_desc_array(&batch->pool.base, count, - ATTRIBUTE); + struct panfrost_ptr S = + pan_pool_alloc_desc_array(&batch->pool.base, nr_bufs, ATTRIBUTE_BUFFER); + struct panfrost_ptr T = + pan_pool_alloc_desc_array(&batch->pool.base, count, ATTRIBUTE); - struct mali_attribute_buffer_packed *bufs = - (struct mali_attribute_buffer_packed *) S.cpu; + struct mali_attribute_buffer_packed *bufs = + (struct mali_attribute_buffer_packed *)S.cpu; - struct mali_attribute_packed *out = - (struct mali_attribute_packed *) T.cpu; + struct mali_attribute_packed *out = (struct mali_attribute_packed *)T.cpu; - unsigned attrib_to_buffer[PIPE_MAX_ATTRIBS] = { 0 }; - unsigned k = 0; + unsigned attrib_to_buffer[PIPE_MAX_ATTRIBS] = {0}; + unsigned k = 0; - for (unsigned i = 0; i < so->nr_bufs; ++i) { - unsigned vbi = so->buffers[i].vbi; - unsigned divisor = so->buffers[i].divisor; - attrib_to_buffer[i] = k; + for (unsigned i = 0; i < so->nr_bufs; ++i) { + unsigned vbi = so->buffers[i].vbi; + unsigned divisor = so->buffers[i].divisor; + attrib_to_buffer[i] = k; - if (!(ctx->vb_mask & (1 << vbi))) - continue; + if (!(ctx->vb_mask & (1 << vbi))) + continue; - struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi]; - struct panfrost_resource *rsrc; + struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi]; + struct panfrost_resource *rsrc; - rsrc = pan_resource(buf->buffer.resource); - if (!rsrc) - continue; + rsrc = pan_resource(buf->buffer.resource); + if (!rsrc) + continue; - panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX); + panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX); - /* Mask off lower bits, see offset fixup below */ - mali_ptr raw_addr = rsrc->image.data.bo->ptr.gpu + buf->buffer_offset; - mali_ptr addr = raw_addr & 
~63; + /* Mask off lower bits, see offset fixup below */ + mali_ptr raw_addr = rsrc->image.data.bo->ptr.gpu + buf->buffer_offset; + mali_ptr addr = raw_addr & ~63; - /* Since we advanced the base pointer, we shrink the buffer - * size, but add the offset we subtracted */ - unsigned size = rsrc->base.width0 + (raw_addr - addr) - - buf->buffer_offset; + /* Since we advanced the base pointer, we shrink the buffer + * size, but add the offset we subtracted */ + unsigned size = + rsrc->base.width0 + (raw_addr - addr) - buf->buffer_offset; - /* When there is a divisor, the hardware-level divisor is - * the product of the instance divisor and the padded count */ - unsigned stride = buf->stride; - unsigned hw_divisor = ctx->padded_count * divisor; + /* When there is a divisor, the hardware-level divisor is + * the product of the instance divisor and the padded count */ + unsigned stride = buf->stride; + unsigned hw_divisor = ctx->padded_count * divisor; - if (ctx->instance_count <= 1) { - /* Per-instance would be every attribute equal */ - if (divisor) - stride = 0; + if (ctx->instance_count <= 1) { + /* Per-instance would be every attribute equal */ + if (divisor) + stride = 0; - pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) { - cfg.pointer = addr; - cfg.stride = stride; - cfg.size = size; - } - } else if (!divisor) { - pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) { - cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS; - cfg.pointer = addr; - cfg.stride = stride; - cfg.size = size; - cfg.divisor = ctx->padded_count; - } - } else if (util_is_power_of_two_or_zero(hw_divisor)) { - pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) { - cfg.type = MALI_ATTRIBUTE_TYPE_1D_POT_DIVISOR; - cfg.pointer = addr; - cfg.stride = stride; - cfg.size = size; - cfg.divisor_r = __builtin_ctz(hw_divisor); - } + pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) { + cfg.pointer = addr; + cfg.stride = stride; + cfg.size = size; + } + } else if (!divisor) { + pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) { + cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS; + cfg.pointer = addr; + cfg.stride = stride; + cfg.size = size; + cfg.divisor = ctx->padded_count; + } + } else if (util_is_power_of_two_or_zero(hw_divisor)) { + pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) { + cfg.type = MALI_ATTRIBUTE_TYPE_1D_POT_DIVISOR; + cfg.pointer = addr; + cfg.stride = stride; + cfg.size = size; + cfg.divisor_r = __builtin_ctz(hw_divisor); + } - } else { - unsigned shift = 0, extra_flags = 0; + } else { + unsigned shift = 0, extra_flags = 0; - unsigned magic_divisor = - panfrost_compute_magic_divisor(hw_divisor, &shift, &extra_flags); + unsigned magic_divisor = + panfrost_compute_magic_divisor(hw_divisor, &shift, &extra_flags); - /* Records with continuations must be aligned */ - k = ALIGN_POT(k, 2); - attrib_to_buffer[i] = k; + /* Records with continuations must be aligned */ + k = ALIGN_POT(k, 2); + attrib_to_buffer[i] = k; - pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) { - cfg.type = MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR; - cfg.pointer = addr; - cfg.stride = stride; - cfg.size = size; + pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) { + cfg.type = MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR; + cfg.pointer = addr; + cfg.stride = stride; + cfg.size = size; - cfg.divisor_r = shift; - cfg.divisor_e = extra_flags; - } + cfg.divisor_r = shift; + cfg.divisor_e = extra_flags; + } - pan_pack(bufs + k + 1, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) { - cfg.divisor_numerator = magic_divisor; - cfg.divisor = divisor; - } + pan_pack(bufs + k + 1, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) { + cfg.divisor_numerator = 
magic_divisor; + cfg.divisor = divisor; + } - ++k; - } + ++k; + } - ++k; - } + ++k; + } #if PAN_ARCH <= 5 - /* Add special gl_VertexID/gl_InstanceID buffers */ - if (special_vbufs) { - panfrost_vertex_id(ctx->padded_count, &bufs[k], ctx->instance_count > 1); + /* Add special gl_VertexID/gl_InstanceID buffers */ + if (special_vbufs) { + panfrost_vertex_id(ctx->padded_count, &bufs[k], ctx->instance_count > 1); - pan_pack(out + PAN_VERTEX_ID, ATTRIBUTE, cfg) { - cfg.buffer_index = k++; - cfg.format = so->formats[PAN_VERTEX_ID]; - } + pan_pack(out + PAN_VERTEX_ID, ATTRIBUTE, cfg) { + cfg.buffer_index = k++; + cfg.format = so->formats[PAN_VERTEX_ID]; + } - panfrost_instance_id(ctx->padded_count, &bufs[k], ctx->instance_count > 1); + panfrost_instance_id(ctx->padded_count, &bufs[k], + ctx->instance_count > 1); - pan_pack(out + PAN_INSTANCE_ID, ATTRIBUTE, cfg) { - cfg.buffer_index = k++; - cfg.format = so->formats[PAN_INSTANCE_ID]; - } - } + pan_pack(out + PAN_INSTANCE_ID, ATTRIBUTE, cfg) { + cfg.buffer_index = k++; + cfg.format = so->formats[PAN_INSTANCE_ID]; + } + } #endif - if (nr_images) { - k = ALIGN_POT(k, 2); - emit_image_attribs(ctx, PIPE_SHADER_VERTEX, out + so->num_elements, k); - emit_image_bufs(batch, PIPE_SHADER_VERTEX, bufs + k, k); - k += (util_last_bit(ctx->image_mask[PIPE_SHADER_VERTEX]) * 2); - } + if (nr_images) { + k = ALIGN_POT(k, 2); + emit_image_attribs(ctx, PIPE_SHADER_VERTEX, out + so->num_elements, k); + emit_image_bufs(batch, PIPE_SHADER_VERTEX, bufs + k, k); + k += (util_last_bit(ctx->image_mask[PIPE_SHADER_VERTEX]) * 2); + } #if PAN_ARCH >= 6 - /* We need an empty attrib buf to stop the prefetching on Bifrost */ - pan_pack(&bufs[k], ATTRIBUTE_BUFFER, cfg); + /* We need an empty attrib buf to stop the prefetching on Bifrost */ + pan_pack(&bufs[k], ATTRIBUTE_BUFFER, cfg) + ; #endif - /* Attribute addresses require 64-byte alignment, so let: - * - * base' = base & ~63 = base - (base & 63) - * offset' = offset + (base & 63) - * - * Since base' + offset' = base + offset, these are equivalent - * addressing modes and now base is 64 aligned. - */ + /* Attribute addresses require 64-byte alignment, so let: + * + * base' = base & ~63 = base - (base & 63) + * offset' = offset + (base & 63) + * + * Since base' + offset' = base + offset, these are equivalent + * addressing modes and now base is 64 aligned. + */ - /* While these are usually equal, they are not required to be. In some - * cases, u_blitter passes too high a value for num_elements. - */ - assert(vs->info.attributes_read_count <= so->num_elements); + /* While these are usually equal, they are not required to be. In some + * cases, u_blitter passes too high a value for num_elements. 
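The 64-byte alignment fixup derived in the comment above can be checked in isolation. The following standalone sketch (plain C, not part of the patch; the address and offset values are made up) splits an arbitrary address into a 64-byte-aligned base and a compensating offset and verifies the invariant the attribute emission relies on: the effective address is unchanged and the new base is aligned.

#include <assert.h>
#include <inttypes.h>
#include <stdio.h>

int
main(void)
{
   /* Hypothetical unaligned GPU address and element offset */
   uint64_t base = 0x1000f3b5;
   uint64_t offset = 17;

   /* base' = base & ~63, offset' = offset + (base & 63) */
   uint64_t base_aligned = base & ~63ull;
   uint64_t offset_fixed = offset + (base & 63);

   assert((base_aligned & 63) == 0);                     /* base' is 64-byte aligned */
   assert(base_aligned + offset_fixed == base + offset); /* same effective address */

   printf("base'=%#" PRIx64 " offset'=%" PRIu64 "\n", base_aligned, offset_fixed);
   return 0;
}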
+ */ + assert(vs->info.attributes_read_count <= so->num_elements); - for (unsigned i = 0; i < vs->info.attributes_read_count; ++i) { - unsigned vbi = so->pipe[i].vertex_buffer_index; - struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi]; + for (unsigned i = 0; i < vs->info.attributes_read_count; ++i) { + unsigned vbi = so->pipe[i].vertex_buffer_index; + struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi]; - /* BOs are aligned; just fixup for buffer_offset */ - signed src_offset = so->pipe[i].src_offset; - src_offset += (buf->buffer_offset & 63); + /* BOs are aligned; just fixup for buffer_offset */ + signed src_offset = so->pipe[i].src_offset; + src_offset += (buf->buffer_offset & 63); - /* Base instance offset */ - if (ctx->base_instance && so->pipe[i].instance_divisor) { - src_offset += (ctx->base_instance * buf->stride) / - so->pipe[i].instance_divisor; - } + /* Base instance offset */ + if (ctx->base_instance && so->pipe[i].instance_divisor) { + src_offset += + (ctx->base_instance * buf->stride) / so->pipe[i].instance_divisor; + } - /* Also, somewhat obscurely per-instance data needs to be - * offset in response to a delayed start in an indexed draw */ + /* Also, somewhat obscurely per-instance data needs to be + * offset in response to a delayed start in an indexed draw */ - if (so->pipe[i].instance_divisor && ctx->instance_count > 1) - src_offset -= buf->stride * ctx->offset_start; + if (so->pipe[i].instance_divisor && ctx->instance_count > 1) + src_offset -= buf->stride * ctx->offset_start; - pan_pack(out + i, ATTRIBUTE, cfg) { - cfg.buffer_index = attrib_to_buffer[so->element_buffer[i]]; - cfg.format = so->formats[i]; - cfg.offset = src_offset; - } - } + pan_pack(out + i, ATTRIBUTE, cfg) { + cfg.buffer_index = attrib_to_buffer[so->element_buffer[i]]; + cfg.format = so->formats[i]; + cfg.offset = src_offset; + } + } - *buffers = S.gpu; - return T.gpu; + *buffers = S.gpu; + return T.gpu; } static mali_ptr panfrost_emit_varyings(struct panfrost_batch *batch, - struct mali_attribute_buffer_packed *slot, - unsigned stride, unsigned count) + struct mali_attribute_buffer_packed *slot, + unsigned stride, unsigned count) { - unsigned size = stride * count; - mali_ptr ptr = - pan_pool_alloc_aligned(&batch->invisible_pool.base, size, 64).gpu; + unsigned size = stride * count; + mali_ptr ptr = + pan_pool_alloc_aligned(&batch->invisible_pool.base, size, 64).gpu; - pan_pack(slot, ATTRIBUTE_BUFFER, cfg) { - cfg.stride = stride; - cfg.size = size; - cfg.pointer = ptr; - } + pan_pack(slot, ATTRIBUTE_BUFFER, cfg) { + cfg.stride = stride; + cfg.size = size; + cfg.pointer = ptr; + } - return ptr; + return ptr; } /* Given a varying, figure out which index it corresponds to */ @@ -2230,7 +2197,7 @@ panfrost_emit_varyings(struct panfrost_batch *batch, static inline unsigned pan_varying_index(unsigned present, enum pan_special_varying v) { - return util_bitcount(present & BITFIELD_MASK(v)); + return util_bitcount(present & BITFIELD_MASK(v)); } /* Determines which varying buffers are required */ @@ -2238,59 +2205,58 @@ pan_varying_index(unsigned present, enum pan_special_varying v) static inline unsigned pan_varying_present(const struct panfrost_device *dev, struct pan_shader_info *producer, - struct pan_shader_info *consumer, - uint16_t point_coord_mask) + struct pan_shader_info *consumer, uint16_t point_coord_mask) { - /* At the moment we always emit general and position buffers. 
Not - * strictly necessary but usually harmless */ + /* At the moment we always emit general and position buffers. Not + * strictly necessary but usually harmless */ - unsigned present = BITFIELD_BIT(PAN_VARY_GENERAL) | BITFIELD_BIT(PAN_VARY_POSITION); + unsigned present = + BITFIELD_BIT(PAN_VARY_GENERAL) | BITFIELD_BIT(PAN_VARY_POSITION); - /* Enable special buffers by the shader info */ + /* Enable special buffers by the shader info */ - if (producer->vs.writes_point_size) - present |= BITFIELD_BIT(PAN_VARY_PSIZ); + if (producer->vs.writes_point_size) + present |= BITFIELD_BIT(PAN_VARY_PSIZ); #if PAN_ARCH <= 5 - /* On Midgard, these exist as real varyings. Later architectures use - * LD_VAR_SPECIAL reads instead. */ + /* On Midgard, these exist as real varyings. Later architectures use + * LD_VAR_SPECIAL reads instead. */ - if (consumer->fs.reads_point_coord) - present |= BITFIELD_BIT(PAN_VARY_PNTCOORD); + if (consumer->fs.reads_point_coord) + present |= BITFIELD_BIT(PAN_VARY_PNTCOORD); - if (consumer->fs.reads_face) - present |= BITFIELD_BIT(PAN_VARY_FACE); + if (consumer->fs.reads_face) + present |= BITFIELD_BIT(PAN_VARY_FACE); - if (consumer->fs.reads_frag_coord) - present |= BITFIELD_BIT(PAN_VARY_FRAGCOORD); + if (consumer->fs.reads_frag_coord) + present |= BITFIELD_BIT(PAN_VARY_FRAGCOORD); - /* Also, if we have a point sprite, we need a point coord buffer */ + /* Also, if we have a point sprite, we need a point coord buffer */ - for (unsigned i = 0; i < consumer->varyings.input_count; i++) { - gl_varying_slot loc = consumer->varyings.input[i].location; + for (unsigned i = 0; i < consumer->varyings.input_count; i++) { + gl_varying_slot loc = consumer->varyings.input[i].location; - if (util_varying_is_point_coord(loc, point_coord_mask)) - present |= BITFIELD_BIT(PAN_VARY_PNTCOORD); - } + if (util_varying_is_point_coord(loc, point_coord_mask)) + present |= BITFIELD_BIT(PAN_VARY_PNTCOORD); + } #endif - return present; + return present; } /* Emitters for varying records */ static void pan_emit_vary(const struct panfrost_device *dev, - struct mali_attribute_packed *out, - unsigned buffer_index, + struct mali_attribute_packed *out, unsigned buffer_index, mali_pixel_format format, unsigned offset) { - pan_pack(out, ATTRIBUTE, cfg) { - cfg.buffer_index = buffer_index; - cfg.offset_enable = (PAN_ARCH <= 5); - cfg.format = format; - cfg.offset = offset; - } + pan_pack(out, ATTRIBUTE, cfg) { + cfg.buffer_index = buffer_index; + cfg.offset_enable = (PAN_ARCH <= 5); + cfg.format = format; + cfg.offset = offset; + } } /* Special records */ @@ -2310,40 +2276,40 @@ static const struct { static mali_pixel_format pan_special_format(const struct panfrost_device *dev, - enum pan_special_varying buf) + enum pan_special_varying buf) { - assert(buf < PAN_VARY_MAX); - mali_pixel_format format = (pan_varying_formats[buf].format << 12); + assert(buf < PAN_VARY_MAX); + mali_pixel_format format = (pan_varying_formats[buf].format << 12); #if PAN_ARCH <= 6 - unsigned nr = pan_varying_formats[buf].components; - format |= panfrost_get_default_swizzle(nr); + unsigned nr = pan_varying_formats[buf].components; + format |= panfrost_get_default_swizzle(nr); #endif - return format; + return format; } static void pan_emit_vary_special(const struct panfrost_device *dev, - struct mali_attribute_packed *out, - unsigned present, enum pan_special_varying buf) + struct mali_attribute_packed *out, unsigned present, + enum pan_special_varying buf) { - pan_emit_vary(dev, out, pan_varying_index(present, buf), - 
pan_special_format(dev, buf), 0); + pan_emit_vary(dev, out, pan_varying_index(present, buf), + pan_special_format(dev, buf), 0); } /* Negative indicates a varying is not found */ static signed -pan_find_vary(const struct pan_shader_varying *vary, - unsigned vary_count, unsigned loc) +pan_find_vary(const struct pan_shader_varying *vary, unsigned vary_count, + unsigned loc) { - for (unsigned i = 0; i < vary_count; ++i) { - if (vary[i].location == loc) - return i; - } + for (unsigned i = 0; i < vary_count; ++i) { + if (vary[i].location == loc) + return i; + } - return -1; + return -1; } /* Assign varying locations for the general buffer. Returns the calculated @@ -2353,33 +2319,31 @@ pan_find_vary(const struct pan_shader_varying *vary, static unsigned pan_assign_varyings(const struct panfrost_device *dev, struct pan_shader_info *producer, - struct pan_shader_info *consumer, - signed *offsets) + struct pan_shader_info *consumer, signed *offsets) { - unsigned producer_count = producer->varyings.output_count; - unsigned consumer_count = consumer->varyings.input_count; + unsigned producer_count = producer->varyings.output_count; + unsigned consumer_count = consumer->varyings.input_count; - const struct pan_shader_varying *producer_vars = producer->varyings.output; - const struct pan_shader_varying *consumer_vars = consumer->varyings.input; + const struct pan_shader_varying *producer_vars = producer->varyings.output; + const struct pan_shader_varying *consumer_vars = consumer->varyings.input; - unsigned stride = 0; + unsigned stride = 0; - for (unsigned i = 0; i < producer_count; ++i) { - signed loc = pan_find_vary(consumer_vars, consumer_count, - producer_vars[i].location); - enum pipe_format format = loc >= 0 ? - consumer_vars[loc].format : - PIPE_FORMAT_NONE; + for (unsigned i = 0; i < producer_count; ++i) { + signed loc = pan_find_vary(consumer_vars, consumer_count, + producer_vars[i].location); + enum pipe_format format = + loc >= 0 ? consumer_vars[loc].format : PIPE_FORMAT_NONE; - if (format != PIPE_FORMAT_NONE) { - offsets[i] = stride; - stride += util_format_get_blocksize(format); - } else { - offsets[i] = -1; - } - } + if (format != PIPE_FORMAT_NONE) { + offsets[i] = stride; + stride += util_format_get_blocksize(format); + } else { + offsets[i] = -1; + } + } - return stride; + return stride; } /* Emitter for a single varying (attribute) descriptor */ @@ -2388,225 +2352,208 @@ static void panfrost_emit_varying(const struct panfrost_device *dev, struct mali_attribute_packed *out, const struct pan_shader_varying varying, - enum pipe_format pipe_format, - unsigned present, - uint16_t point_sprite_mask, - signed offset, + enum pipe_format pipe_format, unsigned present, + uint16_t point_sprite_mask, signed offset, enum pan_special_varying pos_varying) { - /* Note: varying.format != pipe_format in some obscure cases due to a - * limitation of the NIR linker. This should be fixed in the future to - * eliminate the additional lookups. See: - * dEQP-GLES3.functional.shaders.conditionals.if.sequence_statements_vertex - */ - gl_varying_slot loc = varying.location; - mali_pixel_format format = dev->formats[pipe_format].hw; + /* Note: varying.format != pipe_format in some obscure cases due to a + * limitation of the NIR linker. This should be fixed in the future to + * eliminate the additional lookups. 
See: + * dEQP-GLES3.functional.shaders.conditionals.if.sequence_statements_vertex + */ + gl_varying_slot loc = varying.location; + mali_pixel_format format = dev->formats[pipe_format].hw; - if (util_varying_is_point_coord(loc, point_sprite_mask)) { - pan_emit_vary_special(dev, out, present, PAN_VARY_PNTCOORD); - } else if (loc == VARYING_SLOT_POS) { - pan_emit_vary_special(dev, out, present, pos_varying); - } else if (loc == VARYING_SLOT_PSIZ) { - pan_emit_vary_special(dev, out, present, PAN_VARY_PSIZ); - } else if (loc == VARYING_SLOT_FACE) { - pan_emit_vary_special(dev, out, present, PAN_VARY_FACE); - } else if (offset < 0) { - pan_emit_vary(dev, out, 0, (MALI_CONSTANT << 12), 0); - } else { - STATIC_ASSERT(PAN_VARY_GENERAL == 0); - pan_emit_vary(dev, out, 0, format, offset); - } + if (util_varying_is_point_coord(loc, point_sprite_mask)) { + pan_emit_vary_special(dev, out, present, PAN_VARY_PNTCOORD); + } else if (loc == VARYING_SLOT_POS) { + pan_emit_vary_special(dev, out, present, pos_varying); + } else if (loc == VARYING_SLOT_PSIZ) { + pan_emit_vary_special(dev, out, present, PAN_VARY_PSIZ); + } else if (loc == VARYING_SLOT_FACE) { + pan_emit_vary_special(dev, out, present, PAN_VARY_FACE); + } else if (offset < 0) { + pan_emit_vary(dev, out, 0, (MALI_CONSTANT << 12), 0); + } else { + STATIC_ASSERT(PAN_VARY_GENERAL == 0); + pan_emit_vary(dev, out, 0, format, offset); + } } /* Links varyings and uploads ATTRIBUTE descriptors. Can execute at link time, * rather than draw time (under good conditions). */ static void -panfrost_emit_varying_descs( - struct panfrost_pool *pool, - struct panfrost_compiled_shader *producer, - struct panfrost_compiled_shader *consumer, - uint16_t point_coord_mask, - struct pan_linkage *out) +panfrost_emit_varying_descs(struct panfrost_pool *pool, + struct panfrost_compiled_shader *producer, + struct panfrost_compiled_shader *consumer, + uint16_t point_coord_mask, struct pan_linkage *out) { - struct panfrost_device *dev = pool->base.dev; - unsigned producer_count = producer->info.varyings.output_count; - unsigned consumer_count = consumer->info.varyings.input_count; + struct panfrost_device *dev = pool->base.dev; + unsigned producer_count = producer->info.varyings.output_count; + unsigned consumer_count = consumer->info.varyings.input_count; - /* Offsets within the general varying buffer, indexed by location */ - signed offsets[PAN_MAX_VARYINGS]; - assert(producer_count <= ARRAY_SIZE(offsets)); - assert(consumer_count <= ARRAY_SIZE(offsets)); + /* Offsets within the general varying buffer, indexed by location */ + signed offsets[PAN_MAX_VARYINGS]; + assert(producer_count <= ARRAY_SIZE(offsets)); + assert(consumer_count <= ARRAY_SIZE(offsets)); - /* Allocate enough descriptors for both shader stages */ - struct panfrost_ptr T = - pan_pool_alloc_desc_array(&pool->base, - producer_count + consumer_count, - ATTRIBUTE); + /* Allocate enough descriptors for both shader stages */ + struct panfrost_ptr T = pan_pool_alloc_desc_array( + &pool->base, producer_count + consumer_count, ATTRIBUTE); - /* Take a reference if we're being put on the CSO */ - if (!pool->owned) { - out->bo = pool->transient_bo; - panfrost_bo_reference(out->bo); - } + /* Take a reference if we're being put on the CSO */ + if (!pool->owned) { + out->bo = pool->transient_bo; + panfrost_bo_reference(out->bo); + } - struct mali_attribute_packed *descs = T.cpu; - out->producer = producer_count ? T.gpu : 0; - out->consumer = consumer_count ? 
T.gpu + - (pan_size(ATTRIBUTE) * producer_count) : 0; + struct mali_attribute_packed *descs = T.cpu; + out->producer = producer_count ? T.gpu : 0; + out->consumer = + consumer_count ? T.gpu + (pan_size(ATTRIBUTE) * producer_count) : 0; - /* Lay out the varyings. Must use producer to lay out, in order to - * respect transform feedback precisions. */ - out->present = pan_varying_present(dev, &producer->info, - &consumer->info, point_coord_mask); + /* Lay out the varyings. Must use producer to lay out, in order to + * respect transform feedback precisions. */ + out->present = pan_varying_present(dev, &producer->info, &consumer->info, + point_coord_mask); - out->stride = pan_assign_varyings(dev, &producer->info, - &consumer->info, offsets); + out->stride = + pan_assign_varyings(dev, &producer->info, &consumer->info, offsets); - for (unsigned i = 0; i < producer_count; ++i) { - signed j = pan_find_vary(consumer->info.varyings.input, - consumer->info.varyings.input_count, - producer->info.varyings.output[i].location); + for (unsigned i = 0; i < producer_count; ++i) { + signed j = pan_find_vary(consumer->info.varyings.input, + consumer->info.varyings.input_count, + producer->info.varyings.output[i].location); - enum pipe_format format = (j >= 0) ? - consumer->info.varyings.input[j].format : - producer->info.varyings.output[i].format; + enum pipe_format format = (j >= 0) + ? consumer->info.varyings.input[j].format + : producer->info.varyings.output[i].format; - panfrost_emit_varying(dev, descs + i, - producer->info.varyings.output[i], format, - out->present, 0, offsets[i], PAN_VARY_POSITION); - } + panfrost_emit_varying(dev, descs + i, producer->info.varyings.output[i], + format, out->present, 0, offsets[i], + PAN_VARY_POSITION); + } - for (unsigned i = 0; i < consumer_count; ++i) { - signed j = pan_find_vary(producer->info.varyings.output, - producer->info.varyings.output_count, - consumer->info.varyings.input[i].location); + for (unsigned i = 0; i < consumer_count; ++i) { + signed j = pan_find_vary(producer->info.varyings.output, + producer->info.varyings.output_count, + consumer->info.varyings.input[i].location); - signed offset = (j >= 0) ? offsets[j] : -1; + signed offset = (j >= 0) ? 
offsets[j] : -1; - panfrost_emit_varying(dev, descs + producer_count + i, - consumer->info.varyings.input[i], - consumer->info.varyings.input[i].format, - out->present, point_coord_mask, - offset, PAN_VARY_FRAGCOORD); - } + panfrost_emit_varying( + dev, descs + producer_count + i, consumer->info.varyings.input[i], + consumer->info.varyings.input[i].format, out->present, + point_coord_mask, offset, PAN_VARY_FRAGCOORD); + } } #if PAN_ARCH <= 5 static void pan_emit_special_input(struct mali_attribute_buffer_packed *out, - unsigned present, - enum pan_special_varying v, - unsigned special) + unsigned present, enum pan_special_varying v, + unsigned special) { - if (present & BITFIELD_BIT(v)) { - unsigned idx = pan_varying_index(present, v); + if (present & BITFIELD_BIT(v)) { + unsigned idx = pan_varying_index(present, v); - pan_pack(out + idx, ATTRIBUTE_BUFFER, cfg) { - cfg.special = special; - cfg.type = 0; - } - } + pan_pack(out + idx, ATTRIBUTE_BUFFER, cfg) { + cfg.special = special; + cfg.type = 0; + } + } } #endif static void panfrost_emit_varying_descriptor(struct panfrost_batch *batch, - unsigned vertex_count, - mali_ptr *vs_attribs, - mali_ptr *fs_attribs, - mali_ptr *buffers, - unsigned *buffer_count, - mali_ptr *position, - mali_ptr *psiz, - bool point_coord_replace) + unsigned vertex_count, mali_ptr *vs_attribs, + mali_ptr *fs_attribs, mali_ptr *buffers, + unsigned *buffer_count, mali_ptr *position, + mali_ptr *psiz, bool point_coord_replace) { - struct panfrost_context *ctx = batch->ctx; - struct panfrost_compiled_shader *vs = ctx->prog[PIPE_SHADER_VERTEX]; - struct panfrost_compiled_shader *fs = ctx->prog[PIPE_SHADER_FRAGMENT]; + struct panfrost_context *ctx = batch->ctx; + struct panfrost_compiled_shader *vs = ctx->prog[PIPE_SHADER_VERTEX]; + struct panfrost_compiled_shader *fs = ctx->prog[PIPE_SHADER_FRAGMENT]; - uint16_t point_coord_mask = 0; + uint16_t point_coord_mask = 0; #if PAN_ARCH <= 5 - struct pipe_rasterizer_state *rast = &ctx->rasterizer->base; + struct pipe_rasterizer_state *rast = &ctx->rasterizer->base; - /* Point sprites are lowered on Bifrost and newer */ - if (point_coord_replace) - point_coord_mask = ctx->rasterizer->base.sprite_coord_enable; + /* Point sprites are lowered on Bifrost and newer */ + if (point_coord_replace) + point_coord_mask = ctx->rasterizer->base.sprite_coord_enable; #endif - /* In good conditions, we only need to link varyings once */ - bool prelink = - (point_coord_mask == 0) && - !vs->info.separable && - !fs->info.separable; + /* In good conditions, we only need to link varyings once */ + bool prelink = + (point_coord_mask == 0) && !vs->info.separable && !fs->info.separable; - /* Try to reduce copies */ - struct pan_linkage _linkage; - struct pan_linkage *linkage = prelink ? &vs->linkage : &_linkage; + /* Try to reduce copies */ + struct pan_linkage _linkage; + struct pan_linkage *linkage = prelink ? &vs->linkage : &_linkage; - /* Emit ATTRIBUTE descriptors if needed */ - if (!prelink || vs->linkage.bo == NULL) { - struct panfrost_pool *pool = - prelink ? &ctx->descs : &batch->pool; + /* Emit ATTRIBUTE descriptors if needed */ + if (!prelink || vs->linkage.bo == NULL) { + struct panfrost_pool *pool = prelink ? 
&ctx->descs : &batch->pool; - panfrost_emit_varying_descs(pool, vs, fs, point_coord_mask, linkage); - } + panfrost_emit_varying_descs(pool, vs, fs, point_coord_mask, linkage); + } - unsigned present = linkage->present, stride = linkage->stride; - unsigned count = util_bitcount(present); - struct panfrost_ptr T = - pan_pool_alloc_desc_array(&batch->pool.base, - count + 1, - ATTRIBUTE_BUFFER); - struct mali_attribute_buffer_packed *varyings = - (struct mali_attribute_buffer_packed *) T.cpu; + unsigned present = linkage->present, stride = linkage->stride; + unsigned count = util_bitcount(present); + struct panfrost_ptr T = + pan_pool_alloc_desc_array(&batch->pool.base, count + 1, ATTRIBUTE_BUFFER); + struct mali_attribute_buffer_packed *varyings = + (struct mali_attribute_buffer_packed *)T.cpu; - if (buffer_count) - *buffer_count = count; + if (buffer_count) + *buffer_count = count; #if PAN_ARCH >= 6 - /* Suppress prefetch on Bifrost */ - memset(varyings + count, 0, sizeof(*varyings)); + /* Suppress prefetch on Bifrost */ + memset(varyings + count, 0, sizeof(*varyings)); #endif - if (stride) { - panfrost_emit_varyings(batch, - &varyings[pan_varying_index(present, PAN_VARY_GENERAL)], - stride, vertex_count); - } else { - /* The indirect draw code reads the stride field, make sure - * that it is initialised */ - memset(varyings + pan_varying_index(present, PAN_VARY_GENERAL), 0, - sizeof(*varyings)); - } + if (stride) { + panfrost_emit_varyings( + batch, &varyings[pan_varying_index(present, PAN_VARY_GENERAL)], stride, + vertex_count); + } else { + /* The indirect draw code reads the stride field, make sure + * that it is initialised */ + memset(varyings + pan_varying_index(present, PAN_VARY_GENERAL), 0, + sizeof(*varyings)); + } - /* fp32 vec4 gl_Position */ - *position = panfrost_emit_varyings(batch, - &varyings[pan_varying_index(present, PAN_VARY_POSITION)], - sizeof(float) * 4, vertex_count); + /* fp32 vec4 gl_Position */ + *position = panfrost_emit_varyings( + batch, &varyings[pan_varying_index(present, PAN_VARY_POSITION)], + sizeof(float) * 4, vertex_count); - if (present & BITFIELD_BIT(PAN_VARY_PSIZ)) { - *psiz = panfrost_emit_varyings(batch, - &varyings[pan_varying_index(present, PAN_VARY_PSIZ)], - 2, vertex_count); - } + if (present & BITFIELD_BIT(PAN_VARY_PSIZ)) { + *psiz = panfrost_emit_varyings( + batch, &varyings[pan_varying_index(present, PAN_VARY_PSIZ)], 2, + vertex_count); + } #if PAN_ARCH <= 5 - pan_emit_special_input(varyings, present, - PAN_VARY_PNTCOORD, - (rast->sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT) ? - MALI_ATTRIBUTE_SPECIAL_POINT_COORD_MAX_Y : - MALI_ATTRIBUTE_SPECIAL_POINT_COORD_MIN_Y); - pan_emit_special_input(varyings, present, PAN_VARY_FACE, - MALI_ATTRIBUTE_SPECIAL_FRONT_FACING); - pan_emit_special_input(varyings, present, PAN_VARY_FRAGCOORD, - MALI_ATTRIBUTE_SPECIAL_FRAG_COORD); + pan_emit_special_input( + varyings, present, PAN_VARY_PNTCOORD, + (rast->sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT) + ? 
MALI_ATTRIBUTE_SPECIAL_POINT_COORD_MAX_Y + : MALI_ATTRIBUTE_SPECIAL_POINT_COORD_MIN_Y); + pan_emit_special_input(varyings, present, PAN_VARY_FACE, + MALI_ATTRIBUTE_SPECIAL_FRONT_FACING); + pan_emit_special_input(varyings, present, PAN_VARY_FRAGCOORD, + MALI_ATTRIBUTE_SPECIAL_FRAG_COORD); #endif - *buffers = T.gpu; - *vs_attribs = linkage->producer; - *fs_attribs = linkage->consumer; + *buffers = T.gpu; + *vs_attribs = linkage->producer; + *fs_attribs = linkage->consumer; } /* @@ -2619,64 +2566,60 @@ panfrost_emit_vertex_tiler_jobs(struct panfrost_batch *batch, const struct panfrost_ptr *vertex_job, const struct panfrost_ptr *tiler_job) { - unsigned vertex = panfrost_add_job(&batch->pool.base, &batch->scoreboard, - MALI_JOB_TYPE_VERTEX, false, false, - 0, 0, vertex_job, false); + unsigned vertex = panfrost_add_job(&batch->pool.base, &batch->scoreboard, + MALI_JOB_TYPE_VERTEX, false, false, 0, 0, + vertex_job, false); - panfrost_add_job(&batch->pool.base, &batch->scoreboard, - MALI_JOB_TYPE_TILER, false, false, - vertex, 0, tiler_job, false); + panfrost_add_job(&batch->pool.base, &batch->scoreboard, MALI_JOB_TYPE_TILER, + false, false, vertex, 0, tiler_job, false); } #endif static void emit_tls(struct panfrost_batch *batch) { - struct panfrost_device *dev = pan_device(batch->ctx->base.screen); + struct panfrost_device *dev = pan_device(batch->ctx->base.screen); - /* Emitted with the FB descriptor on Midgard. */ - if (PAN_ARCH <= 5 && batch->framebuffer.gpu) - return; + /* Emitted with the FB descriptor on Midgard. */ + if (PAN_ARCH <= 5 && batch->framebuffer.gpu) + return; - struct panfrost_bo *tls_bo = - batch->stack_size ? - panfrost_batch_get_scratchpad(batch, - batch->stack_size, - dev->thread_tls_alloc, - dev->core_id_range): - NULL; - struct pan_tls_info tls = { - .tls = { - .ptr = tls_bo ? tls_bo->ptr.gpu : 0, - .size = batch->stack_size, - }, - }; + struct panfrost_bo *tls_bo = + batch->stack_size ? panfrost_batch_get_scratchpad( + batch, batch->stack_size, dev->thread_tls_alloc, + dev->core_id_range) + : NULL; + struct pan_tls_info tls = { + .tls = + { + .ptr = tls_bo ? tls_bo->ptr.gpu : 0, + .size = batch->stack_size, + }, + }; - assert(batch->tls.cpu); - GENX(pan_emit_tls)(&tls, batch->tls.cpu); + assert(batch->tls.cpu); + GENX(pan_emit_tls)(&tls, batch->tls.cpu); } static void emit_fbd(struct panfrost_batch *batch, const struct pan_fb_info *fb) { - struct panfrost_device *dev = pan_device(batch->ctx->base.screen); - struct panfrost_bo *tls_bo = - batch->stack_size ? - panfrost_batch_get_scratchpad(batch, - batch->stack_size, - dev->thread_tls_alloc, - dev->core_id_range): - NULL; - struct pan_tls_info tls = { - .tls = { - .ptr = tls_bo ? tls_bo->ptr.gpu : 0, - .size = batch->stack_size, - }, - }; + struct panfrost_device *dev = pan_device(batch->ctx->base.screen); + struct panfrost_bo *tls_bo = + batch->stack_size ? panfrost_batch_get_scratchpad( + batch, batch->stack_size, dev->thread_tls_alloc, + dev->core_id_range) + : NULL; + struct pan_tls_info tls = { + .tls = + { + .ptr = tls_bo ? 
tls_bo->ptr.gpu : 0, + .size = batch->stack_size, + }, + }; - batch->framebuffer.gpu |= - GENX(pan_emit_fbd)(dev, fb, &tls, &batch->tiler_ctx, - batch->framebuffer.cpu); + batch->framebuffer.gpu |= GENX(pan_emit_fbd)( + dev, fb, &tls, &batch->tiler_ctx, batch->framebuffer.cpu); } /* Mark a surface as written */ @@ -2685,10 +2628,10 @@ static void panfrost_initialize_surface(struct panfrost_batch *batch, struct pipe_surface *surf) { - if (surf) { - struct panfrost_resource *rsrc = pan_resource(surf->texture); - BITSET_SET(rsrc->valid.data, surf->u.tex.level); - } + if (surf) { + struct panfrost_resource *rsrc = pan_resource(surf->texture); + BITSET_SET(rsrc->valid.data, surf->u.tex.level); + } } /* Generate a fragment job. This should be called once per frame. (Usually, @@ -2697,67 +2640,68 @@ panfrost_initialize_surface(struct panfrost_batch *batch, static mali_ptr emit_fragment_job(struct panfrost_batch *batch, const struct pan_fb_info *pfb) { - /* Mark the affected buffers as initialized, since we're writing to it. - * Also, add the surfaces we're writing to to the batch */ + /* Mark the affected buffers as initialized, since we're writing to it. + * Also, add the surfaces we're writing to to the batch */ - struct pipe_framebuffer_state *fb = &batch->key; + struct pipe_framebuffer_state *fb = &batch->key; - for (unsigned i = 0; i < fb->nr_cbufs; ++i) - panfrost_initialize_surface(batch, fb->cbufs[i]); + for (unsigned i = 0; i < fb->nr_cbufs; ++i) + panfrost_initialize_surface(batch, fb->cbufs[i]); - panfrost_initialize_surface(batch, fb->zsbuf); + panfrost_initialize_surface(batch, fb->zsbuf); - /* The passed tile coords can be out of range in some cases, so we need - * to clamp them to the framebuffer size to avoid a TILE_RANGE_FAULT. - * Theoretically we also need to clamp the coordinates positive, but we - * avoid that edge case as all four values are unsigned. Also, - * theoretically we could clamp the minima, but if that has to happen - * the asserts would fail anyway (since the maxima would get clamped - * and then be smaller than the minima). An edge case of sorts occurs - * when no scissors are added to draw, so by default min=~0 and max=0. - * But that can't happen if any actual drawing occurs (beyond a - * wallpaper reload), so this is again irrelevant in practice. */ + /* The passed tile coords can be out of range in some cases, so we need + * to clamp them to the framebuffer size to avoid a TILE_RANGE_FAULT. + * Theoretically we also need to clamp the coordinates positive, but we + * avoid that edge case as all four values are unsigned. Also, + * theoretically we could clamp the minima, but if that has to happen + * the asserts would fail anyway (since the maxima would get clamped + * and then be smaller than the minima). An edge case of sorts occurs + * when no scissors are added to draw, so by default min=~0 and max=0. + * But that can't happen if any actual drawing occurs (beyond a + * wallpaper reload), so this is again irrelevant in practice. */ - batch->maxx = MIN2(batch->maxx, fb->width); - batch->maxy = MIN2(batch->maxy, fb->height); + batch->maxx = MIN2(batch->maxx, fb->width); + batch->maxy = MIN2(batch->maxy, fb->height); - /* Rendering region must be at least 1x1; otherwise, there is nothing - * to do and the whole job chain should have been discarded. */ + /* Rendering region must be at least 1x1; otherwise, there is nothing + * to do and the whole job chain should have been discarded. 
*/ - assert(batch->maxx > batch->minx); - assert(batch->maxy > batch->miny); + assert(batch->maxx > batch->minx); + assert(batch->maxy > batch->miny); - struct panfrost_ptr transfer = - pan_pool_alloc_desc(&batch->pool.base, FRAGMENT_JOB); + struct panfrost_ptr transfer = + pan_pool_alloc_desc(&batch->pool.base, FRAGMENT_JOB); - GENX(pan_emit_fragment_job)(pfb, batch->framebuffer.gpu, - transfer.cpu); + GENX(pan_emit_fragment_job)(pfb, batch->framebuffer.gpu, transfer.cpu); - return transfer.gpu; + return transfer.gpu; } -#define DEFINE_CASE(c) case PIPE_PRIM_##c: return MALI_DRAW_MODE_##c; +#define DEFINE_CASE(c) \ + case PIPE_PRIM_##c: \ + return MALI_DRAW_MODE_##c; static uint8_t pan_draw_mode(enum pipe_prim_type mode) { - switch (mode) { - DEFINE_CASE(POINTS); - DEFINE_CASE(LINES); - DEFINE_CASE(LINE_LOOP); - DEFINE_CASE(LINE_STRIP); - DEFINE_CASE(TRIANGLES); - DEFINE_CASE(TRIANGLE_STRIP); - DEFINE_CASE(TRIANGLE_FAN); - DEFINE_CASE(QUADS); - DEFINE_CASE(POLYGON); + switch (mode) { + DEFINE_CASE(POINTS); + DEFINE_CASE(LINES); + DEFINE_CASE(LINE_LOOP); + DEFINE_CASE(LINE_STRIP); + DEFINE_CASE(TRIANGLES); + DEFINE_CASE(TRIANGLE_STRIP); + DEFINE_CASE(TRIANGLE_FAN); + DEFINE_CASE(QUADS); + DEFINE_CASE(POLYGON); #if PAN_ARCH <= 6 - DEFINE_CASE(QUAD_STRIP); + DEFINE_CASE(QUAD_STRIP); #endif - default: - unreachable("Invalid draw mode"); - } + default: + unreachable("Invalid draw mode"); + } } #undef DEFINE_CASE @@ -2766,61 +2710,60 @@ pan_draw_mode(enum pipe_prim_type mode) * transform feedback */ static void -panfrost_statistics_record( - struct panfrost_context *ctx, - const struct pipe_draw_info *info, - const struct pipe_draw_start_count_bias *draw) +panfrost_statistics_record(struct panfrost_context *ctx, + const struct pipe_draw_info *info, + const struct pipe_draw_start_count_bias *draw) { - if (!ctx->active_queries) - return; + if (!ctx->active_queries) + return; - uint32_t prims = u_prims_for_vertices(info->mode, draw->count); - ctx->prims_generated += prims; + uint32_t prims = u_prims_for_vertices(info->mode, draw->count); + ctx->prims_generated += prims; - if (!ctx->streamout.num_targets) - return; + if (!ctx->streamout.num_targets) + return; - ctx->tf_prims_generated += prims; - ctx->dirty |= PAN_DIRTY_SO; + ctx->tf_prims_generated += prims; + ctx->dirty |= PAN_DIRTY_SO; } static void panfrost_update_streamout_offsets(struct panfrost_context *ctx) { - unsigned count = u_stream_outputs_for_vertices(ctx->active_prim, - ctx->vertex_count); + unsigned count = + u_stream_outputs_for_vertices(ctx->active_prim, ctx->vertex_count); - for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) { - if (!ctx->streamout.targets[i]) - continue; + for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) { + if (!ctx->streamout.targets[i]) + continue; - pan_so_target(ctx->streamout.targets[i])->offset += count; - } + pan_so_target(ctx->streamout.targets[i])->offset += count; + } } static inline enum mali_index_type panfrost_translate_index_size(unsigned size) { - STATIC_ASSERT(MALI_INDEX_TYPE_NONE == 0); - STATIC_ASSERT(MALI_INDEX_TYPE_UINT8 == 1); - STATIC_ASSERT(MALI_INDEX_TYPE_UINT16 == 2); + STATIC_ASSERT(MALI_INDEX_TYPE_NONE == 0); + STATIC_ASSERT(MALI_INDEX_TYPE_UINT8 == 1); + STATIC_ASSERT(MALI_INDEX_TYPE_UINT16 == 2); - return (size == 4) ? MALI_INDEX_TYPE_UINT32 : size; + return (size == 4) ? 
MALI_INDEX_TYPE_UINT32 : size; } #if PAN_ARCH <= 7 static inline void -pan_emit_draw_descs(struct panfrost_batch *batch, - struct MALI_DRAW *d, enum pipe_shader_type st) +pan_emit_draw_descs(struct panfrost_batch *batch, struct MALI_DRAW *d, + enum pipe_shader_type st) { - d->offset_start = batch->ctx->offset_start; - d->instance_size = batch->ctx->instance_count > 1 ? - batch->ctx->padded_count : 1; + d->offset_start = batch->ctx->offset_start; + d->instance_size = + batch->ctx->instance_count > 1 ? batch->ctx->padded_count : 1; - d->uniform_buffers = batch->uniform_buffers[st]; - d->push_uniforms = batch->push_uniforms[st]; - d->textures = batch->textures[st]; - d->samplers = batch->samplers[st]; + d->uniform_buffers = batch->uniform_buffers[st]; + d->push_uniforms = batch->push_uniforms[st]; + d->textures = batch->textures[st]; + d->samplers = batch->samplers[st]; } static void @@ -2829,64 +2772,59 @@ panfrost_draw_emit_vertex_section(struct panfrost_batch *batch, mali_ptr attribs, mali_ptr attrib_bufs, void *section) { - pan_pack(section, DRAW, cfg) { - cfg.state = batch->rsd[PIPE_SHADER_VERTEX]; - cfg.attributes = attribs; - cfg.attribute_buffers = attrib_bufs; - cfg.varyings = vs_vary; - cfg.varying_buffers = vs_vary ? varyings : 0; - cfg.thread_storage = batch->tls.gpu; - pan_emit_draw_descs(batch, &cfg, PIPE_SHADER_VERTEX); - } + pan_pack(section, DRAW, cfg) { + cfg.state = batch->rsd[PIPE_SHADER_VERTEX]; + cfg.attributes = attribs; + cfg.attribute_buffers = attrib_bufs; + cfg.varyings = vs_vary; + cfg.varying_buffers = vs_vary ? varyings : 0; + cfg.thread_storage = batch->tls.gpu; + pan_emit_draw_descs(batch, &cfg, PIPE_SHADER_VERTEX); + } } static void panfrost_draw_emit_vertex(struct panfrost_batch *batch, const struct pipe_draw_info *info, - void *invocation_template, - mali_ptr vs_vary, mali_ptr varyings, - mali_ptr attribs, mali_ptr attrib_bufs, - void *job) + void *invocation_template, mali_ptr vs_vary, + mali_ptr varyings, mali_ptr attribs, + mali_ptr attrib_bufs, void *job) { - void *section = - pan_section_ptr(job, COMPUTE_JOB, INVOCATION); - memcpy(section, invocation_template, pan_size(INVOCATION)); + void *section = pan_section_ptr(job, COMPUTE_JOB, INVOCATION); + memcpy(section, invocation_template, pan_size(INVOCATION)); - pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) { - cfg.job_task_split = 5; - } + pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) { + cfg.job_task_split = 5; + } - section = pan_section_ptr(job, COMPUTE_JOB, DRAW); - panfrost_draw_emit_vertex_section(batch, vs_vary, varyings, - attribs, attrib_bufs, section); + section = pan_section_ptr(job, COMPUTE_JOB, DRAW); + panfrost_draw_emit_vertex_section(batch, vs_vary, varyings, attribs, + attrib_bufs, section); } #endif static void -panfrost_emit_primitive_size(struct panfrost_context *ctx, - bool points, mali_ptr size_array, - void *prim_size) +panfrost_emit_primitive_size(struct panfrost_context *ctx, bool points, + mali_ptr size_array, void *prim_size) { - struct panfrost_rasterizer *rast = ctx->rasterizer; + struct panfrost_rasterizer *rast = ctx->rasterizer; - pan_pack(prim_size, PRIMITIVE_SIZE, cfg) { - if (panfrost_writes_point_size(ctx)) { - cfg.size_array = size_array; - } else { - cfg.constant = points ? - rast->base.point_size : - rast->base.line_width; - } - } + pan_pack(prim_size, PRIMITIVE_SIZE, cfg) { + if (panfrost_writes_point_size(ctx)) { + cfg.size_array = size_array; + } else { + cfg.constant = points ? 
rast->base.point_size : rast->base.line_width; + } + } } static bool panfrost_is_implicit_prim_restart(const struct pipe_draw_info *info) { - /* As a reminder primitive_restart should always be checked before any - access to restart_index. */ - return info->primitive_restart && - info->restart_index == (unsigned)BITFIELD_MASK(info->index_size * 8); + /* As a reminder primitive_restart should always be checked before any + access to restart_index. */ + return info->primitive_restart && + info->restart_index == (unsigned)BITFIELD_MASK(info->index_size * 8); } /* On Bifrost and older, the Renderer State Descriptor aggregates many pieces of @@ -2900,128 +2838,125 @@ panfrost_is_implicit_prim_restart(const struct pipe_draw_info *info) * specified in the draw call descriptor, but must be considered when determing * early-Z state which is part of the RSD. */ -#define FRAGMENT_RSD_DIRTY_MASK ( \ - PAN_DIRTY_ZS | PAN_DIRTY_BLEND | PAN_DIRTY_MSAA | \ - PAN_DIRTY_RASTERIZER | PAN_DIRTY_OQ) +#define FRAGMENT_RSD_DIRTY_MASK \ + (PAN_DIRTY_ZS | PAN_DIRTY_BLEND | PAN_DIRTY_MSAA | PAN_DIRTY_RASTERIZER | \ + PAN_DIRTY_OQ) static inline void panfrost_update_shader_state(struct panfrost_batch *batch, enum pipe_shader_type st) { - struct panfrost_context *ctx = batch->ctx; - struct panfrost_compiled_shader *ss = ctx->prog[st]; + struct panfrost_context *ctx = batch->ctx; + struct panfrost_compiled_shader *ss = ctx->prog[st]; - bool frag = (st == PIPE_SHADER_FRAGMENT); - unsigned dirty_3d = ctx->dirty; - unsigned dirty = ctx->dirty_shader[st]; + bool frag = (st == PIPE_SHADER_FRAGMENT); + unsigned dirty_3d = ctx->dirty; + unsigned dirty = ctx->dirty_shader[st]; - if (dirty & PAN_DIRTY_STAGE_TEXTURE) { - batch->textures[st] = - panfrost_emit_texture_descriptors(batch, st); - } + if (dirty & PAN_DIRTY_STAGE_TEXTURE) { + batch->textures[st] = panfrost_emit_texture_descriptors(batch, st); + } - if (dirty & PAN_DIRTY_STAGE_SAMPLER) { - batch->samplers[st] = - panfrost_emit_sampler_descriptors(batch, st); - } + if (dirty & PAN_DIRTY_STAGE_SAMPLER) { + batch->samplers[st] = panfrost_emit_sampler_descriptors(batch, st); + } - /* On Bifrost and older, the fragment shader descriptor is fused - * together with the renderer state; the combined renderer state - * descriptor is emitted below. Otherwise, the shader descriptor is - * standalone and is emitted here. - */ - if ((dirty & PAN_DIRTY_STAGE_SHADER) && !((PAN_ARCH <= 7) && frag)) { - batch->rsd[st] = panfrost_emit_compute_shader_meta(batch, st); - } + /* On Bifrost and older, the fragment shader descriptor is fused + * together with the renderer state; the combined renderer state + * descriptor is emitted below. Otherwise, the shader descriptor is + * standalone and is emitted here. 
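The Bifrost-and-older path described here only rebuilds the combined renderer state when either the fragment shader itself or some state folded into the RSD is dirty. The standalone sketch below mimics that gating with made-up bit values (the DIRTY_* constants are hypothetical stand-ins, not the driver's PAN_DIRTY_* definitions) to show how a single mask test avoids re-emitting the descriptor on unrelated state changes.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical dirty bits, for illustration only */
enum {
   DIRTY_ZS = 1 << 0,
   DIRTY_BLEND = 1 << 1,
   DIRTY_MSAA = 1 << 2,
   DIRTY_RASTERIZER = 1 << 3,
   DIRTY_OQ = 1 << 4,
   DIRTY_VERTEX = 1 << 5,
};

enum { DIRTY_STAGE_SHADER = 1 << 0 };

#define FRAG_RSD_DIRTY_MASK \
   (DIRTY_ZS | DIRTY_BLEND | DIRTY_MSAA | DIRTY_RASTERIZER | DIRTY_OQ)

/* Re-emit the fused fragment RSD only when the shader or any fused state changed */
static bool
needs_frag_rsd(unsigned dirty_shader, unsigned dirty_3d)
{
   return (dirty_shader & DIRTY_STAGE_SHADER) || (dirty_3d & FRAG_RSD_DIRTY_MASK);
}

int
main(void)
{
   /* A vertex-buffer change alone does not touch the fragment RSD... */
   printf("vertex only : %d\n", needs_frag_rsd(0, DIRTY_VERTEX));
   /* ...but a blend change does, even with an unchanged shader. */
   printf("blend change: %d\n", needs_frag_rsd(0, DIRTY_BLEND));
   return 0;
}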
+ */ + if ((dirty & PAN_DIRTY_STAGE_SHADER) && !((PAN_ARCH <= 7) && frag)) { + batch->rsd[st] = panfrost_emit_compute_shader_meta(batch, st); + } #if PAN_ARCH >= 9 - if (dirty & PAN_DIRTY_STAGE_IMAGE) - batch->images[st] = panfrost_emit_images(batch, st); + if (dirty & PAN_DIRTY_STAGE_IMAGE) + batch->images[st] = panfrost_emit_images(batch, st); #endif - if ((dirty & ss->dirty_shader) || (dirty_3d & ss->dirty_3d)) { - batch->uniform_buffers[st] = panfrost_emit_const_buf(batch, st, - NULL, &batch->push_uniforms[st], NULL); - } + if ((dirty & ss->dirty_shader) || (dirty_3d & ss->dirty_3d)) { + batch->uniform_buffers[st] = panfrost_emit_const_buf( + batch, st, NULL, &batch->push_uniforms[st], NULL); + } #if PAN_ARCH <= 7 - /* On Bifrost and older, if the fragment shader changes OR any renderer - * state specified with the fragment shader, the whole renderer state - * descriptor is dirtied and must be reemited. - */ - if (frag && ((dirty & PAN_DIRTY_STAGE_SHADER) || - (dirty_3d & FRAGMENT_RSD_DIRTY_MASK))) { + /* On Bifrost and older, if the fragment shader changes OR any renderer + * state specified with the fragment shader, the whole renderer state + * descriptor is dirtied and must be reemited. + */ + if (frag && ((dirty & PAN_DIRTY_STAGE_SHADER) || + (dirty_3d & FRAGMENT_RSD_DIRTY_MASK))) { - batch->rsd[st] = panfrost_emit_frag_shader_meta(batch); - } + batch->rsd[st] = panfrost_emit_frag_shader_meta(batch); + } - if (frag && (dirty & PAN_DIRTY_STAGE_IMAGE)) { - batch->attribs[st] = panfrost_emit_image_attribs(batch, - &batch->attrib_bufs[st], st); - } + if (frag && (dirty & PAN_DIRTY_STAGE_IMAGE)) { + batch->attribs[st] = + panfrost_emit_image_attribs(batch, &batch->attrib_bufs[st], st); + } #endif } static inline void panfrost_update_state_3d(struct panfrost_batch *batch) { - struct panfrost_context *ctx = batch->ctx; - unsigned dirty = ctx->dirty; + struct panfrost_context *ctx = batch->ctx; + unsigned dirty = ctx->dirty; - if (dirty & PAN_DIRTY_TLS_SIZE) - panfrost_batch_adjust_stack_size(batch); + if (dirty & PAN_DIRTY_TLS_SIZE) + panfrost_batch_adjust_stack_size(batch); - if (dirty & PAN_DIRTY_BLEND) - panfrost_set_batch_masks_blend(batch); + if (dirty & PAN_DIRTY_BLEND) + panfrost_set_batch_masks_blend(batch); - if (dirty & PAN_DIRTY_ZS) - panfrost_set_batch_masks_zs(batch); + if (dirty & PAN_DIRTY_ZS) + panfrost_set_batch_masks_zs(batch); #if PAN_ARCH >= 9 - if ((dirty & (PAN_DIRTY_ZS | PAN_DIRTY_RASTERIZER)) || - (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & PAN_DIRTY_STAGE_SHADER)) - batch->depth_stencil = panfrost_emit_depth_stencil(batch); + if ((dirty & (PAN_DIRTY_ZS | PAN_DIRTY_RASTERIZER)) || + (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & PAN_DIRTY_STAGE_SHADER)) + batch->depth_stencil = panfrost_emit_depth_stencil(batch); - if (dirty & PAN_DIRTY_BLEND) - batch->blend = panfrost_emit_blend_valhall(batch); + if (dirty & PAN_DIRTY_BLEND) + batch->blend = panfrost_emit_blend_valhall(batch); - if (dirty & PAN_DIRTY_VERTEX) { - batch->attribs[PIPE_SHADER_VERTEX] = - panfrost_emit_vertex_data(batch); + if (dirty & PAN_DIRTY_VERTEX) { + batch->attribs[PIPE_SHADER_VERTEX] = panfrost_emit_vertex_data(batch); - batch->attrib_bufs[PIPE_SHADER_VERTEX] = - panfrost_emit_vertex_buffers(batch); - } + batch->attrib_bufs[PIPE_SHADER_VERTEX] = + panfrost_emit_vertex_buffers(batch); + } #endif } #if PAN_ARCH >= 6 static mali_ptr -panfrost_batch_get_bifrost_tiler(struct panfrost_batch *batch, unsigned vertex_count) +panfrost_batch_get_bifrost_tiler(struct panfrost_batch *batch, + unsigned 
vertex_count) { - struct panfrost_device *dev = pan_device(batch->ctx->base.screen); + struct panfrost_device *dev = pan_device(batch->ctx->base.screen); - if (!vertex_count) - return 0; + if (!vertex_count) + return 0; - if (batch->tiler_ctx.bifrost) - return batch->tiler_ctx.bifrost; + if (batch->tiler_ctx.bifrost) + return batch->tiler_ctx.bifrost; - struct panfrost_ptr t = - pan_pool_alloc_desc(&batch->pool.base, TILER_HEAP); + struct panfrost_ptr t = pan_pool_alloc_desc(&batch->pool.base, TILER_HEAP); - GENX(pan_emit_tiler_heap)(dev, t.cpu); + GENX(pan_emit_tiler_heap)(dev, t.cpu); - mali_ptr heap = t.gpu; + mali_ptr heap = t.gpu; - t = pan_pool_alloc_desc(&batch->pool.base, TILER_CONTEXT); - GENX(pan_emit_tiler_ctx)(dev, batch->key.width, batch->key.height, - util_framebuffer_get_num_samples(&batch->key), - pan_tristate_get(batch->first_provoking_vertex), - heap, t.cpu); + t = pan_pool_alloc_desc(&batch->pool.base, TILER_CONTEXT); + GENX(pan_emit_tiler_ctx) + (dev, batch->key.width, batch->key.height, + util_framebuffer_get_num_samples(&batch->key), + pan_tristate_get(batch->first_provoking_vertex), heap, t.cpu); - batch->tiler_ctx.bifrost = t.gpu; - return batch->tiler_ctx.bifrost; + batch->tiler_ctx.bifrost = t.gpu; + return batch->tiler_ctx.bifrost; } #endif @@ -3034,318 +2969,308 @@ panfrost_emit_primitive(struct panfrost_context *ctx, const struct pipe_draw_start_count_bias *draw, mali_ptr indices, bool secondary_shader, void *out) { - UNUSED struct pipe_rasterizer_state *rast = &ctx->rasterizer->base; + UNUSED struct pipe_rasterizer_state *rast = &ctx->rasterizer->base; - bool lines = (info->mode == PIPE_PRIM_LINES || - info->mode == PIPE_PRIM_LINE_LOOP || - info->mode == PIPE_PRIM_LINE_STRIP); + bool lines = + (info->mode == PIPE_PRIM_LINES || info->mode == PIPE_PRIM_LINE_LOOP || + info->mode == PIPE_PRIM_LINE_STRIP); - pan_pack(out, PRIMITIVE, cfg) { - cfg.draw_mode = pan_draw_mode(info->mode); - if (panfrost_writes_point_size(ctx)) - cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16; + pan_pack(out, PRIMITIVE, cfg) { + cfg.draw_mode = pan_draw_mode(info->mode); + if (panfrost_writes_point_size(ctx)) + cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16; #if PAN_ARCH <= 8 - /* For line primitives, PRIMITIVE.first_provoking_vertex must - * be set to true and the provoking vertex is selected with - * DRAW.flat_shading_vertex. - */ - if (lines) - cfg.first_provoking_vertex = true; - else - cfg.first_provoking_vertex = rast->flatshade_first; + /* For line primitives, PRIMITIVE.first_provoking_vertex must + * be set to true and the provoking vertex is selected with + * DRAW.flat_shading_vertex. 
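As this comment notes, line primitives always force PRIMITIVE.first_provoking_vertex on these GPUs and the real convention is then picked through DRAW.flat_shading_vertex. The snippet below is a minimal, driver-independent restatement of that decision; is_line_prim and flatshade_first are illustrative stand-ins for the primitive type and rasterizer state consulted in the code.

#include <stdbool.h>
#include <stdio.h>

/* Value written to PRIMITIVE.first_provoking_vertex: lines force 'true',
 * other primitives follow the rasterizer's flatshade_first setting. */
static bool
first_provoking_vertex(bool is_line_prim, bool flatshade_first)
{
   return is_line_prim ? true : flatshade_first;
}

int
main(void)
{
   printf("lines, last-vertex convention : %d\n", first_provoking_vertex(true, false));
   printf("tris,  last-vertex convention : %d\n", first_provoking_vertex(false, false));
   printf("tris,  first-vertex convention: %d\n", first_provoking_vertex(false, true));
   return 0;
}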
+ */ + if (lines) + cfg.first_provoking_vertex = true; + else + cfg.first_provoking_vertex = rast->flatshade_first; - if (panfrost_is_implicit_prim_restart(info)) { - cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT; - } else if (info->primitive_restart) { - cfg.primitive_restart = MALI_PRIMITIVE_RESTART_EXPLICIT; - cfg.primitive_restart_index = info->restart_index; - } + if (panfrost_is_implicit_prim_restart(info)) { + cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT; + } else if (info->primitive_restart) { + cfg.primitive_restart = MALI_PRIMITIVE_RESTART_EXPLICIT; + cfg.primitive_restart_index = info->restart_index; + } - cfg.job_task_split = 6; + cfg.job_task_split = 6; #else - struct panfrost_compiled_shader *fs = - ctx->prog[PIPE_SHADER_FRAGMENT]; + struct panfrost_compiled_shader *fs = ctx->prog[PIPE_SHADER_FRAGMENT]; - cfg.allow_rotating_primitives = !(lines || fs->info.bifrost.uses_flat_shading); - cfg.primitive_restart = info->primitive_restart; + cfg.allow_rotating_primitives = + !(lines || fs->info.bifrost.uses_flat_shading); + cfg.primitive_restart = info->primitive_restart; - /* Non-fixed restart indices should have been lowered */ - assert(!cfg.primitive_restart || panfrost_is_implicit_prim_restart(info)); + /* Non-fixed restart indices should have been lowered */ + assert(!cfg.primitive_restart || panfrost_is_implicit_prim_restart(info)); #endif - cfg.index_count = draw->count; - cfg.index_type = panfrost_translate_index_size(info->index_size); + cfg.index_count = draw->count; + cfg.index_type = panfrost_translate_index_size(info->index_size); - if (PAN_ARCH >= 9) { - /* Base vertex offset on Valhall is used for both - * indexed and non-indexed draws, in a simple way for - * either. Handle both cases. - */ - if (cfg.index_type) - cfg.base_vertex_offset = draw->index_bias; - else - cfg.base_vertex_offset = draw->start; + if (PAN_ARCH >= 9) { + /* Base vertex offset on Valhall is used for both + * indexed and non-indexed draws, in a simple way for + * either. Handle both cases. 
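The Valhall note above says one field covers both draw flavours, while older parts only use it for indexed draws. The following standalone sketch restates that selection; index_bias, start and offset_start mirror the draw parameters used here, the arch check is reduced to an integer, and the "0 for non-indexed on older GPUs" case simply reflects the field being left at its packed default in that path.

#include <stdio.h>

/* Pick the value written to PRIMITIVE.base_vertex_offset. On Valhall
 * (arch >= 9) the field applies to indexed and non-indexed draws alike;
 * earlier GPUs only use it for indexed draws and subtract the vertex
 * range start instead. */
static int
base_vertex_offset(int arch, int indexed, int index_bias, int start,
                   int offset_start)
{
   if (arch >= 9)
      return indexed ? index_bias : start;

   /* Non-indexed draws leave the field at its default (0) pre-Valhall */
   return indexed ? index_bias - offset_start : 0;
}

int
main(void)
{
   printf("v9, indexed     : %d\n", base_vertex_offset(9, 1, 5, 0, 0));
   printf("v9, non-indexed : %d\n", base_vertex_offset(9, 0, 0, 100, 0));
   printf("v7, indexed     : %d\n", base_vertex_offset(7, 1, 5, 0, 3));
   return 0;
}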
+ */ + if (cfg.index_type) + cfg.base_vertex_offset = draw->index_bias; + else + cfg.base_vertex_offset = draw->start; - /* Indices are moved outside the primitive descriptor - * on Valhall, so we don't need to set that here - */ - } else if (cfg.index_type) { - cfg.base_vertex_offset = draw->index_bias - ctx->offset_start; + /* Indices are moved outside the primitive descriptor + * on Valhall, so we don't need to set that here + */ + } else if (cfg.index_type) { + cfg.base_vertex_offset = draw->index_bias - ctx->offset_start; #if PAN_ARCH <= 7 - cfg.indices = indices; + cfg.indices = indices; #endif - } + } #if PAN_ARCH >= 6 - cfg.secondary_shader = secondary_shader; + cfg.secondary_shader = secondary_shader; #endif - } + } } #if PAN_ARCH >= 9 static mali_ptr panfrost_upload_wa_sampler(struct panfrost_batch *batch) { - struct panfrost_ptr T = pan_pool_alloc_desc(&batch->pool.base, SAMPLER); - pan_pack(T.cpu, SAMPLER, cfg); - return T.gpu; + struct panfrost_ptr T = pan_pool_alloc_desc(&batch->pool.base, SAMPLER); + pan_pack(T.cpu, SAMPLER, cfg) + ; + return T.gpu; } static mali_ptr panfrost_emit_resources(struct panfrost_batch *batch, - enum pipe_shader_type stage, - mali_ptr ubos, unsigned ubo_count) + enum pipe_shader_type stage, mali_ptr ubos, + unsigned ubo_count) { - struct panfrost_context *ctx = batch->ctx; - struct panfrost_ptr T; - unsigned nr_tables = 12; + struct panfrost_context *ctx = batch->ctx; + struct panfrost_ptr T; + unsigned nr_tables = 12; - /* Although individual resources need only 16 byte alignment, the - * resource table as a whole must be 64-byte aligned. - */ - T = pan_pool_alloc_aligned(&batch->pool.base, nr_tables * pan_size(RESOURCE), 64); - memset(T.cpu, 0, nr_tables * pan_size(RESOURCE)); + /* Although individual resources need only 16 byte alignment, the + * resource table as a whole must be 64-byte aligned. 
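A little further down, the resource-table pointer is returned as T.gpu | nr_tables, which only works because the 64-byte alignment required here guarantees the low six bits of the address are zero, leaving room for a small table count. A minimal standalone illustration of that encoding follows (the address value is hypothetical):

#include <assert.h>
#include <inttypes.h>
#include <stdio.h>

int
main(void)
{
   uint64_t table_addr = 0x80001040; /* hypothetical 64-byte aligned GPU address */
   unsigned nr_tables = 12;

   assert((table_addr & 63) == 0); /* 64-byte alignment frees the low 6 bits */
   assert(nr_tables < 64);         /* so a count up to 63 fits alongside it */

   uint64_t packed = table_addr | nr_tables;

   /* The consumer can split the two fields back apart: */
   assert((packed & ~63ull) == table_addr);
   assert((packed & 63) == nr_tables);

   printf("packed = %#" PRIx64 "\n", packed);
   return 0;
}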
+ */ + T = pan_pool_alloc_aligned(&batch->pool.base, nr_tables * pan_size(RESOURCE), + 64); + memset(T.cpu, 0, nr_tables * pan_size(RESOURCE)); - panfrost_make_resource_table(T, PAN_TABLE_UBO, ubos, ubo_count); + panfrost_make_resource_table(T, PAN_TABLE_UBO, ubos, ubo_count); - panfrost_make_resource_table(T, PAN_TABLE_TEXTURE, - batch->textures[stage], - ctx->sampler_view_count[stage]); + panfrost_make_resource_table(T, PAN_TABLE_TEXTURE, batch->textures[stage], + ctx->sampler_view_count[stage]); + if (ctx->sampler_count[stage]) { + panfrost_make_resource_table(T, PAN_TABLE_SAMPLER, batch->samplers[stage], + ctx->sampler_count[stage]); + } else { + /* We always need at least 1 sampler for txf to work */ + panfrost_make_resource_table(T, PAN_TABLE_SAMPLER, + panfrost_upload_wa_sampler(batch), 1); + } - if (ctx->sampler_count[stage]) { - panfrost_make_resource_table(T, PAN_TABLE_SAMPLER, - batch->samplers[stage], - ctx->sampler_count[stage]); - } else { - /* We always need at least 1 sampler for txf to work */ - panfrost_make_resource_table(T, PAN_TABLE_SAMPLER, - panfrost_upload_wa_sampler(batch), - 1); - } + panfrost_make_resource_table(T, PAN_TABLE_IMAGE, batch->images[stage], + util_last_bit(ctx->image_mask[stage])); - panfrost_make_resource_table(T, PAN_TABLE_IMAGE, - batch->images[stage], - util_last_bit(ctx->image_mask[stage])); + if (stage == PIPE_SHADER_VERTEX) { + panfrost_make_resource_table(T, PAN_TABLE_ATTRIBUTE, + batch->attribs[stage], + ctx->vertex->num_elements); - if (stage == PIPE_SHADER_VERTEX) { - panfrost_make_resource_table(T, PAN_TABLE_ATTRIBUTE, - batch->attribs[stage], - ctx->vertex->num_elements); + panfrost_make_resource_table(T, PAN_TABLE_ATTRIBUTE_BUFFER, + batch->attrib_bufs[stage], + util_last_bit(ctx->vb_mask)); + } - panfrost_make_resource_table(T, PAN_TABLE_ATTRIBUTE_BUFFER, - batch->attrib_bufs[stage], - util_last_bit(ctx->vb_mask)); - } - - return T.gpu | nr_tables; + return T.gpu | nr_tables; } static void panfrost_emit_shader(struct panfrost_batch *batch, struct MALI_SHADER_ENVIRONMENT *cfg, - enum pipe_shader_type stage, - mali_ptr shader_ptr, + enum pipe_shader_type stage, mali_ptr shader_ptr, mali_ptr thread_storage) { - unsigned fau_words = 0, ubo_count = 0; - mali_ptr ubos, resources; + unsigned fau_words = 0, ubo_count = 0; + mali_ptr ubos, resources; - ubos = panfrost_emit_const_buf(batch, stage, &ubo_count, &cfg->fau, - &fau_words); + ubos = + panfrost_emit_const_buf(batch, stage, &ubo_count, &cfg->fau, &fau_words); - resources = panfrost_emit_resources(batch, stage, ubos, ubo_count); + resources = panfrost_emit_resources(batch, stage, ubos, ubo_count); - cfg->thread_storage = thread_storage; - cfg->shader = shader_ptr; - cfg->resources = resources; + cfg->thread_storage = thread_storage; + cfg->shader = shader_ptr; + cfg->resources = resources; - /* Each entry of FAU is 64-bits */ - cfg->fau_count = DIV_ROUND_UP(fau_words, 2); + /* Each entry of FAU is 64-bits */ + cfg->fau_count = DIV_ROUND_UP(fau_words, 2); } #endif static void -panfrost_emit_draw(void *out, - struct panfrost_batch *batch, - bool fs_required, - enum pipe_prim_type prim, - mali_ptr pos, mali_ptr fs_vary, mali_ptr varyings) +panfrost_emit_draw(void *out, struct panfrost_batch *batch, bool fs_required, + enum pipe_prim_type prim, mali_ptr pos, mali_ptr fs_vary, + mali_ptr varyings) { - struct panfrost_context *ctx = batch->ctx; - struct pipe_rasterizer_state *rast = &ctx->rasterizer->base; - bool polygon = (prim == PIPE_PRIM_TRIANGLES); + struct panfrost_context *ctx = 
batch->ctx; + struct pipe_rasterizer_state *rast = &ctx->rasterizer->base; + bool polygon = (prim == PIPE_PRIM_TRIANGLES); - pan_pack(out, DRAW, cfg) { - /* - * From the Gallium documentation, - * pipe_rasterizer_state::cull_face "indicates which faces of - * polygons to cull". Points and lines are not considered - * polygons and should be drawn even if all faces are culled. - * The hardware does not take primitive type into account when - * culling, so we need to do that check ourselves. - */ - cfg.cull_front_face = polygon && (rast->cull_face & PIPE_FACE_FRONT); - cfg.cull_back_face = polygon && (rast->cull_face & PIPE_FACE_BACK); - cfg.front_face_ccw = rast->front_ccw; + pan_pack(out, DRAW, cfg) { + /* + * From the Gallium documentation, + * pipe_rasterizer_state::cull_face "indicates which faces of + * polygons to cull". Points and lines are not considered + * polygons and should be drawn even if all faces are culled. + * The hardware does not take primitive type into account when + * culling, so we need to do that check ourselves. + */ + cfg.cull_front_face = polygon && (rast->cull_face & PIPE_FACE_FRONT); + cfg.cull_back_face = polygon && (rast->cull_face & PIPE_FACE_BACK); + cfg.front_face_ccw = rast->front_ccw; - if (ctx->occlusion_query && ctx->active_queries) { - if (ctx->occlusion_query->type == PIPE_QUERY_OCCLUSION_COUNTER) - cfg.occlusion_query = MALI_OCCLUSION_MODE_COUNTER; - else - cfg.occlusion_query = MALI_OCCLUSION_MODE_PREDICATE; + if (ctx->occlusion_query && ctx->active_queries) { + if (ctx->occlusion_query->type == PIPE_QUERY_OCCLUSION_COUNTER) + cfg.occlusion_query = MALI_OCCLUSION_MODE_COUNTER; + else + cfg.occlusion_query = MALI_OCCLUSION_MODE_PREDICATE; - struct panfrost_resource *rsrc = pan_resource(ctx->occlusion_query->rsrc); - cfg.occlusion = rsrc->image.data.bo->ptr.gpu; - panfrost_batch_write_rsrc(ctx->batch, rsrc, - PIPE_SHADER_FRAGMENT); - } + struct panfrost_resource *rsrc = + pan_resource(ctx->occlusion_query->rsrc); + cfg.occlusion = rsrc->image.data.bo->ptr.gpu; + panfrost_batch_write_rsrc(ctx->batch, rsrc, PIPE_SHADER_FRAGMENT); + } #if PAN_ARCH >= 9 - struct panfrost_compiled_shader *fs = - ctx->prog[PIPE_SHADER_FRAGMENT]; + struct panfrost_compiled_shader *fs = ctx->prog[PIPE_SHADER_FRAGMENT]; - cfg.multisample_enable = rast->multisample; - cfg.sample_mask = rast->multisample ? ctx->sample_mask : 0xFFFF; + cfg.multisample_enable = rast->multisample; + cfg.sample_mask = rast->multisample ? ctx->sample_mask : 0xFFFF; - /* Use per-sample shading if required by API Also use it when a - * blend shader is used with multisampling, as this is handled - * by a single ST_TILE in the blend shader with the current - * sample ID, requiring per-sample shading. - */ - cfg.evaluate_per_sample = - (rast->multisample && - ((ctx->min_samples > 1) || ctx->valhall_has_blend_shader)); + /* Use per-sample shading if required by API Also use it when a + * blend shader is used with multisampling, as this is handled + * by a single ST_TILE in the blend shader with the current + * sample ID, requiring per-sample shading. 
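+       * (Note that evaluate_per_sample is additionally OR'd with the
+       * fragment shader's own sample_shading flag further down, when a
+       * fragment shader is required.)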
+ */ + cfg.evaluate_per_sample = + (rast->multisample && + ((ctx->min_samples > 1) || ctx->valhall_has_blend_shader)); - cfg.single_sampled_lines = !rast->multisample; + cfg.single_sampled_lines = !rast->multisample; - cfg.vertex_array.packet = true; + cfg.vertex_array.packet = true; - cfg.minimum_z = batch->minimum_z; - cfg.maximum_z = batch->maximum_z; + cfg.minimum_z = batch->minimum_z; + cfg.maximum_z = batch->maximum_z; - cfg.depth_stencil = batch->depth_stencil; + cfg.depth_stencil = batch->depth_stencil; - if (fs_required) { - bool has_oq = ctx->occlusion_query && ctx->active_queries; + if (fs_required) { + bool has_oq = ctx->occlusion_query && ctx->active_queries; - struct pan_earlyzs_state earlyzs = - pan_earlyzs_get(fs->earlyzs, - ctx->depth_stencil->writes_zs || has_oq, - ctx->blend->base.alpha_to_coverage, - ctx->depth_stencil->zs_always_passes); + struct pan_earlyzs_state earlyzs = pan_earlyzs_get( + fs->earlyzs, ctx->depth_stencil->writes_zs || has_oq, + ctx->blend->base.alpha_to_coverage, + ctx->depth_stencil->zs_always_passes); - cfg.pixel_kill_operation = earlyzs.kill; - cfg.zs_update_operation = earlyzs.update; + cfg.pixel_kill_operation = earlyzs.kill; + cfg.zs_update_operation = earlyzs.update; - cfg.allow_forward_pixel_to_kill = pan_allow_forward_pixel_to_kill(ctx, fs); - cfg.allow_forward_pixel_to_be_killed = !fs->info.writes_global; + cfg.allow_forward_pixel_to_kill = + pan_allow_forward_pixel_to_kill(ctx, fs); + cfg.allow_forward_pixel_to_be_killed = !fs->info.writes_global; - /* Mask of render targets that may be written. A render - * target may be written if the fragment shader writes - * to it AND it actually exists. If the render target - * doesn't actually exist, the blend descriptor will be - * OFF so it may be omitted from the mask. - * - * Only set when there is a fragment shader, since - * otherwise no colour updates are possible. - */ - cfg.render_target_mask = - (fs->info.outputs_written >> FRAG_RESULT_DATA0) & - ctx->fb_rt_mask; + /* Mask of render targets that may be written. A render + * target may be written if the fragment shader writes + * to it AND it actually exists. If the render target + * doesn't actually exist, the blend descriptor will be + * OFF so it may be omitted from the mask. + * + * Only set when there is a fragment shader, since + * otherwise no colour updates are possible. + */ + cfg.render_target_mask = + (fs->info.outputs_written >> FRAG_RESULT_DATA0) & ctx->fb_rt_mask; - /* Also use per-sample shading if required by the shader - */ - cfg.evaluate_per_sample |= fs->info.fs.sample_shading; + /* Also use per-sample shading if required by the shader + */ + cfg.evaluate_per_sample |= fs->info.fs.sample_shading; - /* Unlike Bifrost, alpha-to-coverage must be included in - * this identically-named flag. Confusing, isn't it? - */ - cfg.shader_modifies_coverage = fs->info.fs.writes_coverage || - fs->info.fs.can_discard || - ctx->blend->base.alpha_to_coverage; + /* Unlike Bifrost, alpha-to-coverage must be included in + * this identically-named flag. Confusing, isn't it? + */ + cfg.shader_modifies_coverage = fs->info.fs.writes_coverage || + fs->info.fs.can_discard || + ctx->blend->base.alpha_to_coverage; - /* Blend descriptors are only accessed by a BLEND - * instruction on Valhall. It follows that if the - * fragment shader is omitted, we may also emit the - * blend descriptors. 
- */ - cfg.blend = batch->blend; - cfg.blend_count = MAX2(batch->key.nr_cbufs, 1); - cfg.alpha_to_coverage = ctx->blend->base.alpha_to_coverage; + /* Blend descriptors are only accessed by a BLEND + * instruction on Valhall. It follows that if the + * fragment shader is omitted, we may also emit the + * blend descriptors. + */ + cfg.blend = batch->blend; + cfg.blend_count = MAX2(batch->key.nr_cbufs, 1); + cfg.alpha_to_coverage = ctx->blend->base.alpha_to_coverage; - cfg.overdraw_alpha0 = panfrost_overdraw_alpha(ctx, 0); - cfg.overdraw_alpha1 = panfrost_overdraw_alpha(ctx, 1); + cfg.overdraw_alpha0 = panfrost_overdraw_alpha(ctx, 0); + cfg.overdraw_alpha1 = panfrost_overdraw_alpha(ctx, 1); - panfrost_emit_shader(batch, &cfg.shader, PIPE_SHADER_FRAGMENT, - batch->rsd[PIPE_SHADER_FRAGMENT], - batch->tls.gpu); - } else { - /* These operations need to be FORCE to benefit from the - * depth-only pass optimizations. - */ - cfg.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY; - cfg.zs_update_operation = MALI_PIXEL_KILL_FORCE_EARLY; + panfrost_emit_shader(batch, &cfg.shader, PIPE_SHADER_FRAGMENT, + batch->rsd[PIPE_SHADER_FRAGMENT], batch->tls.gpu); + } else { + /* These operations need to be FORCE to benefit from the + * depth-only pass optimizations. + */ + cfg.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY; + cfg.zs_update_operation = MALI_PIXEL_KILL_FORCE_EARLY; - /* No shader and no blend => no shader or blend - * reasons to disable FPK. The only FPK-related state - * not covered is alpha-to-coverage which we don't set - * without blend. - */ - cfg.allow_forward_pixel_to_kill = true; + /* No shader and no blend => no shader or blend + * reasons to disable FPK. The only FPK-related state + * not covered is alpha-to-coverage which we don't set + * without blend. + */ + cfg.allow_forward_pixel_to_kill = true; - /* No shader => no shader side effects */ - cfg.allow_forward_pixel_to_be_killed = true; + /* No shader => no shader side effects */ + cfg.allow_forward_pixel_to_be_killed = true; - /* Alpha isn't written so these are vacuous */ - cfg.overdraw_alpha0 = true; - cfg.overdraw_alpha1 = true; - } + /* Alpha isn't written so these are vacuous */ + cfg.overdraw_alpha0 = true; + cfg.overdraw_alpha1 = true; + } #else - cfg.position = pos; - cfg.state = batch->rsd[PIPE_SHADER_FRAGMENT]; - cfg.attributes = batch->attribs[PIPE_SHADER_FRAGMENT]; - cfg.attribute_buffers = batch->attrib_bufs[PIPE_SHADER_FRAGMENT]; - cfg.viewport = batch->viewport; - cfg.varyings = fs_vary; - cfg.varying_buffers = fs_vary ? varyings : 0; - cfg.thread_storage = batch->tls.gpu; + cfg.position = pos; + cfg.state = batch->rsd[PIPE_SHADER_FRAGMENT]; + cfg.attributes = batch->attribs[PIPE_SHADER_FRAGMENT]; + cfg.attribute_buffers = batch->attrib_bufs[PIPE_SHADER_FRAGMENT]; + cfg.viewport = batch->viewport; + cfg.varyings = fs_vary; + cfg.varying_buffers = fs_vary ? varyings : 0; + cfg.thread_storage = batch->tls.gpu; - /* For all primitives but lines DRAW.flat_shading_vertex must - * be set to 0 and the provoking vertex is selected with the - * PRIMITIVE.first_provoking_vertex field. - */ - if (prim == PIPE_PRIM_LINES) { - /* The logic is inverted across arches. */ - cfg.flat_shading_vertex = rast->flatshade_first - ^ (PAN_ARCH <= 5); - } + /* For all primitives but lines DRAW.flat_shading_vertex must + * be set to 0 and the provoking vertex is selected with the + * PRIMITIVE.first_provoking_vertex field. + */ + if (prim == PIPE_PRIM_LINES) { + /* The logic is inverted across arches. 
*/ + cfg.flat_shading_vertex = rast->flatshade_first ^ (PAN_ARCH <= 5); + } - pan_emit_draw_descs(batch, &cfg, PIPE_SHADER_FRAGMENT); + pan_emit_draw_descs(batch, &cfg, PIPE_SHADER_FRAGMENT); #endif - } + } } #if PAN_ARCH >= 9 @@ -3353,90 +3278,90 @@ static void panfrost_emit_malloc_vertex(struct panfrost_batch *batch, const struct pipe_draw_info *info, const struct pipe_draw_start_count_bias *draw, - mali_ptr indices, bool secondary_shader, - void *job) + mali_ptr indices, bool secondary_shader, void *job) { - struct panfrost_context *ctx = batch->ctx; - struct panfrost_compiled_shader *vs = ctx->prog[PIPE_SHADER_VERTEX]; - struct panfrost_compiled_shader *fs = ctx->prog[PIPE_SHADER_FRAGMENT]; + struct panfrost_context *ctx = batch->ctx; + struct panfrost_compiled_shader *vs = ctx->prog[PIPE_SHADER_VERTEX]; + struct panfrost_compiled_shader *fs = ctx->prog[PIPE_SHADER_FRAGMENT]; - bool fs_required = panfrost_fs_required(fs, ctx->blend, - &ctx->pipe_framebuffer, - ctx->depth_stencil); + bool fs_required = panfrost_fs_required( + fs, ctx->blend, &ctx->pipe_framebuffer, ctx->depth_stencil); - /* Varying shaders only feed data to the fragment shader, so if we omit - * the fragment shader, we should omit the varying shader too. - */ - secondary_shader &= fs_required; + /* Varying shaders only feed data to the fragment shader, so if we omit + * the fragment shader, we should omit the varying shader too. + */ + secondary_shader &= fs_required; - panfrost_emit_primitive(ctx, info, draw, 0, secondary_shader, - pan_section_ptr(job, MALLOC_VERTEX_JOB, PRIMITIVE)); + panfrost_emit_primitive(ctx, info, draw, 0, secondary_shader, + pan_section_ptr(job, MALLOC_VERTEX_JOB, PRIMITIVE)); - pan_section_pack(job, MALLOC_VERTEX_JOB, INSTANCE_COUNT, cfg) { - cfg.count = info->instance_count; - } + pan_section_pack(job, MALLOC_VERTEX_JOB, INSTANCE_COUNT, cfg) { + cfg.count = info->instance_count; + } - pan_section_pack(job, MALLOC_VERTEX_JOB, ALLOCATION, cfg) { - if (secondary_shader) { - unsigned v = vs->info.varyings.output_count; - unsigned f = fs->info.varyings.input_count; - unsigned slots = MAX2(v, f); - slots += util_bitcount(fs->key.fs.fixed_varying_mask); - unsigned size = slots * 16; + pan_section_pack(job, MALLOC_VERTEX_JOB, ALLOCATION, cfg) { + if (secondary_shader) { + unsigned v = vs->info.varyings.output_count; + unsigned f = fs->info.varyings.input_count; + unsigned slots = MAX2(v, f); + slots += util_bitcount(fs->key.fs.fixed_varying_mask); + unsigned size = slots * 16; - /* Assumes 16 byte slots. We could do better. */ - cfg.vertex_packet_stride = size + 16; - cfg.vertex_attribute_stride = size; - } else { - /* Hardware requirement for "no varyings" */ - cfg.vertex_packet_stride = 16; - cfg.vertex_attribute_stride = 0; - } - } + /* Assumes 16 byte slots. We could do better. 
*/ + cfg.vertex_packet_stride = size + 16; + cfg.vertex_attribute_stride = size; + } else { + /* Hardware requirement for "no varyings" */ + cfg.vertex_packet_stride = 16; + cfg.vertex_attribute_stride = 0; + } + } - pan_section_pack(job, MALLOC_VERTEX_JOB, TILER, cfg) { - cfg.address = panfrost_batch_get_bifrost_tiler(batch, ~0); - } + pan_section_pack(job, MALLOC_VERTEX_JOB, TILER, cfg) { + cfg.address = panfrost_batch_get_bifrost_tiler(batch, ~0); + } - STATIC_ASSERT(sizeof(batch->scissor) == pan_size(SCISSOR)); - memcpy(pan_section_ptr(job, MALLOC_VERTEX_JOB, SCISSOR), - &batch->scissor, pan_size(SCISSOR)); + STATIC_ASSERT(sizeof(batch->scissor) == pan_size(SCISSOR)); + memcpy(pan_section_ptr(job, MALLOC_VERTEX_JOB, SCISSOR), &batch->scissor, + pan_size(SCISSOR)); - panfrost_emit_primitive_size(ctx, info->mode == PIPE_PRIM_POINTS, 0, - pan_section_ptr(job, MALLOC_VERTEX_JOB, PRIMITIVE_SIZE)); + panfrost_emit_primitive_size( + ctx, info->mode == PIPE_PRIM_POINTS, 0, + pan_section_ptr(job, MALLOC_VERTEX_JOB, PRIMITIVE_SIZE)); - pan_section_pack(job, MALLOC_VERTEX_JOB, INDICES, cfg) { - cfg.address = indices; - } + pan_section_pack(job, MALLOC_VERTEX_JOB, INDICES, cfg) { + cfg.address = indices; + } - panfrost_emit_draw(pan_section_ptr(job, MALLOC_VERTEX_JOB, DRAW), - batch, fs_required, u_reduced_prim(info->mode), 0, 0, 0); + panfrost_emit_draw(pan_section_ptr(job, MALLOC_VERTEX_JOB, DRAW), batch, + fs_required, u_reduced_prim(info->mode), 0, 0, 0); - pan_section_pack(job, MALLOC_VERTEX_JOB, POSITION, cfg) { - /* IDVS/points vertex shader */ - mali_ptr vs_ptr = batch->rsd[PIPE_SHADER_VERTEX]; + pan_section_pack(job, MALLOC_VERTEX_JOB, POSITION, cfg) { + /* IDVS/points vertex shader */ + mali_ptr vs_ptr = batch->rsd[PIPE_SHADER_VERTEX]; - /* IDVS/triangle vertex shader */ - if (vs_ptr && info->mode != PIPE_PRIM_POINTS) - vs_ptr += pan_size(SHADER_PROGRAM); + /* IDVS/triangle vertex shader */ + if (vs_ptr && info->mode != PIPE_PRIM_POINTS) + vs_ptr += pan_size(SHADER_PROGRAM); - panfrost_emit_shader(batch, &cfg, PIPE_SHADER_VERTEX, vs_ptr, - batch->tls.gpu); - } + panfrost_emit_shader(batch, &cfg, PIPE_SHADER_VERTEX, vs_ptr, + batch->tls.gpu); + } - pan_section_pack(job, MALLOC_VERTEX_JOB, VARYING, cfg) { - /* If a varying shader is used, we configure it with the same - * state as the position shader for backwards compatible - * behaviour with Bifrost. This could be optimized. - */ - if (!secondary_shader) continue; + pan_section_pack(job, MALLOC_VERTEX_JOB, VARYING, cfg) { + /* If a varying shader is used, we configure it with the same + * state as the position shader for backwards compatible + * behaviour with Bifrost. This could be optimized. 
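+       * (The varying shader's RSD sits at a fixed offset of
+       * 2 * pan_size(SHADER_PROGRAM) from the vertex RSD, as computed
+       * just below.)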
+ */ + if (!secondary_shader) + continue; - mali_ptr ptr = batch->rsd[PIPE_SHADER_VERTEX] + - (2 * pan_size(SHADER_PROGRAM)); + mali_ptr ptr = + batch->rsd[PIPE_SHADER_VERTEX] + (2 * pan_size(SHADER_PROGRAM)); - panfrost_emit_shader(batch, &cfg, PIPE_SHADER_VERTEX, - ptr, batch->tls.gpu); - } + panfrost_emit_shader(batch, &cfg, PIPE_SHADER_VERTEX, ptr, + batch->tls.gpu); + } } #endif @@ -3445,391 +3370,381 @@ static void panfrost_draw_emit_tiler(struct panfrost_batch *batch, const struct pipe_draw_info *info, const struct pipe_draw_start_count_bias *draw, - void *invocation_template, - mali_ptr indices, mali_ptr fs_vary, mali_ptr varyings, - mali_ptr pos, mali_ptr psiz, bool secondary_shader, - void *job) + void *invocation_template, mali_ptr indices, + mali_ptr fs_vary, mali_ptr varyings, mali_ptr pos, + mali_ptr psiz, bool secondary_shader, void *job) { - struct panfrost_context *ctx = batch->ctx; + struct panfrost_context *ctx = batch->ctx; - void *section = pan_section_ptr(job, TILER_JOB, INVOCATION); - memcpy(section, invocation_template, pan_size(INVOCATION)); + void *section = pan_section_ptr(job, TILER_JOB, INVOCATION); + memcpy(section, invocation_template, pan_size(INVOCATION)); - panfrost_emit_primitive(ctx, info, draw, indices, secondary_shader, - pan_section_ptr(job, TILER_JOB, PRIMITIVE)); + panfrost_emit_primitive(ctx, info, draw, indices, secondary_shader, + pan_section_ptr(job, TILER_JOB, PRIMITIVE)); - void *prim_size = pan_section_ptr(job, TILER_JOB, PRIMITIVE_SIZE); - enum pipe_prim_type prim = u_reduced_prim(info->mode); + void *prim_size = pan_section_ptr(job, TILER_JOB, PRIMITIVE_SIZE); + enum pipe_prim_type prim = u_reduced_prim(info->mode); #if PAN_ARCH >= 6 - pan_section_pack(job, TILER_JOB, TILER, cfg) { - cfg.address = panfrost_batch_get_bifrost_tiler(batch, ~0); - } + pan_section_pack(job, TILER_JOB, TILER, cfg) { + cfg.address = panfrost_batch_get_bifrost_tiler(batch, ~0); + } - pan_section_pack(job, TILER_JOB, PADDING, cfg); + pan_section_pack(job, TILER_JOB, PADDING, cfg) + ; #endif - panfrost_emit_draw(pan_section_ptr(job, TILER_JOB, DRAW), - batch, true, prim, pos, fs_vary, varyings); + panfrost_emit_draw(pan_section_ptr(job, TILER_JOB, DRAW), batch, true, prim, + pos, fs_vary, varyings); - panfrost_emit_primitive_size(ctx, prim == PIPE_PRIM_POINTS, psiz, prim_size); + panfrost_emit_primitive_size(ctx, prim == PIPE_PRIM_POINTS, psiz, prim_size); } #endif static void panfrost_launch_xfb(struct panfrost_batch *batch, - const struct pipe_draw_info *info, - mali_ptr attribs, mali_ptr attrib_bufs, - unsigned count) + const struct pipe_draw_info *info, mali_ptr attribs, + mali_ptr attrib_bufs, unsigned count) { - struct panfrost_context *ctx = batch->ctx; + struct panfrost_context *ctx = batch->ctx; - struct panfrost_ptr t = - pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB); + struct panfrost_ptr t = pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB); - /* Nothing to do */ - if (batch->ctx->streamout.num_targets == 0) - return; + /* Nothing to do */ + if (batch->ctx->streamout.num_targets == 0) + return; - /* TODO: XFB with index buffers */ - //assert(info->index_size == 0); - u_trim_pipe_prim(info->mode, &count); + /* TODO: XFB with index buffers */ + // assert(info->index_size == 0); + u_trim_pipe_prim(info->mode, &count); - if (count == 0) - return; + if (count == 0) + return; - perf_debug_ctx(batch->ctx, "Emulating transform feedback"); + perf_debug_ctx(batch->ctx, "Emulating transform feedback"); - struct panfrost_uncompiled_shader *vs_uncompiled = 
ctx->uncompiled[PIPE_SHADER_VERTEX]; - struct panfrost_compiled_shader *vs = ctx->prog[PIPE_SHADER_VERTEX]; + struct panfrost_uncompiled_shader *vs_uncompiled = + ctx->uncompiled[PIPE_SHADER_VERTEX]; + struct panfrost_compiled_shader *vs = ctx->prog[PIPE_SHADER_VERTEX]; - vs_uncompiled->xfb->stream_output = vs->stream_output; + vs_uncompiled->xfb->stream_output = vs->stream_output; - mali_ptr saved_rsd = batch->rsd[PIPE_SHADER_VERTEX]; - mali_ptr saved_ubo = batch->uniform_buffers[PIPE_SHADER_VERTEX]; - mali_ptr saved_push = batch->push_uniforms[PIPE_SHADER_VERTEX]; + mali_ptr saved_rsd = batch->rsd[PIPE_SHADER_VERTEX]; + mali_ptr saved_ubo = batch->uniform_buffers[PIPE_SHADER_VERTEX]; + mali_ptr saved_push = batch->push_uniforms[PIPE_SHADER_VERTEX]; - ctx->uncompiled[PIPE_SHADER_VERTEX] = NULL; /* should not be read */ - ctx->prog[PIPE_SHADER_VERTEX] = vs_uncompiled->xfb; - batch->rsd[PIPE_SHADER_VERTEX] = panfrost_emit_compute_shader_meta(batch, PIPE_SHADER_VERTEX); + ctx->uncompiled[PIPE_SHADER_VERTEX] = NULL; /* should not be read */ + ctx->prog[PIPE_SHADER_VERTEX] = vs_uncompiled->xfb; + batch->rsd[PIPE_SHADER_VERTEX] = + panfrost_emit_compute_shader_meta(batch, PIPE_SHADER_VERTEX); #if PAN_ARCH >= 9 - pan_section_pack(t.cpu, COMPUTE_JOB, PAYLOAD, cfg) { - cfg.workgroup_size_x = 1; - cfg.workgroup_size_y = 1; - cfg.workgroup_size_z = 1; + pan_section_pack(t.cpu, COMPUTE_JOB, PAYLOAD, cfg) { + cfg.workgroup_size_x = 1; + cfg.workgroup_size_y = 1; + cfg.workgroup_size_z = 1; - cfg.workgroup_count_x = count; - cfg.workgroup_count_y = info->instance_count; - cfg.workgroup_count_z = 1; + cfg.workgroup_count_x = count; + cfg.workgroup_count_y = info->instance_count; + cfg.workgroup_count_z = 1; - panfrost_emit_shader(batch, &cfg.compute, PIPE_SHADER_VERTEX, - batch->rsd[PIPE_SHADER_VERTEX], - batch->tls.gpu); + panfrost_emit_shader(batch, &cfg.compute, PIPE_SHADER_VERTEX, + batch->rsd[PIPE_SHADER_VERTEX], batch->tls.gpu); - /* TODO: Indexing. Also, this is a legacy feature... */ - cfg.compute.attribute_offset = batch->ctx->offset_start; + /* TODO: Indexing. Also, this is a legacy feature... */ + cfg.compute.attribute_offset = batch->ctx->offset_start; - /* Transform feedback shaders do not use barriers or shared - * memory, so we may merge workgroups. - */ - cfg.allow_merging_workgroups = true; - cfg.task_increment = 1; - cfg.task_axis = MALI_TASK_AXIS_Z; - } + /* Transform feedback shaders do not use barriers or shared + * memory, so we may merge workgroups. 
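+       * (Unlike the launch_grid path, which also has to check the
+       * variable shared size, this can be set unconditionally for the
+       * emulated XFB job.)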
+ */ + cfg.allow_merging_workgroups = true; + cfg.task_increment = 1; + cfg.task_axis = MALI_TASK_AXIS_Z; + } #else - struct mali_invocation_packed invocation; + struct mali_invocation_packed invocation; - panfrost_pack_work_groups_compute(&invocation, - 1, count, info->instance_count, - 1, 1, 1, PAN_ARCH <= 5, false); + panfrost_pack_work_groups_compute(&invocation, 1, count, + info->instance_count, 1, 1, 1, + PAN_ARCH <= 5, false); - batch->uniform_buffers[PIPE_SHADER_VERTEX] = - panfrost_emit_const_buf(batch, PIPE_SHADER_VERTEX, NULL, - &batch->push_uniforms[PIPE_SHADER_VERTEX], NULL); + batch->uniform_buffers[PIPE_SHADER_VERTEX] = + panfrost_emit_const_buf(batch, PIPE_SHADER_VERTEX, NULL, + &batch->push_uniforms[PIPE_SHADER_VERTEX], NULL); - panfrost_draw_emit_vertex(batch, info, &invocation, 0, 0, - attribs, attrib_bufs, t.cpu); + panfrost_draw_emit_vertex(batch, info, &invocation, 0, 0, attribs, + attrib_bufs, t.cpu); #endif - enum mali_job_type job_type = MALI_JOB_TYPE_COMPUTE; + enum mali_job_type job_type = MALI_JOB_TYPE_COMPUTE; #if PAN_ARCH <= 5 - job_type = MALI_JOB_TYPE_VERTEX; + job_type = MALI_JOB_TYPE_VERTEX; #endif - panfrost_add_job(&batch->pool.base, &batch->scoreboard, job_type, - true, false, 0, 0, &t, false); + panfrost_add_job(&batch->pool.base, &batch->scoreboard, job_type, true, + false, 0, 0, &t, false); - ctx->uncompiled[PIPE_SHADER_VERTEX] = vs_uncompiled; - ctx->prog[PIPE_SHADER_VERTEX] = vs; - batch->rsd[PIPE_SHADER_VERTEX] = saved_rsd; - batch->uniform_buffers[PIPE_SHADER_VERTEX] = saved_ubo; - batch->push_uniforms[PIPE_SHADER_VERTEX] = saved_push; + ctx->uncompiled[PIPE_SHADER_VERTEX] = vs_uncompiled; + ctx->prog[PIPE_SHADER_VERTEX] = vs; + batch->rsd[PIPE_SHADER_VERTEX] = saved_rsd; + batch->uniform_buffers[PIPE_SHADER_VERTEX] = saved_ubo; + batch->push_uniforms[PIPE_SHADER_VERTEX] = saved_push; } static void panfrost_direct_draw(struct panfrost_batch *batch, - const struct pipe_draw_info *info, - unsigned drawid_offset, + const struct pipe_draw_info *info, unsigned drawid_offset, const struct pipe_draw_start_count_bias *draw) { - if (!draw->count || !info->instance_count) - return; + if (!draw->count || !info->instance_count) + return; - struct panfrost_context *ctx = batch->ctx; + struct panfrost_context *ctx = batch->ctx; - /* If we change whether we're drawing points, or whether point sprites - * are enabled (specified in the rasterizer), we may need to rebind - * shaders accordingly. This implicitly covers the case of rebinding - * framebuffers, because all dirty flags are set there. - */ - if ((ctx->dirty & PAN_DIRTY_RASTERIZER) || - ((ctx->active_prim == PIPE_PRIM_POINTS) ^ - (info->mode == PIPE_PRIM_POINTS))) { + /* If we change whether we're drawing points, or whether point sprites + * are enabled (specified in the rasterizer), we may need to rebind + * shaders accordingly. This implicitly covers the case of rebinding + * framebuffers, because all dirty flags are set there. + */ + if ((ctx->dirty & PAN_DIRTY_RASTERIZER) || + ((ctx->active_prim == PIPE_PRIM_POINTS) ^ + (info->mode == PIPE_PRIM_POINTS))) { - ctx->active_prim = info->mode; - panfrost_update_shader_variant(ctx, PIPE_SHADER_FRAGMENT); - } + ctx->active_prim = info->mode; + panfrost_update_shader_variant(ctx, PIPE_SHADER_FRAGMENT); + } - /* Take into account a negative bias */ - ctx->vertex_count = draw->count + (info->index_size ? abs(draw->index_bias) : 0); - ctx->instance_count = info->instance_count; - ctx->base_vertex = info->index_size ? 
draw->index_bias : 0; - ctx->base_instance = info->start_instance; - ctx->active_prim = info->mode; - ctx->drawid = drawid_offset; + /* Take into account a negative bias */ + ctx->vertex_count = + draw->count + (info->index_size ? abs(draw->index_bias) : 0); + ctx->instance_count = info->instance_count; + ctx->base_vertex = info->index_size ? draw->index_bias : 0; + ctx->base_instance = info->start_instance; + ctx->active_prim = info->mode; + ctx->drawid = drawid_offset; - struct panfrost_compiled_shader *vs = ctx->prog[PIPE_SHADER_VERTEX]; + struct panfrost_compiled_shader *vs = ctx->prog[PIPE_SHADER_VERTEX]; - bool idvs = vs->info.vs.idvs; - bool secondary_shader = vs->info.vs.secondary_enable; + bool idvs = vs->info.vs.idvs; + bool secondary_shader = vs->info.vs.secondary_enable; - UNUSED struct panfrost_ptr tiler, vertex; + UNUSED struct panfrost_ptr tiler, vertex; - if (idvs) { + if (idvs) { #if PAN_ARCH >= 9 - tiler = pan_pool_alloc_desc(&batch->pool.base, MALLOC_VERTEX_JOB); + tiler = pan_pool_alloc_desc(&batch->pool.base, MALLOC_VERTEX_JOB); #elif PAN_ARCH >= 6 - tiler = pan_pool_alloc_desc(&batch->pool.base, INDEXED_VERTEX_JOB); + tiler = pan_pool_alloc_desc(&batch->pool.base, INDEXED_VERTEX_JOB); #else - unreachable("IDVS is unsupported on Midgard"); + unreachable("IDVS is unsupported on Midgard"); #endif - } else { - vertex = pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB); - tiler = pan_pool_alloc_desc(&batch->pool.base, TILER_JOB); - } + } else { + vertex = pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB); + tiler = pan_pool_alloc_desc(&batch->pool.base, TILER_JOB); + } - unsigned vertex_count = ctx->vertex_count; + unsigned vertex_count = ctx->vertex_count; - unsigned min_index = 0, max_index = 0; - mali_ptr indices = 0; + unsigned min_index = 0, max_index = 0; + mali_ptr indices = 0; - if (info->index_size && PAN_ARCH >= 9) { - indices = panfrost_get_index_buffer(batch, info, draw); - } else if (info->index_size) { - indices = panfrost_get_index_buffer_bounded(batch, info, draw, - &min_index, - &max_index); + if (info->index_size && PAN_ARCH >= 9) { + indices = panfrost_get_index_buffer(batch, info, draw); + } else if (info->index_size) { + indices = panfrost_get_index_buffer_bounded(batch, info, draw, &min_index, + &max_index); - /* Use the corresponding values */ - vertex_count = max_index - min_index + 1; - ctx->offset_start = min_index + draw->index_bias; - } else { - ctx->offset_start = draw->start; - } + /* Use the corresponding values */ + vertex_count = max_index - min_index + 1; + ctx->offset_start = min_index + draw->index_bias; + } else { + ctx->offset_start = draw->start; + } - if (info->instance_count > 1) { - unsigned count = vertex_count; + if (info->instance_count > 1) { + unsigned count = vertex_count; - /* Index-Driven Vertex Shading requires different instances to - * have different cache lines for position results. Each vertex - * position is 16 bytes and the Mali cache line is 64 bytes, so - * the instance count must be aligned to 4 vertices. - */ - if (idvs) - count = ALIGN_POT(count, 4); + /* Index-Driven Vertex Shading requires different instances to + * have different cache lines for position results. Each vertex + * position is 16 bytes and the Mali cache line is 64 bytes, so + * the instance count must be aligned to 4 vertices. 
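+       * (For example, an 11-vertex instance is padded to 12 vertices:
+       * 12 * 16 = 192 bytes, a whole number of 64-byte cache lines, so
+       * the next instance's positions start on a fresh line.)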
+ */ + if (idvs) + count = ALIGN_POT(count, 4); - ctx->padded_count = panfrost_padded_vertex_count(count); - } else - ctx->padded_count = vertex_count; + ctx->padded_count = panfrost_padded_vertex_count(count); + } else + ctx->padded_count = vertex_count; - panfrost_statistics_record(ctx, info, draw); + panfrost_statistics_record(ctx, info, draw); #if PAN_ARCH <= 7 - struct mali_invocation_packed invocation; - if (info->instance_count > 1) { - panfrost_pack_work_groups_compute(&invocation, - 1, vertex_count, info->instance_count, - 1, 1, 1, true, false); - } else { - pan_pack(&invocation, INVOCATION, cfg) { - cfg.invocations = MALI_POSITIVE(vertex_count); - cfg.size_y_shift = 0; - cfg.size_z_shift = 0; - cfg.workgroups_x_shift = 0; - cfg.workgroups_y_shift = 0; - cfg.workgroups_z_shift = 32; - cfg.thread_group_split = MALI_SPLIT_MIN_EFFICIENT; - } - } + struct mali_invocation_packed invocation; + if (info->instance_count > 1) { + panfrost_pack_work_groups_compute(&invocation, 1, vertex_count, + info->instance_count, 1, 1, 1, true, + false); + } else { + pan_pack(&invocation, INVOCATION, cfg) { + cfg.invocations = MALI_POSITIVE(vertex_count); + cfg.size_y_shift = 0; + cfg.size_z_shift = 0; + cfg.workgroups_x_shift = 0; + cfg.workgroups_y_shift = 0; + cfg.workgroups_z_shift = 32; + cfg.thread_group_split = MALI_SPLIT_MIN_EFFICIENT; + } + } - /* Emit all sort of descriptors. */ - mali_ptr varyings = 0, vs_vary = 0, fs_vary = 0, pos = 0, psiz = 0; + /* Emit all sort of descriptors. */ + mali_ptr varyings = 0, vs_vary = 0, fs_vary = 0, pos = 0, psiz = 0; - panfrost_emit_varying_descriptor(batch, - ctx->padded_count * - ctx->instance_count, - &vs_vary, &fs_vary, &varyings, - NULL, &pos, &psiz, - info->mode == PIPE_PRIM_POINTS); + panfrost_emit_varying_descriptor( + batch, ctx->padded_count * ctx->instance_count, &vs_vary, &fs_vary, + &varyings, NULL, &pos, &psiz, info->mode == PIPE_PRIM_POINTS); - mali_ptr attribs, attrib_bufs; - attribs = panfrost_emit_vertex_data(batch, &attrib_bufs); + mali_ptr attribs, attrib_bufs; + attribs = panfrost_emit_vertex_data(batch, &attrib_bufs); #endif - panfrost_update_state_3d(batch); - panfrost_update_shader_state(batch, PIPE_SHADER_VERTEX); - panfrost_update_shader_state(batch, PIPE_SHADER_FRAGMENT); - panfrost_clean_state_3d(ctx); + panfrost_update_state_3d(batch); + panfrost_update_shader_state(batch, PIPE_SHADER_VERTEX); + panfrost_update_shader_state(batch, PIPE_SHADER_FRAGMENT); + panfrost_clean_state_3d(ctx); - if (ctx->uncompiled[PIPE_SHADER_VERTEX]->xfb) { + if (ctx->uncompiled[PIPE_SHADER_VERTEX]->xfb) { #if PAN_ARCH >= 9 - mali_ptr attribs = 0, attrib_bufs = 0; + mali_ptr attribs = 0, attrib_bufs = 0; #endif - panfrost_launch_xfb(batch, info, attribs, attrib_bufs, draw->count); - } + panfrost_launch_xfb(batch, info, attribs, attrib_bufs, draw->count); + } - /* Increment transform feedback offsets */ - panfrost_update_streamout_offsets(ctx); + /* Increment transform feedback offsets */ + panfrost_update_streamout_offsets(ctx); - /* Any side effects must be handled by the XFB shader, so we only need - * to run vertex shaders if we need rasterization. - */ - if (panfrost_batch_skip_rasterization(batch)) - return; + /* Any side effects must be handled by the XFB shader, so we only need + * to run vertex shaders if we need rasterization. 
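+    * (panfrost_batch_skip_rasterization also covers the case where the
+    * scissor culls everything; see the viewport ordering note in
+    * panfrost_draw_vbo below.)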
+ */ + if (panfrost_batch_skip_rasterization(batch)) + return; #if PAN_ARCH >= 9 - assert(idvs && "Memory allocated IDVS required on Valhall"); + assert(idvs && "Memory allocated IDVS required on Valhall"); - panfrost_emit_malloc_vertex(batch, info, draw, indices, - secondary_shader, tiler.cpu); + panfrost_emit_malloc_vertex(batch, info, draw, indices, secondary_shader, + tiler.cpu); - panfrost_add_job(&batch->pool.base, &batch->scoreboard, - MALI_JOB_TYPE_MALLOC_VERTEX, false, false, 0, - 0, &tiler, false); + panfrost_add_job(&batch->pool.base, &batch->scoreboard, + MALI_JOB_TYPE_MALLOC_VERTEX, false, false, 0, 0, &tiler, + false); #else - /* Fire off the draw itself */ - panfrost_draw_emit_tiler(batch, info, draw, &invocation, indices, - fs_vary, varyings, pos, psiz, secondary_shader, - tiler.cpu); - if (idvs) { + /* Fire off the draw itself */ + panfrost_draw_emit_tiler(batch, info, draw, &invocation, indices, fs_vary, + varyings, pos, psiz, secondary_shader, tiler.cpu); + if (idvs) { #if PAN_ARCH >= 6 - panfrost_draw_emit_vertex_section(batch, - vs_vary, varyings, - attribs, attrib_bufs, - pan_section_ptr(tiler.cpu, INDEXED_VERTEX_JOB, VERTEX_DRAW)); + panfrost_draw_emit_vertex_section( + batch, vs_vary, varyings, attribs, attrib_bufs, + pan_section_ptr(tiler.cpu, INDEXED_VERTEX_JOB, VERTEX_DRAW)); - panfrost_add_job(&batch->pool.base, &batch->scoreboard, - MALI_JOB_TYPE_INDEXED_VERTEX, false, false, - 0, 0, &tiler, false); + panfrost_add_job(&batch->pool.base, &batch->scoreboard, + MALI_JOB_TYPE_INDEXED_VERTEX, false, false, 0, 0, &tiler, + false); #endif - } else { - panfrost_draw_emit_vertex(batch, info, &invocation, - vs_vary, varyings, attribs, attrib_bufs, vertex.cpu); - panfrost_emit_vertex_tiler_jobs(batch, &vertex, &tiler); - } + } else { + panfrost_draw_emit_vertex(batch, info, &invocation, vs_vary, varyings, + attribs, attrib_bufs, vertex.cpu); + panfrost_emit_vertex_tiler_jobs(batch, &vertex, &tiler); + } #endif } static bool -panfrost_compatible_batch_state(struct panfrost_batch *batch, - bool points) +panfrost_compatible_batch_state(struct panfrost_batch *batch, bool points) { - /* Only applies on Valhall */ - if (PAN_ARCH < 9) - return true; + /* Only applies on Valhall */ + if (PAN_ARCH < 9) + return true; - struct panfrost_context *ctx = batch->ctx; - struct pipe_rasterizer_state *rast = &ctx->rasterizer->base; + struct panfrost_context *ctx = batch->ctx; + struct pipe_rasterizer_state *rast = &ctx->rasterizer->base; - bool coord = (rast->sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT); - bool first = rast->flatshade_first; + bool coord = (rast->sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT); + bool first = rast->flatshade_first; - /* gl_PointCoord orientation only matters when drawing points, but - * provoking vertex doesn't matter for points. - */ - if (points) - return pan_tristate_set(&batch->sprite_coord_origin, coord); - else - return pan_tristate_set(&batch->first_provoking_vertex, first); + /* gl_PointCoord orientation only matters when drawing points, but + * provoking vertex doesn't matter for points. 
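+    * (If pan_tristate_set fails because the batch is already committed
+    * to the opposite value, panfrost_draw_vbo below falls back to a
+    * fresh batch, where setting it is guaranteed to succeed.)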
+ */ + if (points) + return pan_tristate_set(&batch->sprite_coord_origin, coord); + else + return pan_tristate_set(&batch->first_provoking_vertex, first); } static void -panfrost_draw_vbo(struct pipe_context *pipe, - const struct pipe_draw_info *info, +panfrost_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info, unsigned drawid_offset, const struct pipe_draw_indirect_info *indirect, const struct pipe_draw_start_count_bias *draws, unsigned num_draws) { - struct panfrost_context *ctx = pan_context(pipe); - struct panfrost_device *dev = pan_device(pipe->screen); + struct panfrost_context *ctx = pan_context(pipe); + struct panfrost_device *dev = pan_device(pipe->screen); - if (!panfrost_render_condition_check(ctx)) - return; + if (!panfrost_render_condition_check(ctx)) + return; - ctx->draw_calls++; + ctx->draw_calls++; - /* Emulate indirect draws on JM */ - if (indirect && indirect->buffer) { - assert(num_draws == 1); - util_draw_indirect(pipe, info, indirect); - perf_debug(dev, "Emulating indirect draw on the CPU"); - return; - } + /* Emulate indirect draws on JM */ + if (indirect && indirect->buffer) { + assert(num_draws == 1); + util_draw_indirect(pipe, info, indirect); + perf_debug(dev, "Emulating indirect draw on the CPU"); + return; + } - /* Do some common setup */ - struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); + /* Do some common setup */ + struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); - /* Don't add too many jobs to a single batch. Hardware has a hard limit - * of 65536 jobs, but we choose a smaller soft limit (arbitrary) to - * avoid the risk of timeouts. This might not be a good idea. */ - if (unlikely(batch->scoreboard.job_index > 10000)) - batch = panfrost_get_fresh_batch_for_fbo(ctx, "Too many draws"); + /* Don't add too many jobs to a single batch. Hardware has a hard limit + * of 65536 jobs, but we choose a smaller soft limit (arbitrary) to + * avoid the risk of timeouts. This might not be a good idea. */ + if (unlikely(batch->scoreboard.job_index > 10000)) + batch = panfrost_get_fresh_batch_for_fbo(ctx, "Too many draws"); - bool points = (info->mode == PIPE_PRIM_POINTS); + bool points = (info->mode == PIPE_PRIM_POINTS); - if (unlikely(!panfrost_compatible_batch_state(batch, points))) { - batch = panfrost_get_fresh_batch_for_fbo(ctx, "State change"); + if (unlikely(!panfrost_compatible_batch_state(batch, points))) { + batch = panfrost_get_fresh_batch_for_fbo(ctx, "State change"); - ASSERTED bool succ = panfrost_compatible_batch_state(batch, points); - assert(succ && "must be able to set state for a fresh batch"); - } + ASSERTED bool succ = panfrost_compatible_batch_state(batch, points); + assert(succ && "must be able to set state for a fresh batch"); + } - /* panfrost_batch_skip_rasterization reads - * batch->scissor_culls_everything, which is set by - * panfrost_emit_viewport, so call that first. - */ - if (ctx->dirty & (PAN_DIRTY_VIEWPORT | PAN_DIRTY_SCISSOR)) - batch->viewport = panfrost_emit_viewport(batch); + /* panfrost_batch_skip_rasterization reads + * batch->scissor_culls_everything, which is set by + * panfrost_emit_viewport, so call that first. 
+ */ + if (ctx->dirty & (PAN_DIRTY_VIEWPORT | PAN_DIRTY_SCISSOR)) + batch->viewport = panfrost_emit_viewport(batch); - /* Mark everything dirty when debugging */ - if (unlikely(dev->debug & PAN_DBG_DIRTY)) - panfrost_dirty_state_all(ctx); + /* Mark everything dirty when debugging */ + if (unlikely(dev->debug & PAN_DBG_DIRTY)) + panfrost_dirty_state_all(ctx); - /* Conservatively assume draw parameters always change */ - ctx->dirty |= PAN_DIRTY_PARAMS | PAN_DIRTY_DRAWID; + /* Conservatively assume draw parameters always change */ + ctx->dirty |= PAN_DIRTY_PARAMS | PAN_DIRTY_DRAWID; - struct pipe_draw_info tmp_info = *info; - unsigned drawid = drawid_offset; + struct pipe_draw_info tmp_info = *info; + unsigned drawid = drawid_offset; - for (unsigned i = 0; i < num_draws; i++) { - panfrost_direct_draw(batch, &tmp_info, drawid, &draws[i]); - - if (tmp_info.increment_draw_id) { - ctx->dirty |= PAN_DIRTY_DRAWID; - drawid++; - } - } + for (unsigned i = 0; i < num_draws; i++) { + panfrost_direct_draw(batch, &tmp_info, drawid, &draws[i]); + if (tmp_info.increment_draw_id) { + ctx->dirty |= PAN_DIRTY_DRAWID; + drawid++; + } + } } /* Launch grid is the compute equivalent of draw_vbo, so in this routine, we @@ -3838,162 +3753,156 @@ panfrost_draw_vbo(struct pipe_context *pipe, static void panfrost_launch_grid(struct pipe_context *pipe, - const struct pipe_grid_info *info) + const struct pipe_grid_info *info) { - struct panfrost_context *ctx = pan_context(pipe); + struct panfrost_context *ctx = pan_context(pipe); - /* XXX - shouldn't be necessary with working memory barriers. Affected - * test: KHR-GLES31.core.compute_shader.pipeline-post-xfb */ - panfrost_flush_all_batches(ctx, "Launch grid pre-barrier"); + /* XXX - shouldn't be necessary with working memory barriers. 
Affected + * test: KHR-GLES31.core.compute_shader.pipeline-post-xfb */ + panfrost_flush_all_batches(ctx, "Launch grid pre-barrier"); - struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); + struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); - if (info->indirect && !PAN_GPU_INDIRECTS) { - struct pipe_transfer *transfer; - uint32_t *params = pipe_buffer_map_range(pipe, info->indirect, - info->indirect_offset, - 3 * sizeof(uint32_t), - PIPE_MAP_READ, - &transfer); + if (info->indirect && !PAN_GPU_INDIRECTS) { + struct pipe_transfer *transfer; + uint32_t *params = + pipe_buffer_map_range(pipe, info->indirect, info->indirect_offset, + 3 * sizeof(uint32_t), PIPE_MAP_READ, &transfer); - struct pipe_grid_info direct = *info; - direct.indirect = NULL; - direct.grid[0] = params[0]; - direct.grid[1] = params[1]; - direct.grid[2] = params[2]; - pipe_buffer_unmap(pipe, transfer); + struct pipe_grid_info direct = *info; + direct.indirect = NULL; + direct.grid[0] = params[0]; + direct.grid[1] = params[1]; + direct.grid[2] = params[2]; + pipe_buffer_unmap(pipe, transfer); - if (params[0] && params[1] && params[2]) - panfrost_launch_grid(pipe, &direct); + if (params[0] && params[1] && params[2]) + panfrost_launch_grid(pipe, &direct); - return; - } + return; + } - ctx->compute_grid = info; + ctx->compute_grid = info; - struct panfrost_ptr t = - pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB); + struct panfrost_ptr t = pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB); - /* Invoke according to the grid info */ + /* Invoke according to the grid info */ - unsigned num_wg[3] = { info->grid[0], info->grid[1], info->grid[2] }; + unsigned num_wg[3] = {info->grid[0], info->grid[1], info->grid[2]}; - if (info->indirect) - num_wg[0] = num_wg[1] = num_wg[2] = 1; + if (info->indirect) + num_wg[0] = num_wg[1] = num_wg[2] = 1; - /* Conservatively assume workgroup size changes every launch */ - ctx->dirty |= PAN_DIRTY_PARAMS; + /* Conservatively assume workgroup size changes every launch */ + ctx->dirty |= PAN_DIRTY_PARAMS; - panfrost_update_shader_state(batch, PIPE_SHADER_COMPUTE); + panfrost_update_shader_state(batch, PIPE_SHADER_COMPUTE); #if PAN_ARCH <= 7 - panfrost_pack_work_groups_compute(pan_section_ptr(t.cpu, COMPUTE_JOB, INVOCATION), - num_wg[0], num_wg[1], num_wg[2], - info->block[0], info->block[1], - info->block[2], - false, info->indirect != NULL); + panfrost_pack_work_groups_compute( + pan_section_ptr(t.cpu, COMPUTE_JOB, INVOCATION), num_wg[0], num_wg[1], + num_wg[2], info->block[0], info->block[1], info->block[2], false, + info->indirect != NULL); - pan_section_pack(t.cpu, COMPUTE_JOB, PARAMETERS, cfg) { - cfg.job_task_split = - util_logbase2_ceil(info->block[0] + 1) + - util_logbase2_ceil(info->block[1] + 1) + - util_logbase2_ceil(info->block[2] + 1); - } + pan_section_pack(t.cpu, COMPUTE_JOB, PARAMETERS, cfg) { + cfg.job_task_split = util_logbase2_ceil(info->block[0] + 1) + + util_logbase2_ceil(info->block[1] + 1) + + util_logbase2_ceil(info->block[2] + 1); + } - pan_section_pack(t.cpu, COMPUTE_JOB, DRAW, cfg) { - cfg.state = batch->rsd[PIPE_SHADER_COMPUTE]; - cfg.attributes = panfrost_emit_image_attribs(batch, &cfg.attribute_buffers, PIPE_SHADER_COMPUTE); - cfg.thread_storage = panfrost_emit_shared_memory(batch, info); - cfg.uniform_buffers = batch->uniform_buffers[PIPE_SHADER_COMPUTE]; - cfg.push_uniforms = batch->push_uniforms[PIPE_SHADER_COMPUTE]; - cfg.textures = batch->textures[PIPE_SHADER_COMPUTE]; - cfg.samplers = batch->samplers[PIPE_SHADER_COMPUTE]; - } + 
pan_section_pack(t.cpu, COMPUTE_JOB, DRAW, cfg) { + cfg.state = batch->rsd[PIPE_SHADER_COMPUTE]; + cfg.attributes = panfrost_emit_image_attribs( + batch, &cfg.attribute_buffers, PIPE_SHADER_COMPUTE); + cfg.thread_storage = panfrost_emit_shared_memory(batch, info); + cfg.uniform_buffers = batch->uniform_buffers[PIPE_SHADER_COMPUTE]; + cfg.push_uniforms = batch->push_uniforms[PIPE_SHADER_COMPUTE]; + cfg.textures = batch->textures[PIPE_SHADER_COMPUTE]; + cfg.samplers = batch->samplers[PIPE_SHADER_COMPUTE]; + } #else - struct panfrost_compiled_shader *cs = ctx->prog[PIPE_SHADER_COMPUTE]; + struct panfrost_compiled_shader *cs = ctx->prog[PIPE_SHADER_COMPUTE]; - pan_section_pack(t.cpu, COMPUTE_JOB, PAYLOAD, cfg) { - cfg.workgroup_size_x = info->block[0]; - cfg.workgroup_size_y = info->block[1]; - cfg.workgroup_size_z = info->block[2]; + pan_section_pack(t.cpu, COMPUTE_JOB, PAYLOAD, cfg) { + cfg.workgroup_size_x = info->block[0]; + cfg.workgroup_size_y = info->block[1]; + cfg.workgroup_size_z = info->block[2]; - cfg.workgroup_count_x = num_wg[0]; - cfg.workgroup_count_y = num_wg[1]; - cfg.workgroup_count_z = num_wg[2]; + cfg.workgroup_count_x = num_wg[0]; + cfg.workgroup_count_y = num_wg[1]; + cfg.workgroup_count_z = num_wg[2]; - panfrost_emit_shader(batch, &cfg.compute, PIPE_SHADER_COMPUTE, - batch->rsd[PIPE_SHADER_COMPUTE], - panfrost_emit_shared_memory(batch, info)); + panfrost_emit_shader(batch, &cfg.compute, PIPE_SHADER_COMPUTE, + batch->rsd[PIPE_SHADER_COMPUTE], + panfrost_emit_shared_memory(batch, info)); - /* Workgroups may be merged if the shader does not use barriers - * or shared memory. This condition is checked against the - * static shared_size at compile-time. We need to check the - * variable shared size at launch_grid time, because the - * compiler doesn't know about that. - */ - cfg.allow_merging_workgroups = - cs->info.cs.allow_merging_workgroups && - (info->variable_shared_mem == 0); + /* Workgroups may be merged if the shader does not use barriers + * or shared memory. This condition is checked against the + * static shared_size at compile-time. We need to check the + * variable shared size at launch_grid time, because the + * compiler doesn't know about that. 
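+       * (Hence the extra info->variable_shared_mem == 0 term below: a
+       * shader compiled with no static shared memory may still be
+       * launched with variable shared memory, which must disable
+       * merging.)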
+ */ + cfg.allow_merging_workgroups = cs->info.cs.allow_merging_workgroups && + (info->variable_shared_mem == 0); - cfg.task_increment = 1; - cfg.task_axis = MALI_TASK_AXIS_Z; - } + cfg.task_increment = 1; + cfg.task_axis = MALI_TASK_AXIS_Z; + } #endif - unsigned indirect_dep = 0; + unsigned indirect_dep = 0; #if PAN_GPU_INDIRECTS - if (info->indirect) { - struct pan_indirect_dispatch_info indirect = { - .job = t.gpu, - .indirect_dim = pan_resource(info->indirect)->image.data.bo->ptr.gpu + - info->indirect_offset, - .num_wg_sysval = { - batch->num_wg_sysval[0], - batch->num_wg_sysval[1], - batch->num_wg_sysval[2], - }, - }; + if (info->indirect) { + struct pan_indirect_dispatch_info indirect = { + .job = t.gpu, + .indirect_dim = pan_resource(info->indirect)->image.data.bo->ptr.gpu + + info->indirect_offset, + .num_wg_sysval = + { + batch->num_wg_sysval[0], + batch->num_wg_sysval[1], + batch->num_wg_sysval[2], + }, + }; - indirect_dep = GENX(pan_indirect_dispatch_emit)(&batch->pool.base, - &batch->scoreboard, - &indirect); - } + indirect_dep = GENX(pan_indirect_dispatch_emit)( + &batch->pool.base, &batch->scoreboard, &indirect); + } #endif - panfrost_add_job(&batch->pool.base, &batch->scoreboard, - MALI_JOB_TYPE_COMPUTE, true, false, - indirect_dep, 0, &t, false); - panfrost_flush_all_batches(ctx, "Launch grid post-barrier"); + panfrost_add_job(&batch->pool.base, &batch->scoreboard, + MALI_JOB_TYPE_COMPUTE, true, false, indirect_dep, 0, &t, + false); + panfrost_flush_all_batches(ctx, "Launch grid post-barrier"); } static void * -panfrost_create_rasterizer_state( - struct pipe_context *pctx, - const struct pipe_rasterizer_state *cso) +panfrost_create_rasterizer_state(struct pipe_context *pctx, + const struct pipe_rasterizer_state *cso) { - struct panfrost_rasterizer *so = CALLOC_STRUCT(panfrost_rasterizer); + struct panfrost_rasterizer *so = CALLOC_STRUCT(panfrost_rasterizer); - so->base = *cso; + so->base = *cso; - /* Gauranteed with the core GL call, so don't expose ARB_polygon_offset */ - assert(cso->offset_clamp == 0.0); + /* Gauranteed with the core GL call, so don't expose ARB_polygon_offset */ + assert(cso->offset_clamp == 0.0); #if PAN_ARCH <= 7 - pan_pack(&so->multisample, MULTISAMPLE_MISC, cfg) { - cfg.multisample_enable = cso->multisample; - cfg.fixed_function_near_discard = cso->depth_clip_near; - cfg.fixed_function_far_discard = cso->depth_clip_far; - cfg.shader_depth_range_fixed = true; - } + pan_pack(&so->multisample, MULTISAMPLE_MISC, cfg) { + cfg.multisample_enable = cso->multisample; + cfg.fixed_function_near_discard = cso->depth_clip_near; + cfg.fixed_function_far_discard = cso->depth_clip_far; + cfg.shader_depth_range_fixed = true; + } - pan_pack(&so->stencil_misc, STENCIL_MASK_MISC, cfg) { - cfg.front_facing_depth_bias = cso->offset_tri; - cfg.back_facing_depth_bias = cso->offset_tri; - cfg.single_sampled_lines = !cso->multisample; - } + pan_pack(&so->stencil_misc, STENCIL_MASK_MISC, cfg) { + cfg.front_facing_depth_bias = cso->offset_tri; + cfg.back_facing_depth_bias = cso->offset_tri; + cfg.single_sampled_lines = !cso->multisample; + } #endif - return so; + return so; } #if PAN_ARCH >= 9 @@ -4008,90 +3917,96 @@ panfrost_pack_attribute(struct panfrost_device *dev, const struct pipe_vertex_element el, struct mali_attribute_packed *out) { - pan_pack(out, ATTRIBUTE, cfg) { - cfg.table = PAN_TABLE_ATTRIBUTE_BUFFER; - cfg.frequency = (el.instance_divisor > 0) ? 
- MALI_ATTRIBUTE_FREQUENCY_INSTANCE : - MALI_ATTRIBUTE_FREQUENCY_VERTEX; - cfg.format = dev->formats[el.src_format].hw; - cfg.offset = el.src_offset; - cfg.buffer_index = el.vertex_buffer_index; + pan_pack(out, ATTRIBUTE, cfg) { + cfg.table = PAN_TABLE_ATTRIBUTE_BUFFER; + cfg.frequency = (el.instance_divisor > 0) + ? MALI_ATTRIBUTE_FREQUENCY_INSTANCE + : MALI_ATTRIBUTE_FREQUENCY_VERTEX; + cfg.format = dev->formats[el.src_format].hw; + cfg.offset = el.src_offset; + cfg.buffer_index = el.vertex_buffer_index; - if (el.instance_divisor == 0) { - /* Per-vertex */ - cfg.attribute_type = MALI_ATTRIBUTE_TYPE_1D; - cfg.frequency = MALI_ATTRIBUTE_FREQUENCY_VERTEX; - cfg.offset_enable = true; - } else if (util_is_power_of_two_or_zero(el.instance_divisor)) { - /* Per-instance, POT divisor */ - cfg.attribute_type = MALI_ATTRIBUTE_TYPE_1D_POT_DIVISOR; - cfg.frequency = MALI_ATTRIBUTE_FREQUENCY_INSTANCE; - cfg.divisor_r = __builtin_ctz(el.instance_divisor); - } else { - /* Per-instance, NPOT divisor */ - cfg.attribute_type = MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR; - cfg.frequency = MALI_ATTRIBUTE_FREQUENCY_INSTANCE; + if (el.instance_divisor == 0) { + /* Per-vertex */ + cfg.attribute_type = MALI_ATTRIBUTE_TYPE_1D; + cfg.frequency = MALI_ATTRIBUTE_FREQUENCY_VERTEX; + cfg.offset_enable = true; + } else if (util_is_power_of_two_or_zero(el.instance_divisor)) { + /* Per-instance, POT divisor */ + cfg.attribute_type = MALI_ATTRIBUTE_TYPE_1D_POT_DIVISOR; + cfg.frequency = MALI_ATTRIBUTE_FREQUENCY_INSTANCE; + cfg.divisor_r = __builtin_ctz(el.instance_divisor); + } else { + /* Per-instance, NPOT divisor */ + cfg.attribute_type = MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR; + cfg.frequency = MALI_ATTRIBUTE_FREQUENCY_INSTANCE; - cfg.divisor_d = - panfrost_compute_magic_divisor(el.instance_divisor, - &cfg.divisor_r, &cfg.divisor_e); - } - } + cfg.divisor_d = panfrost_compute_magic_divisor( + el.instance_divisor, &cfg.divisor_r, &cfg.divisor_e); + } + } } #endif static void * -panfrost_create_vertex_elements_state( - struct pipe_context *pctx, - unsigned num_elements, - const struct pipe_vertex_element *elements) +panfrost_create_vertex_elements_state(struct pipe_context *pctx, + unsigned num_elements, + const struct pipe_vertex_element *elements) { - struct panfrost_vertex_state *so = CALLOC_STRUCT(panfrost_vertex_state); - struct panfrost_device *dev = pan_device(pctx->screen); + struct panfrost_vertex_state *so = CALLOC_STRUCT(panfrost_vertex_state); + struct panfrost_device *dev = pan_device(pctx->screen); - so->num_elements = num_elements; - memcpy(so->pipe, elements, sizeof(*elements) * num_elements); + so->num_elements = num_elements; + memcpy(so->pipe, elements, sizeof(*elements) * num_elements); #if PAN_ARCH >= 9 - for (unsigned i = 0; i < num_elements; ++i) - panfrost_pack_attribute(dev, elements[i], &so->attributes[i]); + for (unsigned i = 0; i < num_elements; ++i) + panfrost_pack_attribute(dev, elements[i], &so->attributes[i]); #else - /* Assign attribute buffers corresponding to the vertex buffers, keyed - * for a particular divisor since that's how instancing works on Mali */ - for (unsigned i = 0; i < num_elements; ++i) { - so->element_buffer[i] = pan_assign_vertex_buffer( - so->buffers, &so->nr_bufs, - elements[i].vertex_buffer_index, - elements[i].instance_divisor); - } + /* Assign attribute buffers corresponding to the vertex buffers, keyed + * for a particular divisor since that's how instancing works on Mali */ + for (unsigned i = 0; i < num_elements; ++i) { + so->element_buffer[i] = 
pan_assign_vertex_buffer( + so->buffers, &so->nr_bufs, elements[i].vertex_buffer_index, + elements[i].instance_divisor); + } - for (int i = 0; i < num_elements; ++i) { - enum pipe_format fmt = elements[i].src_format; - so->formats[i] = dev->formats[fmt].hw; - } + for (int i = 0; i < num_elements; ++i) { + enum pipe_format fmt = elements[i].src_format; + so->formats[i] = dev->formats[fmt].hw; + } - /* Let's also prepare vertex builtins */ - so->formats[PAN_VERTEX_ID] = dev->formats[PIPE_FORMAT_R32_UINT].hw; - so->formats[PAN_INSTANCE_ID] = dev->formats[PIPE_FORMAT_R32_UINT].hw; + /* Let's also prepare vertex builtins */ + so->formats[PAN_VERTEX_ID] = dev->formats[PIPE_FORMAT_R32_UINT].hw; + so->formats[PAN_INSTANCE_ID] = dev->formats[PIPE_FORMAT_R32_UINT].hw; #endif - return so; + return so; } static inline unsigned pan_pipe_to_stencil_op(enum pipe_stencil_op in) { - switch (in) { - case PIPE_STENCIL_OP_KEEP: return MALI_STENCIL_OP_KEEP; - case PIPE_STENCIL_OP_ZERO: return MALI_STENCIL_OP_ZERO; - case PIPE_STENCIL_OP_REPLACE: return MALI_STENCIL_OP_REPLACE; - case PIPE_STENCIL_OP_INCR: return MALI_STENCIL_OP_INCR_SAT; - case PIPE_STENCIL_OP_DECR: return MALI_STENCIL_OP_DECR_SAT; - case PIPE_STENCIL_OP_INCR_WRAP: return MALI_STENCIL_OP_INCR_WRAP; - case PIPE_STENCIL_OP_DECR_WRAP: return MALI_STENCIL_OP_DECR_WRAP; - case PIPE_STENCIL_OP_INVERT: return MALI_STENCIL_OP_INVERT; - default: unreachable("Invalid stencil op"); - } + switch (in) { + case PIPE_STENCIL_OP_KEEP: + return MALI_STENCIL_OP_KEEP; + case PIPE_STENCIL_OP_ZERO: + return MALI_STENCIL_OP_ZERO; + case PIPE_STENCIL_OP_REPLACE: + return MALI_STENCIL_OP_REPLACE; + case PIPE_STENCIL_OP_INCR: + return MALI_STENCIL_OP_INCR_SAT; + case PIPE_STENCIL_OP_DECR: + return MALI_STENCIL_OP_DECR_SAT; + case PIPE_STENCIL_OP_INCR_WRAP: + return MALI_STENCIL_OP_INCR_WRAP; + case PIPE_STENCIL_OP_DECR_WRAP: + return MALI_STENCIL_OP_DECR_WRAP; + case PIPE_STENCIL_OP_INVERT: + return MALI_STENCIL_OP_INVERT; + default: + unreachable("Invalid stencil op"); + } } #if PAN_ARCH <= 7 @@ -4099,127 +4014,126 @@ static inline void pan_pipe_to_stencil(const struct pipe_stencil_state *in, struct mali_stencil_packed *out) { - pan_pack(out, STENCIL, s) { - s.mask = in->valuemask; - s.compare_function = (enum mali_func) in->func; - s.stencil_fail = pan_pipe_to_stencil_op(in->fail_op); - s.depth_fail = pan_pipe_to_stencil_op(in->zfail_op); - s.depth_pass = pan_pipe_to_stencil_op(in->zpass_op); - } + pan_pack(out, STENCIL, s) { + s.mask = in->valuemask; + s.compare_function = (enum mali_func)in->func; + s.stencil_fail = pan_pipe_to_stencil_op(in->fail_op); + s.depth_fail = pan_pipe_to_stencil_op(in->zfail_op); + s.depth_pass = pan_pipe_to_stencil_op(in->zpass_op); + } } #endif static bool pipe_zs_always_passes(const struct pipe_depth_stencil_alpha_state *zsa) { - if (zsa->depth_enabled && zsa->depth_func != PIPE_FUNC_ALWAYS) - return false; + if (zsa->depth_enabled && zsa->depth_func != PIPE_FUNC_ALWAYS) + return false; - if (zsa->stencil[0].enabled && zsa->stencil[0].func != PIPE_FUNC_ALWAYS) - return false; + if (zsa->stencil[0].enabled && zsa->stencil[0].func != PIPE_FUNC_ALWAYS) + return false; - if (zsa->stencil[1].enabled && zsa->stencil[1].func != PIPE_FUNC_ALWAYS) - return false; + if (zsa->stencil[1].enabled && zsa->stencil[1].func != PIPE_FUNC_ALWAYS) + return false; - return true; + return true; } static void * -panfrost_create_depth_stencil_state(struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *zsa) 
+panfrost_create_depth_stencil_state( + struct pipe_context *pipe, const struct pipe_depth_stencil_alpha_state *zsa) { - struct panfrost_zsa_state *so = CALLOC_STRUCT(panfrost_zsa_state); - so->base = *zsa; + struct panfrost_zsa_state *so = CALLOC_STRUCT(panfrost_zsa_state); + so->base = *zsa; - const struct pipe_stencil_state front = zsa->stencil[0]; - const struct pipe_stencil_state back = - zsa->stencil[1].enabled ? zsa->stencil[1] : front; + const struct pipe_stencil_state front = zsa->stencil[0]; + const struct pipe_stencil_state back = + zsa->stencil[1].enabled ? zsa->stencil[1] : front; - enum mali_func depth_func = zsa->depth_enabled ? - (enum mali_func) zsa->depth_func : MALI_FUNC_ALWAYS; + enum mali_func depth_func = + zsa->depth_enabled ? (enum mali_func)zsa->depth_func : MALI_FUNC_ALWAYS; - /* Normalize (there's no separate enable) */ - if (PAN_ARCH <= 5 && !zsa->alpha_enabled) - so->base.alpha_func = MALI_FUNC_ALWAYS; + /* Normalize (there's no separate enable) */ + if (PAN_ARCH <= 5 && !zsa->alpha_enabled) + so->base.alpha_func = MALI_FUNC_ALWAYS; #if PAN_ARCH <= 7 - /* Prepack relevant parts of the Renderer State Descriptor. They will - * be ORed in at draw-time */ - pan_pack(&so->rsd_depth, MULTISAMPLE_MISC, cfg) { - cfg.depth_function = depth_func; - cfg.depth_write_mask = zsa->depth_writemask; - } + /* Prepack relevant parts of the Renderer State Descriptor. They will + * be ORed in at draw-time */ + pan_pack(&so->rsd_depth, MULTISAMPLE_MISC, cfg) { + cfg.depth_function = depth_func; + cfg.depth_write_mask = zsa->depth_writemask; + } - pan_pack(&so->rsd_stencil, STENCIL_MASK_MISC, cfg) { - cfg.stencil_enable = front.enabled; - cfg.stencil_mask_front = front.writemask; - cfg.stencil_mask_back = back.writemask; + pan_pack(&so->rsd_stencil, STENCIL_MASK_MISC, cfg) { + cfg.stencil_enable = front.enabled; + cfg.stencil_mask_front = front.writemask; + cfg.stencil_mask_back = back.writemask; #if PAN_ARCH <= 5 - cfg.alpha_test_compare_function = - (enum mali_func) so->base.alpha_func; + cfg.alpha_test_compare_function = (enum mali_func)so->base.alpha_func; #endif - } + } - /* Stencil tests have their own words in the RSD */ - pan_pipe_to_stencil(&front, &so->stencil_front); - pan_pipe_to_stencil(&back, &so->stencil_back); + /* Stencil tests have their own words in the RSD */ + pan_pipe_to_stencil(&front, &so->stencil_front); + pan_pipe_to_stencil(&back, &so->stencil_back); #else - pan_pack(&so->desc, DEPTH_STENCIL, cfg) { - cfg.front_compare_function = (enum mali_func) front.func; - cfg.front_stencil_fail = pan_pipe_to_stencil_op(front.fail_op); - cfg.front_depth_fail = pan_pipe_to_stencil_op(front.zfail_op); - cfg.front_depth_pass = pan_pipe_to_stencil_op(front.zpass_op); + pan_pack(&so->desc, DEPTH_STENCIL, cfg) { + cfg.front_compare_function = (enum mali_func)front.func; + cfg.front_stencil_fail = pan_pipe_to_stencil_op(front.fail_op); + cfg.front_depth_fail = pan_pipe_to_stencil_op(front.zfail_op); + cfg.front_depth_pass = pan_pipe_to_stencil_op(front.zpass_op); - cfg.back_compare_function = (enum mali_func) back.func; - cfg.back_stencil_fail = pan_pipe_to_stencil_op(back.fail_op); - cfg.back_depth_fail = pan_pipe_to_stencil_op(back.zfail_op); - cfg.back_depth_pass = pan_pipe_to_stencil_op(back.zpass_op); + cfg.back_compare_function = (enum mali_func)back.func; + cfg.back_stencil_fail = pan_pipe_to_stencil_op(back.fail_op); + cfg.back_depth_fail = pan_pipe_to_stencil_op(back.zfail_op); + cfg.back_depth_pass = pan_pipe_to_stencil_op(back.zpass_op); - cfg.stencil_test_enable = 
front.enabled; - cfg.front_write_mask = front.writemask; - cfg.back_write_mask = back.writemask; - cfg.front_value_mask = front.valuemask; - cfg.back_value_mask = back.valuemask; + cfg.stencil_test_enable = front.enabled; + cfg.front_write_mask = front.writemask; + cfg.back_write_mask = back.writemask; + cfg.front_value_mask = front.valuemask; + cfg.back_value_mask = back.valuemask; - cfg.depth_write_enable = zsa->depth_writemask; - cfg.depth_function = depth_func; - } + cfg.depth_write_enable = zsa->depth_writemask; + cfg.depth_function = depth_func; + } #endif - so->enabled = zsa->stencil[0].enabled || - (zsa->depth_enabled && zsa->depth_func != PIPE_FUNC_ALWAYS); + so->enabled = zsa->stencil[0].enabled || + (zsa->depth_enabled && zsa->depth_func != PIPE_FUNC_ALWAYS); - so->zs_always_passes = pipe_zs_always_passes(zsa); - so->writes_zs = util_writes_depth_stencil(zsa); + so->zs_always_passes = pipe_zs_always_passes(zsa); + so->writes_zs = util_writes_depth_stencil(zsa); - /* TODO: Bounds test should be easy */ - assert(!zsa->depth_bounds_test); + /* TODO: Bounds test should be easy */ + assert(!zsa->depth_bounds_test); - return so; + return so; } static struct pipe_sampler_view * -panfrost_create_sampler_view( - struct pipe_context *pctx, - struct pipe_resource *texture, - const struct pipe_sampler_view *template) +panfrost_create_sampler_view(struct pipe_context *pctx, + struct pipe_resource *texture, + const struct pipe_sampler_view *template) { - struct panfrost_context *ctx = pan_context(pctx); - struct panfrost_sampler_view *so = rzalloc(pctx, struct panfrost_sampler_view); + struct panfrost_context *ctx = pan_context(pctx); + struct panfrost_sampler_view *so = + rzalloc(pctx, struct panfrost_sampler_view); - pan_legalize_afbc_format(ctx, pan_resource(texture), template->format); + pan_legalize_afbc_format(ctx, pan_resource(texture), template->format); - pipe_reference(NULL, &texture->reference); + pipe_reference(NULL, &texture->reference); - so->base = *template; - so->base.texture = texture; - so->base.reference.count = 1; - so->base.context = pctx; + so->base = *template; + so->base.texture = texture; + so->base.reference.count = 1; + so->base.context = pctx; - panfrost_create_sampler_view_bo(so, pctx, texture); + panfrost_create_sampler_view_bo(so, pctx, texture); - return (struct pipe_sampler_view *) so; + return (struct pipe_sampler_view *)so; } /* A given Gallium blend state can be encoded to the hardware in numerous, @@ -4254,255 +4168,257 @@ static void * panfrost_create_blend_state(struct pipe_context *pipe, const struct pipe_blend_state *blend) { - struct panfrost_blend_state *so = CALLOC_STRUCT(panfrost_blend_state); - so->base = *blend; + struct panfrost_blend_state *so = CALLOC_STRUCT(panfrost_blend_state); + so->base = *blend; - so->pan.logicop_enable = blend->logicop_enable; - so->pan.logicop_func = blend->logicop_func; - so->pan.rt_count = blend->max_rt + 1; + so->pan.logicop_enable = blend->logicop_enable; + so->pan.logicop_func = blend->logicop_func; + so->pan.rt_count = blend->max_rt + 1; - for (unsigned c = 0; c < so->pan.rt_count; ++c) { - unsigned g = blend->independent_blend_enable ? c : 0; - const struct pipe_rt_blend_state pipe = blend->rt[g]; - struct pan_blend_equation equation = {0}; + for (unsigned c = 0; c < so->pan.rt_count; ++c) { + unsigned g = blend->independent_blend_enable ? 
c : 0; + const struct pipe_rt_blend_state pipe = blend->rt[g]; + struct pan_blend_equation equation = {0}; - equation.color_mask = pipe.colormask; - equation.blend_enable = pipe.blend_enable; + equation.color_mask = pipe.colormask; + equation.blend_enable = pipe.blend_enable; - if (pipe.blend_enable) { - equation.rgb_func = util_blend_func_to_shader(pipe.rgb_func); - equation.rgb_src_factor = util_blend_factor_to_shader(pipe.rgb_src_factor); - equation.rgb_invert_src_factor = util_blend_factor_is_inverted(pipe.rgb_src_factor); - equation.rgb_dst_factor = util_blend_factor_to_shader(pipe.rgb_dst_factor); - equation.rgb_invert_dst_factor = util_blend_factor_is_inverted(pipe.rgb_dst_factor); - equation.alpha_func = util_blend_func_to_shader(pipe.alpha_func); - equation.alpha_src_factor = util_blend_factor_to_shader(pipe.alpha_src_factor); - equation.alpha_invert_src_factor = util_blend_factor_is_inverted(pipe.alpha_src_factor); - equation.alpha_dst_factor = util_blend_factor_to_shader(pipe.alpha_dst_factor); - equation.alpha_invert_dst_factor = util_blend_factor_is_inverted(pipe.alpha_dst_factor); - } + if (pipe.blend_enable) { + equation.rgb_func = util_blend_func_to_shader(pipe.rgb_func); + equation.rgb_src_factor = + util_blend_factor_to_shader(pipe.rgb_src_factor); + equation.rgb_invert_src_factor = + util_blend_factor_is_inverted(pipe.rgb_src_factor); + equation.rgb_dst_factor = + util_blend_factor_to_shader(pipe.rgb_dst_factor); + equation.rgb_invert_dst_factor = + util_blend_factor_is_inverted(pipe.rgb_dst_factor); + equation.alpha_func = util_blend_func_to_shader(pipe.alpha_func); + equation.alpha_src_factor = + util_blend_factor_to_shader(pipe.alpha_src_factor); + equation.alpha_invert_src_factor = + util_blend_factor_is_inverted(pipe.alpha_src_factor); + equation.alpha_dst_factor = + util_blend_factor_to_shader(pipe.alpha_dst_factor); + equation.alpha_invert_dst_factor = + util_blend_factor_is_inverted(pipe.alpha_dst_factor); + } - /* Determine some common properties */ - unsigned constant_mask = pan_blend_constant_mask(equation); - const bool supports_2src = pan_blend_supports_2src(PAN_ARCH); - so->info[c] = (struct pan_blend_info) { - .enabled = (equation.color_mask != 0), - .opaque = pan_blend_is_opaque(equation), - .constant_mask = constant_mask, + /* Determine some common properties */ + unsigned constant_mask = pan_blend_constant_mask(equation); + const bool supports_2src = pan_blend_supports_2src(PAN_ARCH); + so->info[c] = (struct pan_blend_info){ + .enabled = (equation.color_mask != 0), + .opaque = pan_blend_is_opaque(equation), + .constant_mask = constant_mask, - /* TODO: check the dest for the logicop */ - .load_dest = blend->logicop_enable || - pan_blend_reads_dest(equation), + /* TODO: check the dest for the logicop */ + .load_dest = blend->logicop_enable || pan_blend_reads_dest(equation), - /* Could this possibly be fixed-function? */ - .fixed_function = !blend->logicop_enable && - pan_blend_can_fixed_function(equation, - supports_2src) && - (!constant_mask || - pan_blend_supports_constant(PAN_ARCH, c)), + /* Could this possibly be fixed-function? 
*/ + .fixed_function = + !blend->logicop_enable && + pan_blend_can_fixed_function(equation, supports_2src) && + (!constant_mask || pan_blend_supports_constant(PAN_ARCH, c)), - .alpha_zero_nop = pan_blend_alpha_zero_nop(equation), - .alpha_one_store = pan_blend_alpha_one_store(equation), - }; + .alpha_zero_nop = pan_blend_alpha_zero_nop(equation), + .alpha_one_store = pan_blend_alpha_one_store(equation), + }; - so->pan.rts[c].equation = equation; + so->pan.rts[c].equation = equation; - /* Bifrost needs to know if any render target loads its - * destination in the hot draw path, so precompute this */ - if (so->info[c].load_dest) - so->load_dest_mask |= BITFIELD_BIT(c); + /* Bifrost needs to know if any render target loads its + * destination in the hot draw path, so precompute this */ + if (so->info[c].load_dest) + so->load_dest_mask |= BITFIELD_BIT(c); - /* Converting equations to Mali style is expensive, do it at - * CSO create time instead of draw-time */ - if (so->info[c].fixed_function) { - so->equation[c] = pan_pack_blend(equation); - } - } + /* Converting equations to Mali style is expensive, do it at + * CSO create time instead of draw-time */ + if (so->info[c].fixed_function) { + so->equation[c] = pan_pack_blend(equation); + } + } - return so; + return so; } #if PAN_ARCH >= 9 static enum mali_flush_to_zero_mode panfrost_ftz_mode(struct pan_shader_info *info) { - if (info->ftz_fp32) { - if (info->ftz_fp16) - return MALI_FLUSH_TO_ZERO_MODE_ALWAYS; - else - return MALI_FLUSH_TO_ZERO_MODE_DX11; - } else { - /* We don't have a "flush FP16, preserve FP32" mode, but APIs - * should not be able to generate that. - */ - assert(!info->ftz_fp16 && !info->ftz_fp32); - return MALI_FLUSH_TO_ZERO_MODE_PRESERVE_SUBNORMALS; - } + if (info->ftz_fp32) { + if (info->ftz_fp16) + return MALI_FLUSH_TO_ZERO_MODE_ALWAYS; + else + return MALI_FLUSH_TO_ZERO_MODE_DX11; + } else { + /* We don't have a "flush FP16, preserve FP32" mode, but APIs + * should not be able to generate that. + */ + assert(!info->ftz_fp16 && !info->ftz_fp32); + return MALI_FLUSH_TO_ZERO_MODE_PRESERVE_SUBNORMALS; + } } #endif static void prepare_shader(struct panfrost_compiled_shader *state, - struct panfrost_pool *pool, bool upload) + struct panfrost_pool *pool, bool upload) { #if PAN_ARCH <= 7 - void *out = &state->partial_rsd; + void *out = &state->partial_rsd; - if (upload) { - struct panfrost_ptr ptr = - pan_pool_alloc_desc(&pool->base, RENDERER_STATE); + if (upload) { + struct panfrost_ptr ptr = + pan_pool_alloc_desc(&pool->base, RENDERER_STATE); - state->state = panfrost_pool_take_ref(pool, ptr.gpu); - out = ptr.cpu; - } + state->state = panfrost_pool_take_ref(pool, ptr.gpu); + out = ptr.cpu; + } - pan_pack(out, RENDERER_STATE, cfg) { - pan_shader_prepare_rsd(&state->info, state->bin.gpu, &cfg); - - } + pan_pack(out, RENDERER_STATE, cfg) { + pan_shader_prepare_rsd(&state->info, state->bin.gpu, &cfg); + } #else - assert(upload); + assert(upload); - /* The address in the shader program descriptor must be non-null, but - * the entire shader program descriptor may be omitted. - * - * See dEQP-GLES31.functional.compute.basic.empty - */ - if (!state->bin.gpu) - return; + /* The address in the shader program descriptor must be non-null, but + * the entire shader program descriptor may be omitted. 
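[For reference only — an illustrative sketch, not part of the patch.] The blend-CSO block above front-loads the expensive decisions at CSO-creation time: whether each render target can use fixed-function blending, the packed Mali equation, and a load_dest_mask bitfield so the Bifrost draw-call hot path can answer "does any bound RT read its destination?" with a single AND. A self-contained restatement of that precomputation, using hypothetical stand-in types rather than the driver's structs:

#include <stdbool.h>
#include <stdint.h>

struct toy_rt_info {
   bool load_dest; /* this RT reads the framebuffer (blending or logic op) */
};

/* Computed once when the blend CSO is created; per-draw code only tests bits. */
static uint32_t
toy_load_dest_mask(const struct toy_rt_info *info, unsigned rt_count)
{
   uint32_t mask = 0;
   for (unsigned i = 0; i < rt_count; ++i) {
      if (info[i].load_dest)
         mask |= 1u << i;
   }
   return mask;
}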
+ * + * See dEQP-GLES31.functional.compute.basic.empty + */ + if (!state->bin.gpu) + return; - bool vs = (state->info.stage == MESA_SHADER_VERTEX); - bool secondary_enable = (vs && state->info.vs.secondary_enable); + bool vs = (state->info.stage == MESA_SHADER_VERTEX); + bool secondary_enable = (vs && state->info.vs.secondary_enable); - unsigned nr_variants = secondary_enable ? 3 : vs ? 2 : 1; - struct panfrost_ptr ptr = pan_pool_alloc_desc_array(&pool->base, - nr_variants, - SHADER_PROGRAM); + unsigned nr_variants = secondary_enable ? 3 : vs ? 2 : 1; + struct panfrost_ptr ptr = + pan_pool_alloc_desc_array(&pool->base, nr_variants, SHADER_PROGRAM); - state->state = panfrost_pool_take_ref(pool, ptr.gpu); + state->state = panfrost_pool_take_ref(pool, ptr.gpu); - /* Generic, or IDVS/points */ - pan_pack(ptr.cpu, SHADER_PROGRAM, cfg) { - cfg.stage = pan_shader_stage(&state->info); - cfg.primary_shader = true; - cfg.register_allocation = pan_register_allocation(state->info.work_reg_count); - cfg.binary = state->bin.gpu; - cfg.preload.r48_r63 = (state->info.preload >> 48); - cfg.flush_to_zero_mode = panfrost_ftz_mode(&state->info); + /* Generic, or IDVS/points */ + pan_pack(ptr.cpu, SHADER_PROGRAM, cfg) { + cfg.stage = pan_shader_stage(&state->info); + cfg.primary_shader = true; + cfg.register_allocation = + pan_register_allocation(state->info.work_reg_count); + cfg.binary = state->bin.gpu; + cfg.preload.r48_r63 = (state->info.preload >> 48); + cfg.flush_to_zero_mode = panfrost_ftz_mode(&state->info); - if (cfg.stage == MALI_SHADER_STAGE_FRAGMENT) - cfg.requires_helper_threads = state->info.contains_barrier; - } + if (cfg.stage == MALI_SHADER_STAGE_FRAGMENT) + cfg.requires_helper_threads = state->info.contains_barrier; + } - if (!vs) - return; + if (!vs) + return; - /* IDVS/triangles */ - pan_pack(ptr.cpu + pan_size(SHADER_PROGRAM), SHADER_PROGRAM, cfg) { - cfg.stage = pan_shader_stage(&state->info); - cfg.primary_shader = true; - cfg.register_allocation = pan_register_allocation(state->info.work_reg_count); - cfg.binary = state->bin.gpu + state->info.vs.no_psiz_offset; - cfg.preload.r48_r63 = (state->info.preload >> 48); - cfg.flush_to_zero_mode = panfrost_ftz_mode(&state->info); - } + /* IDVS/triangles */ + pan_pack(ptr.cpu + pan_size(SHADER_PROGRAM), SHADER_PROGRAM, cfg) { + cfg.stage = pan_shader_stage(&state->info); + cfg.primary_shader = true; + cfg.register_allocation = + pan_register_allocation(state->info.work_reg_count); + cfg.binary = state->bin.gpu + state->info.vs.no_psiz_offset; + cfg.preload.r48_r63 = (state->info.preload >> 48); + cfg.flush_to_zero_mode = panfrost_ftz_mode(&state->info); + } - if (!secondary_enable) - return; + if (!secondary_enable) + return; - pan_pack(ptr.cpu + (pan_size(SHADER_PROGRAM) * 2), SHADER_PROGRAM, cfg) { - unsigned work_count = state->info.vs.secondary_work_reg_count; + pan_pack(ptr.cpu + (pan_size(SHADER_PROGRAM) * 2), SHADER_PROGRAM, cfg) { + unsigned work_count = state->info.vs.secondary_work_reg_count; - cfg.stage = pan_shader_stage(&state->info); - cfg.primary_shader = false; - cfg.register_allocation = pan_register_allocation(work_count); - cfg.binary = state->bin.gpu + state->info.vs.secondary_offset; - cfg.preload.r48_r63 = (state->info.vs.secondary_preload >> 48); - cfg.flush_to_zero_mode = panfrost_ftz_mode(&state->info); - } + cfg.stage = pan_shader_stage(&state->info); + cfg.primary_shader = false; + cfg.register_allocation = pan_register_allocation(work_count); + cfg.binary = state->bin.gpu + state->info.vs.secondary_offset; + 
cfg.preload.r48_r63 = (state->info.vs.secondary_preload >> 48); + cfg.flush_to_zero_mode = panfrost_ftz_mode(&state->info); + } #endif } static void panfrost_get_sample_position(struct pipe_context *context, - unsigned sample_count, - unsigned sample_index, + unsigned sample_count, unsigned sample_index, float *out_value) { - panfrost_query_sample_position( - panfrost_sample_pattern(sample_count), - sample_index, - out_value); + panfrost_query_sample_position(panfrost_sample_pattern(sample_count), + sample_index, out_value); } static void screen_destroy(struct pipe_screen *pscreen) { - struct panfrost_device *dev = pan_device(pscreen); - GENX(pan_blitter_cleanup)(dev); + struct panfrost_device *dev = pan_device(pscreen); + GENX(pan_blitter_cleanup)(dev); #if PAN_GPU_INDIRECTS - GENX(pan_indirect_dispatch_cleanup)(dev); + GENX(pan_indirect_dispatch_cleanup)(dev); #endif } static void preload(struct panfrost_batch *batch, struct pan_fb_info *fb) { - GENX(pan_preload_fb)(&batch->pool.base, &batch->scoreboard, fb, batch->tls.gpu, - PAN_ARCH >= 6 ? batch->tiler_ctx.bifrost : 0, NULL); + GENX(pan_preload_fb) + (&batch->pool.base, &batch->scoreboard, fb, batch->tls.gpu, + PAN_ARCH >= 6 ? batch->tiler_ctx.bifrost : 0, NULL); } static void init_batch(struct panfrost_batch *batch) { - /* Reserve the framebuffer and local storage descriptors */ - batch->framebuffer = + /* Reserve the framebuffer and local storage descriptors */ + batch->framebuffer = #if PAN_ARCH == 4 - pan_pool_alloc_desc(&batch->pool.base, FRAMEBUFFER); + pan_pool_alloc_desc(&batch->pool.base, FRAMEBUFFER); #else - pan_pool_alloc_desc_aggregate(&batch->pool.base, - PAN_DESC(FRAMEBUFFER), - PAN_DESC(ZS_CRC_EXTENSION), - PAN_DESC_ARRAY(MAX2(batch->key.nr_cbufs, 1), RENDER_TARGET)); + pan_pool_alloc_desc_aggregate( + &batch->pool.base, PAN_DESC(FRAMEBUFFER), PAN_DESC(ZS_CRC_EXTENSION), + PAN_DESC_ARRAY(MAX2(batch->key.nr_cbufs, 1), RENDER_TARGET)); - batch->framebuffer.gpu |= MALI_FBD_TAG_IS_MFBD; + batch->framebuffer.gpu |= MALI_FBD_TAG_IS_MFBD; #endif #if PAN_ARCH >= 6 - batch->tls = pan_pool_alloc_desc(&batch->pool.base, LOCAL_STORAGE); + batch->tls = pan_pool_alloc_desc(&batch->pool.base, LOCAL_STORAGE); #else - /* On Midgard, the TLS is embedded in the FB descriptor */ - batch->tls = batch->framebuffer; + /* On Midgard, the TLS is embedded in the FB descriptor */ + batch->tls = batch->framebuffer; #endif } static void -panfrost_sampler_view_destroy( - struct pipe_context *pctx, - struct pipe_sampler_view *pview) +panfrost_sampler_view_destroy(struct pipe_context *pctx, + struct pipe_sampler_view *pview) { - struct panfrost_sampler_view *view = (struct panfrost_sampler_view *) pview; + struct panfrost_sampler_view *view = (struct panfrost_sampler_view *)pview; - pipe_resource_reference(&pview->texture, NULL); - panfrost_bo_unreference(view->state.bo); - ralloc_free(view); + pipe_resource_reference(&pview->texture, NULL); + panfrost_bo_unreference(view->state.bo); + ralloc_free(view); } static void context_init(struct pipe_context *pipe) { - pipe->draw_vbo = panfrost_draw_vbo; - pipe->launch_grid = panfrost_launch_grid; + pipe->draw_vbo = panfrost_draw_vbo; + pipe->launch_grid = panfrost_launch_grid; - pipe->create_vertex_elements_state = panfrost_create_vertex_elements_state; - pipe->create_rasterizer_state = panfrost_create_rasterizer_state; - pipe->create_depth_stencil_alpha_state = panfrost_create_depth_stencil_state; - pipe->create_sampler_view = panfrost_create_sampler_view; - pipe->sampler_view_destroy = 
panfrost_sampler_view_destroy; - pipe->create_sampler_state = panfrost_create_sampler_state; - pipe->create_blend_state = panfrost_create_blend_state; + pipe->create_vertex_elements_state = panfrost_create_vertex_elements_state; + pipe->create_rasterizer_state = panfrost_create_rasterizer_state; + pipe->create_depth_stencil_alpha_state = panfrost_create_depth_stencil_state; + pipe->create_sampler_view = panfrost_create_sampler_view; + pipe->sampler_view_destroy = panfrost_sampler_view_destroy; + pipe->create_sampler_state = panfrost_create_sampler_state; + pipe->create_blend_state = panfrost_create_blend_state; - pipe->get_sample_position = panfrost_get_sample_position; + pipe->get_sample_position = panfrost_get_sample_position; } #if PAN_ARCH <= 5 @@ -4514,49 +4430,43 @@ context_init(struct pipe_context *pipe) static mali_ptr batch_get_polygon_list(struct panfrost_batch *batch) { - struct panfrost_device *dev = pan_device(batch->ctx->base.screen); + struct panfrost_device *dev = pan_device(batch->ctx->base.screen); - if (!batch->tiler_ctx.midgard.polygon_list) { - bool has_draws = batch->scoreboard.first_tiler != NULL; - unsigned size = - panfrost_tiler_get_polygon_list_size(dev, - batch->key.width, - batch->key.height, - has_draws); - size = util_next_power_of_two(size); + if (!batch->tiler_ctx.midgard.polygon_list) { + bool has_draws = batch->scoreboard.first_tiler != NULL; + unsigned size = panfrost_tiler_get_polygon_list_size( + dev, batch->key.width, batch->key.height, has_draws); + size = util_next_power_of_two(size); - /* Create the BO as invisible if we can. If there are no draws, - * we need to write the polygon list manually because there's - * no WRITE_VALUE job in the chain - */ - bool init_polygon_list = !has_draws; - batch->tiler_ctx.midgard.polygon_list = - panfrost_batch_create_bo(batch, size, - init_polygon_list ? 0 : PAN_BO_INVISIBLE, - PIPE_SHADER_VERTEX, - "Polygon list"); - panfrost_batch_add_bo(batch, batch->tiler_ctx.midgard.polygon_list, - PIPE_SHADER_FRAGMENT); + /* Create the BO as invisible if we can. If there are no draws, + * we need to write the polygon list manually because there's + * no WRITE_VALUE job in the chain + */ + bool init_polygon_list = !has_draws; + batch->tiler_ctx.midgard.polygon_list = panfrost_batch_create_bo( + batch, size, init_polygon_list ? 
0 : PAN_BO_INVISIBLE, + PIPE_SHADER_VERTEX, "Polygon list"); + panfrost_batch_add_bo(batch, batch->tiler_ctx.midgard.polygon_list, + PIPE_SHADER_FRAGMENT); - if (init_polygon_list && dev->model->quirks.no_hierarchical_tiling) { - assert(batch->tiler_ctx.midgard.polygon_list->ptr.cpu); - uint32_t *polygon_list_body = - batch->tiler_ctx.midgard.polygon_list->ptr.cpu + - MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE; + if (init_polygon_list && dev->model->quirks.no_hierarchical_tiling) { + assert(batch->tiler_ctx.midgard.polygon_list->ptr.cpu); + uint32_t *polygon_list_body = + batch->tiler_ctx.midgard.polygon_list->ptr.cpu + + MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE; - /* Magic for Mali T720 */ - polygon_list_body[0] = 0xa0000000; - } else if (init_polygon_list) { - assert(batch->tiler_ctx.midgard.polygon_list->ptr.cpu); - uint32_t *header = - batch->tiler_ctx.midgard.polygon_list->ptr.cpu; - memset(header, 0, size); - } + /* Magic for Mali T720 */ + polygon_list_body[0] = 0xa0000000; + } else if (init_polygon_list) { + assert(batch->tiler_ctx.midgard.polygon_list->ptr.cpu); + uint32_t *header = batch->tiler_ctx.midgard.polygon_list->ptr.cpu; + memset(header, 0, size); + } - batch->tiler_ctx.midgard.disable = !has_draws; - } + batch->tiler_ctx.midgard.disable = !has_draws; + } - return batch->tiler_ctx.midgard.polygon_list->ptr.gpu; + return batch->tiler_ctx.midgard.polygon_list->ptr.gpu; } #endif @@ -4564,31 +4474,30 @@ static void init_polygon_list(struct panfrost_batch *batch) { #if PAN_ARCH <= 5 - mali_ptr polygon_list = batch_get_polygon_list(batch); - panfrost_scoreboard_initialize_tiler(&batch->pool.base, - &batch->scoreboard, - polygon_list); + mali_ptr polygon_list = batch_get_polygon_list(batch); + panfrost_scoreboard_initialize_tiler(&batch->pool.base, &batch->scoreboard, + polygon_list); #endif } void GENX(panfrost_cmdstream_screen_init)(struct panfrost_screen *screen) { - struct panfrost_device *dev = &screen->dev; + struct panfrost_device *dev = &screen->dev; - screen->vtbl.prepare_shader = prepare_shader; - screen->vtbl.emit_tls = emit_tls; - screen->vtbl.emit_fbd = emit_fbd; - screen->vtbl.emit_fragment_job = emit_fragment_job; - screen->vtbl.screen_destroy = screen_destroy; - screen->vtbl.preload = preload; - screen->vtbl.context_init = context_init; - screen->vtbl.init_batch = init_batch; - screen->vtbl.get_blend_shader = GENX(pan_blend_get_shader_locked); - screen->vtbl.init_polygon_list = init_polygon_list; - screen->vtbl.get_compiler_options = GENX(pan_shader_get_compiler_options); - screen->vtbl.compile_shader = GENX(pan_shader_compile); + screen->vtbl.prepare_shader = prepare_shader; + screen->vtbl.emit_tls = emit_tls; + screen->vtbl.emit_fbd = emit_fbd; + screen->vtbl.emit_fragment_job = emit_fragment_job; + screen->vtbl.screen_destroy = screen_destroy; + screen->vtbl.preload = preload; + screen->vtbl.context_init = context_init; + screen->vtbl.init_batch = init_batch; + screen->vtbl.get_blend_shader = GENX(pan_blend_get_shader_locked); + screen->vtbl.init_polygon_list = init_polygon_list; + screen->vtbl.get_compiler_options = GENX(pan_shader_get_compiler_options); + screen->vtbl.compile_shader = GENX(pan_shader_compile); - GENX(pan_blitter_init)(dev, &screen->blitter.bin_pool.base, - &screen->blitter.desc_pool.base); + GENX(pan_blitter_init) + (dev, &screen->blitter.bin_pool.base, &screen->blitter.desc_pool.base); } diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 5cac001abfe..14a244443df 100644 --- 
a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -32,745 +32,712 @@ #include "pan_context.h" #include "pan_minmax_cache.h" -#include "util/macros.h" #include "util/format/u_format.h" -#include "util/libsync.h" -#include "util/u_inlines.h" -#include "util/u_upload_mgr.h" -#include "util/u_memory.h" -#include "util/u_surface.h" -#include "util/u_vbuf.h" #include "util/half_float.h" +#include "util/libsync.h" +#include "util/macros.h" +#include "util/u_debug_cb.h" #include "util/u_helpers.h" -#include "util/format/u_format.h" +#include "util/u_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "util/u_prim.h" #include "util/u_prim_restart.h" #include "util/u_surface.h" -#include "util/u_math.h" -#include "util/u_debug_cb.h" +#include "util/u_upload_mgr.h" +#include "util/u_vbuf.h" +#include "compiler/nir/nir_serialize.h" +#include "util/pan_lower_framebuffer.h" +#include "decode.h" #include "pan_fence.h" #include "pan_screen.h" #include "pan_util.h" -#include "decode.h" -#include "util/pan_lower_framebuffer.h" -#include "compiler/nir/nir_serialize.h" static void -panfrost_clear( - struct pipe_context *pipe, - unsigned buffers, - const struct pipe_scissor_state *scissor_state, - const union pipe_color_union *color, - double depth, unsigned stencil) +panfrost_clear(struct pipe_context *pipe, unsigned buffers, + const struct pipe_scissor_state *scissor_state, + const union pipe_color_union *color, double depth, + unsigned stencil) { - struct panfrost_context *ctx = pan_context(pipe); - struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); + struct panfrost_context *ctx = pan_context(pipe); + struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); - if (!panfrost_render_condition_check(ctx)) - return; + if (!panfrost_render_condition_check(ctx)) + return; - /* At the start of the batch, we can clear for free */ - if (!batch->scoreboard.first_job) { - panfrost_batch_clear(batch, buffers, color, depth, stencil); - return; - } + /* At the start of the batch, we can clear for free */ + if (!batch->scoreboard.first_job) { + panfrost_batch_clear(batch, buffers, color, depth, stencil); + return; + } - /* Once there is content, clear with a fullscreen quad */ - panfrost_blitter_save(ctx, false /* render condition */); + /* Once there is content, clear with a fullscreen quad */ + panfrost_blitter_save(ctx, false /* render condition */); - perf_debug_ctx(ctx, "Clearing with quad"); - util_blitter_clear(ctx->blitter, - ctx->pipe_framebuffer.width, - ctx->pipe_framebuffer.height, - util_framebuffer_get_num_layers(&ctx->pipe_framebuffer), - buffers, color, depth, stencil, - util_framebuffer_get_num_samples(&ctx->pipe_framebuffer) > 1); + perf_debug_ctx(ctx, "Clearing with quad"); + util_blitter_clear( + ctx->blitter, ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height, + util_framebuffer_get_num_layers(&ctx->pipe_framebuffer), buffers, color, + depth, stencil, + util_framebuffer_get_num_samples(&ctx->pipe_framebuffer) > 1); } bool panfrost_writes_point_size(struct panfrost_context *ctx) { - struct panfrost_compiled_shader *vs = ctx->prog[PIPE_SHADER_VERTEX]; - assert(vs != NULL); + struct panfrost_compiled_shader *vs = ctx->prog[PIPE_SHADER_VERTEX]; + assert(vs != NULL); - return vs->info.vs.writes_point_size && ctx->active_prim == PIPE_PRIM_POINTS; + return vs->info.vs.writes_point_size && ctx->active_prim == PIPE_PRIM_POINTS; } /* The entire frame is in memory -- send it off to the kernel! 
*/ void -panfrost_flush( - struct pipe_context *pipe, - struct pipe_fence_handle **fence, - unsigned flags) +panfrost_flush(struct pipe_context *pipe, struct pipe_fence_handle **fence, + unsigned flags) { - struct panfrost_context *ctx = pan_context(pipe); - struct panfrost_device *dev = pan_device(pipe->screen); + struct panfrost_context *ctx = pan_context(pipe); + struct panfrost_device *dev = pan_device(pipe->screen); + /* Submit all pending jobs */ + panfrost_flush_all_batches(ctx, NULL); - /* Submit all pending jobs */ - panfrost_flush_all_batches(ctx, NULL); + if (fence) { + struct pipe_fence_handle *f = panfrost_fence_create(ctx); + pipe->screen->fence_reference(pipe->screen, fence, NULL); + *fence = f; + } - if (fence) { - struct pipe_fence_handle *f = panfrost_fence_create(ctx); - pipe->screen->fence_reference(pipe->screen, fence, NULL); - *fence = f; - } - - if (dev->debug & PAN_DBG_TRACE) - pandecode_next_frame(); + if (dev->debug & PAN_DBG_TRACE) + pandecode_next_frame(); } static void panfrost_texture_barrier(struct pipe_context *pipe, unsigned flags) { - struct panfrost_context *ctx = pan_context(pipe); - panfrost_flush_all_batches(ctx, "Texture barrier"); + struct panfrost_context *ctx = pan_context(pipe); + panfrost_flush_all_batches(ctx, "Texture barrier"); } static void panfrost_set_frontend_noop(struct pipe_context *pipe, bool enable) { - struct panfrost_context *ctx = pan_context(pipe); - panfrost_flush_all_batches(ctx, "Frontend no-op change"); - ctx->is_noop = enable; + struct panfrost_context *ctx = pan_context(pipe); + panfrost_flush_all_batches(ctx, "Frontend no-op change"); + ctx->is_noop = enable; } - static void panfrost_generic_cso_delete(struct pipe_context *pctx, void *hwcso) { - free(hwcso); + free(hwcso); } static void panfrost_bind_blend_state(struct pipe_context *pipe, void *cso) { - struct panfrost_context *ctx = pan_context(pipe); - ctx->blend = cso; - ctx->dirty |= PAN_DIRTY_BLEND; + struct panfrost_context *ctx = pan_context(pipe); + ctx->blend = cso; + ctx->dirty |= PAN_DIRTY_BLEND; } static void panfrost_set_blend_color(struct pipe_context *pipe, const struct pipe_blend_color *blend_color) { - struct panfrost_context *ctx = pan_context(pipe); - ctx->dirty |= PAN_DIRTY_BLEND; + struct panfrost_context *ctx = pan_context(pipe); + ctx->dirty |= PAN_DIRTY_BLEND; - if (blend_color) - ctx->blend_color = *blend_color; + if (blend_color) + ctx->blend_color = *blend_color; } /* Create a final blend given the context */ mali_ptr -panfrost_get_blend(struct panfrost_batch *batch, unsigned rti, struct panfrost_bo **bo, unsigned *shader_offset) +panfrost_get_blend(struct panfrost_batch *batch, unsigned rti, + struct panfrost_bo **bo, unsigned *shader_offset) { - struct panfrost_context *ctx = batch->ctx; - struct panfrost_device *dev = pan_device(ctx->base.screen); - struct panfrost_blend_state *blend = ctx->blend; - struct pan_blend_info info = blend->info[rti]; - struct pipe_surface *surf = batch->key.cbufs[rti]; - enum pipe_format fmt = surf->format; + struct panfrost_context *ctx = batch->ctx; + struct panfrost_device *dev = pan_device(ctx->base.screen); + struct panfrost_blend_state *blend = ctx->blend; + struct pan_blend_info info = blend->info[rti]; + struct pipe_surface *surf = batch->key.cbufs[rti]; + enum pipe_format fmt = surf->format; - /* Use fixed-function if the equation permits, the format is blendable, - * and no more than one unique constant is accessed */ - if (info.fixed_function && panfrost_blendable_formats_v7[fmt].internal && - 
pan_blend_is_homogenous_constant(info.constant_mask, - ctx->blend_color.color)) { - return 0; - } + /* Use fixed-function if the equation permits, the format is blendable, + * and no more than one unique constant is accessed */ + if (info.fixed_function && panfrost_blendable_formats_v7[fmt].internal && + pan_blend_is_homogenous_constant(info.constant_mask, + ctx->blend_color.color)) { + return 0; + } - /* On all architectures, we can disable writes for a blend descriptor, - * at which point the format doesn't matter. - */ - if (!info.enabled) - return 0; + /* On all architectures, we can disable writes for a blend descriptor, + * at which point the format doesn't matter. + */ + if (!info.enabled) + return 0; - /* On Bifrost and newer, we can also use fixed-function for opaque - * output regardless of the format by configuring the appropriate - * conversion descriptor in the internal blend descriptor. (Midgard - * requires a blend shader even for this case.) - */ - if (dev->arch >= 6 && info.opaque) - return 0; + /* On Bifrost and newer, we can also use fixed-function for opaque + * output regardless of the format by configuring the appropriate + * conversion descriptor in the internal blend descriptor. (Midgard + * requires a blend shader even for this case.) + */ + if (dev->arch >= 6 && info.opaque) + return 0; - /* Otherwise, we need to grab a shader */ - struct pan_blend_state pan_blend = blend->pan; - unsigned nr_samples = surf->nr_samples ? : surf->texture->nr_samples; + /* Otherwise, we need to grab a shader */ + struct pan_blend_state pan_blend = blend->pan; + unsigned nr_samples = surf->nr_samples ?: surf->texture->nr_samples; - pan_blend.rts[rti].format = fmt; - pan_blend.rts[rti].nr_samples = nr_samples; - memcpy(pan_blend.constants, ctx->blend_color.color, - sizeof(pan_blend.constants)); + pan_blend.rts[rti].format = fmt; + pan_blend.rts[rti].nr_samples = nr_samples; + memcpy(pan_blend.constants, ctx->blend_color.color, + sizeof(pan_blend.constants)); - /* Upload the shader, sharing a BO */ - if (!(*bo)) { - *bo = panfrost_batch_create_bo(batch, 4096, PAN_BO_EXECUTE, - PIPE_SHADER_FRAGMENT, "Blend shader"); - } + /* Upload the shader, sharing a BO */ + if (!(*bo)) { + *bo = panfrost_batch_create_bo(batch, 4096, PAN_BO_EXECUTE, + PIPE_SHADER_FRAGMENT, "Blend shader"); + } - struct panfrost_compiled_shader *ss = ctx->prog[PIPE_SHADER_FRAGMENT]; + struct panfrost_compiled_shader *ss = ctx->prog[PIPE_SHADER_FRAGMENT]; - /* Default for Midgard */ - nir_alu_type col0_type = nir_type_float32; - nir_alu_type col1_type = nir_type_float32; + /* Default for Midgard */ + nir_alu_type col0_type = nir_type_float32; + nir_alu_type col1_type = nir_type_float32; - /* Bifrost has per-output types, respect them */ - if (dev->arch >= 6) { - col0_type = ss->info.bifrost.blend[rti].type; - col1_type = ss->info.bifrost.blend_src1_type; - } + /* Bifrost has per-output types, respect them */ + if (dev->arch >= 6) { + col0_type = ss->info.bifrost.blend[rti].type; + col1_type = ss->info.bifrost.blend_src1_type; + } - pthread_mutex_lock(&dev->blend_shaders.lock); - struct pan_blend_shader_variant *shader = - pan_screen(ctx->base.screen)->vtbl.get_blend_shader(dev, - &pan_blend, - col0_type, - col1_type, - rti); + pthread_mutex_lock(&dev->blend_shaders.lock); + struct pan_blend_shader_variant *shader = + pan_screen(ctx->base.screen) + ->vtbl.get_blend_shader(dev, &pan_blend, col0_type, col1_type, rti); - /* Size check and upload */ - unsigned offset = *shader_offset; - assert((offset + shader->binary.size) 
< 4096); - memcpy((*bo)->ptr.cpu + offset, shader->binary.data, shader->binary.size); - *shader_offset += shader->binary.size; - pthread_mutex_unlock(&dev->blend_shaders.lock); + /* Size check and upload */ + unsigned offset = *shader_offset; + assert((offset + shader->binary.size) < 4096); + memcpy((*bo)->ptr.cpu + offset, shader->binary.data, shader->binary.size); + *shader_offset += shader->binary.size; + pthread_mutex_unlock(&dev->blend_shaders.lock); - return ((*bo)->ptr.gpu + offset) | shader->first_tag; + return ((*bo)->ptr.gpu + offset) | shader->first_tag; } static void -panfrost_bind_rasterizer_state( - struct pipe_context *pctx, - void *hwcso) +panfrost_bind_rasterizer_state(struct pipe_context *pctx, void *hwcso) { - struct panfrost_context *ctx = pan_context(pctx); - ctx->rasterizer = hwcso; + struct panfrost_context *ctx = pan_context(pctx); + ctx->rasterizer = hwcso; - /* We can assume rasterizer is always dirty, the dependencies are - * too intricate to bother tracking in detail. However we could - * probably diff the renderers for viewport dirty tracking, that - * just cares about the scissor enable and the depth clips. */ - ctx->dirty |= PAN_DIRTY_SCISSOR | PAN_DIRTY_RASTERIZER; + /* We can assume rasterizer is always dirty, the dependencies are + * too intricate to bother tracking in detail. However we could + * probably diff the renderers for viewport dirty tracking, that + * just cares about the scissor enable and the depth clips. */ + ctx->dirty |= PAN_DIRTY_SCISSOR | PAN_DIRTY_RASTERIZER; } static void -panfrost_set_shader_images( - struct pipe_context *pctx, - enum pipe_shader_type shader, - unsigned start_slot, unsigned count, unsigned unbind_num_trailing_slots, - const struct pipe_image_view *iviews) +panfrost_set_shader_images(struct pipe_context *pctx, + enum pipe_shader_type shader, unsigned start_slot, + unsigned count, unsigned unbind_num_trailing_slots, + const struct pipe_image_view *iviews) { - struct panfrost_context *ctx = pan_context(pctx); - ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |= PAN_DIRTY_STAGE_IMAGE; + struct panfrost_context *ctx = pan_context(pctx); + ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |= PAN_DIRTY_STAGE_IMAGE; - /* Unbind start_slot...start_slot+count */ - if (!iviews) { - for (int i = start_slot; i < start_slot + count + unbind_num_trailing_slots; i++) { - pipe_resource_reference(&ctx->images[shader][i].resource, NULL); - } + /* Unbind start_slot...start_slot+count */ + if (!iviews) { + for (int i = start_slot; + i < start_slot + count + unbind_num_trailing_slots; i++) { + pipe_resource_reference(&ctx->images[shader][i].resource, NULL); + } - ctx->image_mask[shader] &= ~(((1ull << count) - 1) << start_slot); - return; - } + ctx->image_mask[shader] &= ~(((1ull << count) - 1) << start_slot); + return; + } - /* Bind start_slot...start_slot+count */ - for (int i = 0; i < count; i++) { - const struct pipe_image_view *image = &iviews[i]; - SET_BIT(ctx->image_mask[shader], 1 << (start_slot + i), image->resource); + /* Bind start_slot...start_slot+count */ + for (int i = 0; i < count; i++) { + const struct pipe_image_view *image = &iviews[i]; + SET_BIT(ctx->image_mask[shader], 1 << (start_slot + i), image->resource); - if (!image->resource) { - util_copy_image_view(&ctx->images[shader][start_slot+i], NULL); - continue; - } + if (!image->resource) { + util_copy_image_view(&ctx->images[shader][start_slot + i], NULL); + continue; + } - struct panfrost_resource *rsrc = pan_resource(image->resource); + struct panfrost_resource *rsrc = 
pan_resource(image->resource); - /* Images don't work with AFBC, since they require pixel-level granularity */ - if (drm_is_afbc(rsrc->image.layout.modifier)) { - pan_resource_modifier_convert(ctx, rsrc, - DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED, - "Shader image"); - } + /* Images don't work with AFBC, since they require pixel-level granularity + */ + if (drm_is_afbc(rsrc->image.layout.modifier)) { + pan_resource_modifier_convert( + ctx, rsrc, DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED, + "Shader image"); + } - util_copy_image_view(&ctx->images[shader][start_slot+i], image); - } + util_copy_image_view(&ctx->images[shader][start_slot + i], image); + } - /* Unbind start_slot+count...start_slot+count+unbind_num_trailing_slots */ - for (int i = 0; i < unbind_num_trailing_slots; i++) { - SET_BIT(ctx->image_mask[shader], 1 << (start_slot + count + i), NULL); - util_copy_image_view(&ctx->images[shader][start_slot+count+i], NULL); - } + /* Unbind start_slot+count...start_slot+count+unbind_num_trailing_slots */ + for (int i = 0; i < unbind_num_trailing_slots; i++) { + SET_BIT(ctx->image_mask[shader], 1 << (start_slot + count + i), NULL); + util_copy_image_view(&ctx->images[shader][start_slot + count + i], NULL); + } } static void -panfrost_bind_vertex_elements_state( - struct pipe_context *pctx, - void *hwcso) +panfrost_bind_vertex_elements_state(struct pipe_context *pctx, void *hwcso) { - struct panfrost_context *ctx = pan_context(pctx); - ctx->vertex = hwcso; - ctx->dirty |= PAN_DIRTY_VERTEX; + struct panfrost_context *ctx = pan_context(pctx); + ctx->vertex = hwcso; + ctx->dirty |= PAN_DIRTY_VERTEX; } static void -panfrost_bind_sampler_states( - struct pipe_context *pctx, - enum pipe_shader_type shader, - unsigned start_slot, unsigned num_sampler, - void **sampler) +panfrost_bind_sampler_states(struct pipe_context *pctx, + enum pipe_shader_type shader, unsigned start_slot, + unsigned num_sampler, void **sampler) { - struct panfrost_context *ctx = pan_context(pctx); - ctx->dirty_shader[shader] |= PAN_DIRTY_STAGE_SAMPLER; + struct panfrost_context *ctx = pan_context(pctx); + ctx->dirty_shader[shader] |= PAN_DIRTY_STAGE_SAMPLER; - for (unsigned i = 0; i < num_sampler; i++) { - unsigned p = start_slot + i; - ctx->samplers[shader][p] = sampler ? sampler[i] : NULL; - if (ctx->samplers[shader][p]) - ctx->valid_samplers[shader] |= BITFIELD_BIT(p); - else - ctx->valid_samplers[shader] &= ~BITFIELD_BIT(p); - } + for (unsigned i = 0; i < num_sampler; i++) { + unsigned p = start_slot + i; + ctx->samplers[shader][p] = sampler ? 
sampler[i] : NULL; + if (ctx->samplers[shader][p]) + ctx->valid_samplers[shader] |= BITFIELD_BIT(p); + else + ctx->valid_samplers[shader] &= ~BITFIELD_BIT(p); + } - ctx->sampler_count[shader] = util_last_bit(ctx->valid_samplers[shader]); + ctx->sampler_count[shader] = util_last_bit(ctx->valid_samplers[shader]); } static void -panfrost_set_vertex_buffers( - struct pipe_context *pctx, - unsigned start_slot, - unsigned num_buffers, - unsigned unbind_num_trailing_slots, - bool take_ownership, - const struct pipe_vertex_buffer *buffers) +panfrost_set_vertex_buffers(struct pipe_context *pctx, unsigned start_slot, + unsigned num_buffers, + unsigned unbind_num_trailing_slots, + bool take_ownership, + const struct pipe_vertex_buffer *buffers) { - struct panfrost_context *ctx = pan_context(pctx); + struct panfrost_context *ctx = pan_context(pctx); - util_set_vertex_buffers_mask(ctx->vertex_buffers, &ctx->vb_mask, buffers, - start_slot, num_buffers, unbind_num_trailing_slots, - take_ownership); + util_set_vertex_buffers_mask(ctx->vertex_buffers, &ctx->vb_mask, buffers, + start_slot, num_buffers, + unbind_num_trailing_slots, take_ownership); - ctx->dirty |= PAN_DIRTY_VERTEX; + ctx->dirty |= PAN_DIRTY_VERTEX; } static void -panfrost_set_constant_buffer( - struct pipe_context *pctx, - enum pipe_shader_type shader, uint index, bool take_ownership, - const struct pipe_constant_buffer *buf) +panfrost_set_constant_buffer(struct pipe_context *pctx, + enum pipe_shader_type shader, uint index, + bool take_ownership, + const struct pipe_constant_buffer *buf) { - struct panfrost_context *ctx = pan_context(pctx); - struct panfrost_constant_buffer *pbuf = &ctx->constant_buffer[shader]; + struct panfrost_context *ctx = pan_context(pctx); + struct panfrost_constant_buffer *pbuf = &ctx->constant_buffer[shader]; - util_copy_constant_buffer(&pbuf->cb[index], buf, take_ownership); + util_copy_constant_buffer(&pbuf->cb[index], buf, take_ownership); - unsigned mask = (1 << index); + unsigned mask = (1 << index); - if (unlikely(!buf)) { - pbuf->enabled_mask &= ~mask; - return; - } + if (unlikely(!buf)) { + pbuf->enabled_mask &= ~mask; + return; + } - pbuf->enabled_mask |= mask; - ctx->dirty_shader[shader] |= PAN_DIRTY_STAGE_CONST; + pbuf->enabled_mask |= mask; + ctx->dirty_shader[shader] |= PAN_DIRTY_STAGE_CONST; } static void -panfrost_set_stencil_ref( - struct pipe_context *pctx, - const struct pipe_stencil_ref ref) +panfrost_set_stencil_ref(struct pipe_context *pctx, + const struct pipe_stencil_ref ref) { - struct panfrost_context *ctx = pan_context(pctx); - ctx->stencil_ref = ref; - ctx->dirty |= PAN_DIRTY_ZS; + struct panfrost_context *ctx = pan_context(pctx); + ctx->stencil_ref = ref; + ctx->dirty |= PAN_DIRTY_ZS; } static void -panfrost_set_sampler_views( - struct pipe_context *pctx, - enum pipe_shader_type shader, - unsigned start_slot, unsigned num_views, - unsigned unbind_num_trailing_slots, - bool take_ownership, - struct pipe_sampler_view **views) +panfrost_set_sampler_views(struct pipe_context *pctx, + enum pipe_shader_type shader, unsigned start_slot, + unsigned num_views, + unsigned unbind_num_trailing_slots, + bool take_ownership, + struct pipe_sampler_view **views) { - struct panfrost_context *ctx = pan_context(pctx); - ctx->dirty_shader[shader] |= PAN_DIRTY_STAGE_TEXTURE; + struct panfrost_context *ctx = pan_context(pctx); + ctx->dirty_shader[shader] |= PAN_DIRTY_STAGE_TEXTURE; - unsigned new_nr = 0; - unsigned i; + unsigned new_nr = 0; + unsigned i; - for (i = 0; i < num_views; ++i) { - struct 
pipe_sampler_view *view = views ? views[i] : NULL; - unsigned p = i + start_slot; + for (i = 0; i < num_views; ++i) { + struct pipe_sampler_view *view = views ? views[i] : NULL; + unsigned p = i + start_slot; - if (view) - new_nr = p + 1; + if (view) + new_nr = p + 1; - if (take_ownership) { - pipe_sampler_view_reference((struct pipe_sampler_view **)&ctx->sampler_views[shader][p], - NULL); - ctx->sampler_views[shader][i] = (struct panfrost_sampler_view *)view; - } else { - pipe_sampler_view_reference((struct pipe_sampler_view **)&ctx->sampler_views[shader][p], - view); - } - } + if (take_ownership) { + pipe_sampler_view_reference( + (struct pipe_sampler_view **)&ctx->sampler_views[shader][p], NULL); + ctx->sampler_views[shader][i] = (struct panfrost_sampler_view *)view; + } else { + pipe_sampler_view_reference( + (struct pipe_sampler_view **)&ctx->sampler_views[shader][p], view); + } + } - for (; i < num_views + unbind_num_trailing_slots; i++) { - unsigned p = i + start_slot; - pipe_sampler_view_reference((struct pipe_sampler_view **)&ctx->sampler_views[shader][p], - NULL); - } + for (; i < num_views + unbind_num_trailing_slots; i++) { + unsigned p = i + start_slot; + pipe_sampler_view_reference( + (struct pipe_sampler_view **)&ctx->sampler_views[shader][p], NULL); + } - /* If the sampler view count is higher than the greatest sampler view - * we touch, it can't change */ - if (ctx->sampler_view_count[shader] > start_slot + num_views + unbind_num_trailing_slots) - return; + /* If the sampler view count is higher than the greatest sampler view + * we touch, it can't change */ + if (ctx->sampler_view_count[shader] > + start_slot + num_views + unbind_num_trailing_slots) + return; - /* If we haven't set any sampler views here, search lower numbers for - * set sampler views */ - if (new_nr == 0) { - for (i = 0; i < start_slot; ++i) { - if (ctx->sampler_views[shader][i]) - new_nr = i + 1; - } - } + /* If we haven't set any sampler views here, search lower numbers for + * set sampler views */ + if (new_nr == 0) { + for (i = 0; i < start_slot; ++i) { + if (ctx->sampler_views[shader][i]) + new_nr = i + 1; + } + } - ctx->sampler_view_count[shader] = new_nr; + ctx->sampler_view_count[shader] = new_nr; } static void -panfrost_set_shader_buffers( - struct pipe_context *pctx, - enum pipe_shader_type shader, - unsigned start, unsigned count, - const struct pipe_shader_buffer *buffers, - unsigned writable_bitmask) +panfrost_set_shader_buffers(struct pipe_context *pctx, + enum pipe_shader_type shader, unsigned start, + unsigned count, + const struct pipe_shader_buffer *buffers, + unsigned writable_bitmask) { - struct panfrost_context *ctx = pan_context(pctx); + struct panfrost_context *ctx = pan_context(pctx); - util_set_shader_buffers_mask(ctx->ssbo[shader], &ctx->ssbo_mask[shader], - buffers, start, count); + util_set_shader_buffers_mask(ctx->ssbo[shader], &ctx->ssbo_mask[shader], + buffers, start, count); - ctx->dirty_shader[shader] |= PAN_DIRTY_STAGE_SSBO; + ctx->dirty_shader[shader] |= PAN_DIRTY_STAGE_SSBO; } static void panfrost_set_framebuffer_state(struct pipe_context *pctx, const struct pipe_framebuffer_state *fb) { - struct panfrost_context *ctx = pan_context(pctx); + struct panfrost_context *ctx = pan_context(pctx); - util_copy_framebuffer_state(&ctx->pipe_framebuffer, fb); - ctx->batch = NULL; + util_copy_framebuffer_state(&ctx->pipe_framebuffer, fb); + ctx->batch = NULL; - /* Hot draw call path needs the mask of active render targets */ - ctx->fb_rt_mask = 0; + /* Hot draw call path needs the 
mask of active render targets */ + ctx->fb_rt_mask = 0; - for (unsigned i = 0; i < ctx->pipe_framebuffer.nr_cbufs; ++i) { - if (ctx->pipe_framebuffer.cbufs[i]) - ctx->fb_rt_mask |= BITFIELD_BIT(i); - } + for (unsigned i = 0; i < ctx->pipe_framebuffer.nr_cbufs; ++i) { + if (ctx->pipe_framebuffer.cbufs[i]) + ctx->fb_rt_mask |= BITFIELD_BIT(i); + } } static void -panfrost_bind_depth_stencil_state(struct pipe_context *pipe, - void *cso) +panfrost_bind_depth_stencil_state(struct pipe_context *pipe, void *cso) { - struct panfrost_context *ctx = pan_context(pipe); - ctx->depth_stencil = cso; - ctx->dirty |= PAN_DIRTY_ZS; + struct panfrost_context *ctx = pan_context(pipe); + ctx->depth_stencil = cso; + ctx->dirty |= PAN_DIRTY_ZS; } static void -panfrost_set_sample_mask(struct pipe_context *pipe, - unsigned sample_mask) +panfrost_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) { - struct panfrost_context *ctx = pan_context(pipe); - ctx->sample_mask = sample_mask; - ctx->dirty |= PAN_DIRTY_MSAA; + struct panfrost_context *ctx = pan_context(pipe); + ctx->sample_mask = sample_mask; + ctx->dirty |= PAN_DIRTY_MSAA; } static void -panfrost_set_min_samples(struct pipe_context *pipe, - unsigned min_samples) +panfrost_set_min_samples(struct pipe_context *pipe, unsigned min_samples) { - struct panfrost_context *ctx = pan_context(pipe); - ctx->min_samples = min_samples; - ctx->dirty |= PAN_DIRTY_MSAA; + struct panfrost_context *ctx = pan_context(pipe); + ctx->min_samples = min_samples; + ctx->dirty |= PAN_DIRTY_MSAA; } static void panfrost_set_clip_state(struct pipe_context *pipe, const struct pipe_clip_state *clip) { - //struct panfrost_context *panfrost = pan_context(pipe); + // struct panfrost_context *panfrost = pan_context(pipe); } static void -panfrost_set_viewport_states(struct pipe_context *pipe, - unsigned start_slot, +panfrost_set_viewport_states(struct pipe_context *pipe, unsigned start_slot, unsigned num_viewports, const struct pipe_viewport_state *viewports) { - struct panfrost_context *ctx = pan_context(pipe); + struct panfrost_context *ctx = pan_context(pipe); - assert(start_slot == 0); - assert(num_viewports == 1); + assert(start_slot == 0); + assert(num_viewports == 1); - ctx->pipe_viewport = *viewports; - ctx->dirty |= PAN_DIRTY_VIEWPORT; + ctx->pipe_viewport = *viewports; + ctx->dirty |= PAN_DIRTY_VIEWPORT; } static void -panfrost_set_scissor_states(struct pipe_context *pipe, - unsigned start_slot, +panfrost_set_scissor_states(struct pipe_context *pipe, unsigned start_slot, unsigned num_scissors, const struct pipe_scissor_state *scissors) { - struct panfrost_context *ctx = pan_context(pipe); + struct panfrost_context *ctx = pan_context(pipe); - assert(start_slot == 0); - assert(num_scissors == 1); + assert(start_slot == 0); + assert(num_scissors == 1); - ctx->scissor = *scissors; - ctx->dirty |= PAN_DIRTY_SCISSOR; + ctx->scissor = *scissors; + ctx->dirty |= PAN_DIRTY_SCISSOR; } static void panfrost_set_polygon_stipple(struct pipe_context *pipe, const struct pipe_poly_stipple *stipple) { - //struct panfrost_context *panfrost = pan_context(pipe); + // struct panfrost_context *panfrost = pan_context(pipe); } static void -panfrost_set_active_query_state(struct pipe_context *pipe, - bool enable) +panfrost_set_active_query_state(struct pipe_context *pipe, bool enable) { - struct panfrost_context *ctx = pan_context(pipe); - ctx->active_queries = enable; - ctx->dirty |= PAN_DIRTY_OQ; + struct panfrost_context *ctx = pan_context(pipe); + ctx->active_queries = enable; + ctx->dirty |= 
PAN_DIRTY_OQ; } static void -panfrost_render_condition(struct pipe_context *pipe, - struct pipe_query *query, - bool condition, - enum pipe_render_cond_flag mode) +panfrost_render_condition(struct pipe_context *pipe, struct pipe_query *query, + bool condition, enum pipe_render_cond_flag mode) { - struct panfrost_context *ctx = pan_context(pipe); + struct panfrost_context *ctx = pan_context(pipe); - ctx->cond_query = (struct panfrost_query *)query; - ctx->cond_cond = condition; - ctx->cond_mode = mode; + ctx->cond_query = (struct panfrost_query *)query; + ctx->cond_cond = condition; + ctx->cond_mode = mode; } static void panfrost_destroy(struct pipe_context *pipe) { - struct panfrost_context *panfrost = pan_context(pipe); - struct panfrost_device *dev = pan_device(pipe->screen); + struct panfrost_context *panfrost = pan_context(pipe); + struct panfrost_device *dev = pan_device(pipe->screen); - _mesa_hash_table_destroy(panfrost->writers, NULL); + _mesa_hash_table_destroy(panfrost->writers, NULL); - if (panfrost->blitter) - util_blitter_destroy(panfrost->blitter); + if (panfrost->blitter) + util_blitter_destroy(panfrost->blitter); - util_unreference_framebuffer_state(&panfrost->pipe_framebuffer); - u_upload_destroy(pipe->stream_uploader); + util_unreference_framebuffer_state(&panfrost->pipe_framebuffer); + u_upload_destroy(pipe->stream_uploader); - panfrost_pool_cleanup(&panfrost->descs); - panfrost_pool_cleanup(&panfrost->shaders); + panfrost_pool_cleanup(&panfrost->descs); + panfrost_pool_cleanup(&panfrost->shaders); - drmSyncobjDestroy(dev->fd, panfrost->in_sync_obj); - if (panfrost->in_sync_fd != -1) - close(panfrost->in_sync_fd); + drmSyncobjDestroy(dev->fd, panfrost->in_sync_obj); + if (panfrost->in_sync_fd != -1) + close(panfrost->in_sync_fd); - drmSyncobjDestroy(dev->fd, panfrost->syncobj); - ralloc_free(pipe); + drmSyncobjDestroy(dev->fd, panfrost->syncobj); + ralloc_free(pipe); } static struct pipe_query * -panfrost_create_query(struct pipe_context *pipe, - unsigned type, - unsigned index) +panfrost_create_query(struct pipe_context *pipe, unsigned type, unsigned index) { - struct panfrost_query *q = rzalloc(pipe, struct panfrost_query); + struct panfrost_query *q = rzalloc(pipe, struct panfrost_query); - q->type = type; - q->index = index; + q->type = type; + q->index = index; - return (struct pipe_query *) q; + return (struct pipe_query *)q; } static void panfrost_destroy_query(struct pipe_context *pipe, struct pipe_query *q) { - struct panfrost_query *query = (struct panfrost_query *) q; + struct panfrost_query *query = (struct panfrost_query *)q; - if (query->rsrc) - pipe_resource_reference(&query->rsrc, NULL); + if (query->rsrc) + pipe_resource_reference(&query->rsrc, NULL); - ralloc_free(q); + ralloc_free(q); } static bool panfrost_begin_query(struct pipe_context *pipe, struct pipe_query *q) { - struct panfrost_context *ctx = pan_context(pipe); - struct panfrost_device *dev = pan_device(ctx->base.screen); - struct panfrost_query *query = (struct panfrost_query *) q; + struct panfrost_context *ctx = pan_context(pipe); + struct panfrost_device *dev = pan_device(ctx->base.screen); + struct panfrost_query *query = (struct panfrost_query *)q; - switch (query->type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - case PIPE_QUERY_OCCLUSION_PREDICATE: - case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: { - unsigned size = sizeof(uint64_t) * dev->core_id_range; + switch (query->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + case 
PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: { + unsigned size = sizeof(uint64_t) * dev->core_id_range; - /* Allocate a resource for the query results to be stored */ - if (!query->rsrc) { - query->rsrc = pipe_buffer_create(ctx->base.screen, - PIPE_BIND_QUERY_BUFFER, 0, size); - } + /* Allocate a resource for the query results to be stored */ + if (!query->rsrc) { + query->rsrc = pipe_buffer_create(ctx->base.screen, + PIPE_BIND_QUERY_BUFFER, 0, size); + } - /* Default to 0 if nothing at all drawn. */ - uint8_t *zeroes = alloca(size); - memset(zeroes, 0, size); - pipe_buffer_write(pipe, query->rsrc, 0, size, zeroes); + /* Default to 0 if nothing at all drawn. */ + uint8_t *zeroes = alloca(size); + memset(zeroes, 0, size); + pipe_buffer_write(pipe, query->rsrc, 0, size, zeroes); - query->msaa = (ctx->pipe_framebuffer.samples > 1); - ctx->occlusion_query = query; - ctx->dirty |= PAN_DIRTY_OQ; - break; - } + query->msaa = (ctx->pipe_framebuffer.samples > 1); + ctx->occlusion_query = query; + ctx->dirty |= PAN_DIRTY_OQ; + break; + } - /* Geometry statistics are computed in the driver. XXX: geom/tess - * shaders.. */ + /* Geometry statistics are computed in the driver. XXX: geom/tess + * shaders.. */ - case PIPE_QUERY_PRIMITIVES_GENERATED: - query->start = ctx->prims_generated; - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - query->start = ctx->tf_prims_generated; - break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + query->start = ctx->prims_generated; + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + query->start = ctx->tf_prims_generated; + break; - case PAN_QUERY_DRAW_CALLS: - query->start = ctx->draw_calls; - break; + case PAN_QUERY_DRAW_CALLS: + query->start = ctx->draw_calls; + break; - default: - /* TODO: timestamp queries, etc? */ - break; - } + default: + /* TODO: timestamp queries, etc? 
*/ + break; + } - return true; + return true; } static bool panfrost_end_query(struct pipe_context *pipe, struct pipe_query *q) { - struct panfrost_context *ctx = pan_context(pipe); - struct panfrost_query *query = (struct panfrost_query *) q; + struct panfrost_context *ctx = pan_context(pipe); + struct panfrost_query *query = (struct panfrost_query *)q; - switch (query->type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - case PIPE_QUERY_OCCLUSION_PREDICATE: - case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: - ctx->occlusion_query = NULL; - ctx->dirty |= PAN_DIRTY_OQ; - break; - case PIPE_QUERY_PRIMITIVES_GENERATED: - query->end = ctx->prims_generated; - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - query->end = ctx->tf_prims_generated; - break; - case PAN_QUERY_DRAW_CALLS: - query->end = ctx->draw_calls; - break; - } + switch (query->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: + ctx->occlusion_query = NULL; + ctx->dirty |= PAN_DIRTY_OQ; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + query->end = ctx->prims_generated; + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + query->end = ctx->tf_prims_generated; + break; + case PAN_QUERY_DRAW_CALLS: + query->end = ctx->draw_calls; + break; + } - return true; + return true; } static bool -panfrost_get_query_result(struct pipe_context *pipe, - struct pipe_query *q, - bool wait, - union pipe_query_result *vresult) +panfrost_get_query_result(struct pipe_context *pipe, struct pipe_query *q, + bool wait, union pipe_query_result *vresult) { - struct panfrost_query *query = (struct panfrost_query *) q; - struct panfrost_context *ctx = pan_context(pipe); - struct panfrost_device *dev = pan_device(ctx->base.screen); - struct panfrost_resource *rsrc = pan_resource(query->rsrc); + struct panfrost_query *query = (struct panfrost_query *)q; + struct panfrost_context *ctx = pan_context(pipe); + struct panfrost_device *dev = pan_device(ctx->base.screen); + struct panfrost_resource *rsrc = pan_resource(query->rsrc); - switch (query->type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - case PIPE_QUERY_OCCLUSION_PREDICATE: - case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: - panfrost_flush_writer(ctx, rsrc, "Occlusion query"); - panfrost_bo_wait(rsrc->image.data.bo, INT64_MAX, false); + switch (query->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: + panfrost_flush_writer(ctx, rsrc, "Occlusion query"); + panfrost_bo_wait(rsrc->image.data.bo, INT64_MAX, false); - /* Read back the query results */ - uint64_t *result = (uint64_t *) rsrc->image.data.bo->ptr.cpu; + /* Read back the query results */ + uint64_t *result = (uint64_t *)rsrc->image.data.bo->ptr.cpu; - if (query->type == PIPE_QUERY_OCCLUSION_COUNTER) { - uint64_t passed = 0; - for (int i = 0; i < dev->core_id_range; ++i) - passed += result[i]; + if (query->type == PIPE_QUERY_OCCLUSION_COUNTER) { + uint64_t passed = 0; + for (int i = 0; i < dev->core_id_range; ++i) + passed += result[i]; - if (dev->arch <= 5 && !query->msaa) - passed /= 4; + if (dev->arch <= 5 && !query->msaa) + passed /= 4; - vresult->u64 = passed; - } else { - vresult->b = !!result[0]; - } + vresult->u64 = passed; + } else { + vresult->b = !!result[0]; + } - break; + break; - case PIPE_QUERY_PRIMITIVES_GENERATED: - case PIPE_QUERY_PRIMITIVES_EMITTED: - panfrost_flush_all_batches(ctx, "Primitive count query"); - vresult->u64 = query->end - query->start; - break; + case 
PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_PRIMITIVES_EMITTED: + panfrost_flush_all_batches(ctx, "Primitive count query"); + vresult->u64 = query->end - query->start; + break; - case PAN_QUERY_DRAW_CALLS: - vresult->u64 = query->end - query->start; - break; + case PAN_QUERY_DRAW_CALLS: + vresult->u64 = query->end - query->start; + break; - default: - /* TODO: more queries */ - break; - } + default: + /* TODO: more queries */ + break; + } - return true; + return true; } bool panfrost_render_condition_check(struct panfrost_context *ctx) { - if (!ctx->cond_query) - return true; + if (!ctx->cond_query) + return true; - perf_debug_ctx(ctx, "Implementing conditional rendering on the CPU"); + perf_debug_ctx(ctx, "Implementing conditional rendering on the CPU"); - union pipe_query_result res = { 0 }; - bool wait = - ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT && - ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT; + union pipe_query_result res = {0}; + bool wait = ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT && + ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT; - struct pipe_query *pq = (struct pipe_query *)ctx->cond_query; + struct pipe_query *pq = (struct pipe_query *)ctx->cond_query; - if (panfrost_get_query_result(&ctx->base, pq, wait, &res)) - return res.u64 != ctx->cond_cond; + if (panfrost_get_query_result(&ctx->base, pq, wait, &res)) + return res.u64 != ctx->cond_cond; - return true; + return true; } static struct pipe_stream_output_target * @@ -779,29 +746,29 @@ panfrost_create_stream_output_target(struct pipe_context *pctx, unsigned buffer_offset, unsigned buffer_size) { - struct pipe_stream_output_target *target; + struct pipe_stream_output_target *target; - target = &rzalloc(pctx, struct panfrost_streamout_target)->base; + target = &rzalloc(pctx, struct panfrost_streamout_target)->base; - if (!target) - return NULL; + if (!target) + return NULL; - pipe_reference_init(&target->reference, 1); - pipe_resource_reference(&target->buffer, prsc); + pipe_reference_init(&target->reference, 1); + pipe_resource_reference(&target->buffer, prsc); - target->context = pctx; - target->buffer_offset = buffer_offset; - target->buffer_size = buffer_size; + target->context = pctx; + target->buffer_offset = buffer_offset; + target->buffer_size = buffer_size; - return target; + return target; } static void panfrost_stream_output_target_destroy(struct pipe_context *pctx, struct pipe_stream_output_target *target) { - pipe_resource_reference(&target->buffer, NULL); - ralloc_free(target); + pipe_resource_reference(&target->buffer, NULL); + ralloc_free(target); } static void @@ -810,200 +777,200 @@ panfrost_set_stream_output_targets(struct pipe_context *pctx, struct pipe_stream_output_target **targets, const unsigned *offsets) { - struct panfrost_context *ctx = pan_context(pctx); - struct panfrost_streamout *so = &ctx->streamout; + struct panfrost_context *ctx = pan_context(pctx); + struct panfrost_streamout *so = &ctx->streamout; - assert(num_targets <= ARRAY_SIZE(so->targets)); + assert(num_targets <= ARRAY_SIZE(so->targets)); - for (unsigned i = 0; i < num_targets; i++) { - if (targets[i] && offsets[i] != -1) - pan_so_target(targets[i])->offset = offsets[i]; + for (unsigned i = 0; i < num_targets; i++) { + if (targets[i] && offsets[i] != -1) + pan_so_target(targets[i])->offset = offsets[i]; - pipe_so_target_reference(&so->targets[i], targets[i]); - } + pipe_so_target_reference(&so->targets[i], targets[i]); + } - for (unsigned i = num_targets; i < so->num_targets; i++) - 
pipe_so_target_reference(&so->targets[i], NULL); + for (unsigned i = num_targets; i < so->num_targets; i++) + pipe_so_target_reference(&so->targets[i], NULL); - so->num_targets = num_targets; - ctx->dirty |= PAN_DIRTY_SO; + so->num_targets = num_targets; + ctx->dirty |= PAN_DIRTY_SO; } static void -panfrost_set_global_binding(struct pipe_context *pctx, - unsigned first, unsigned count, - struct pipe_resource **resources, - uint32_t **handles) +panfrost_set_global_binding(struct pipe_context *pctx, unsigned first, + unsigned count, struct pipe_resource **resources, + uint32_t **handles) { - if (!resources) - return; + if (!resources) + return; - struct panfrost_context *ctx = pan_context(pctx); - struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); + struct panfrost_context *ctx = pan_context(pctx); + struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); - for (unsigned i = first; i < first + count; ++i) { - struct panfrost_resource *rsrc = pan_resource(resources[i]); - panfrost_batch_write_rsrc(batch, rsrc, PIPE_SHADER_COMPUTE); + for (unsigned i = first; i < first + count; ++i) { + struct panfrost_resource *rsrc = pan_resource(resources[i]); + panfrost_batch_write_rsrc(batch, rsrc, PIPE_SHADER_COMPUTE); - util_range_add(&rsrc->base, &rsrc->valid_buffer_range, - 0, rsrc->base.width0); + util_range_add(&rsrc->base, &rsrc->valid_buffer_range, 0, + rsrc->base.width0); - /* The handle points to uint32_t, but space is allocated for 64 - * bits. We need to respect the offset passed in. This interface - * is so bad. - */ - mali_ptr addr = 0; - static_assert(sizeof(addr) == 8, "size out of sync"); + /* The handle points to uint32_t, but space is allocated for 64 + * bits. We need to respect the offset passed in. This interface + * is so bad. 
+ */ + mali_ptr addr = 0; + static_assert(sizeof(addr) == 8, "size out of sync"); - memcpy(&addr, handles[i], sizeof(addr)); - addr += rsrc->image.data.bo->ptr.gpu; + memcpy(&addr, handles[i], sizeof(addr)); + addr += rsrc->image.data.bo->ptr.gpu; - memcpy(handles[i], &addr, sizeof(addr)); - } + memcpy(handles[i], &addr, sizeof(addr)); + } } static void panfrost_memory_barrier(struct pipe_context *pctx, unsigned flags) { - /* TODO: Be smart and only flush the minimum needed, maybe emitting a - * cache flush job if that would help */ - panfrost_flush_all_batches(pan_context(pctx), "Memory barrier"); + /* TODO: Be smart and only flush the minimum needed, maybe emitting a + * cache flush job if that would help */ + panfrost_flush_all_batches(pan_context(pctx), "Memory barrier"); } static void panfrost_create_fence_fd(struct pipe_context *pctx, - struct pipe_fence_handle **pfence, - int fd, enum pipe_fd_type type) + struct pipe_fence_handle **pfence, int fd, + enum pipe_fd_type type) { - *pfence = panfrost_fence_from_fd(pan_context(pctx), fd, type); + *pfence = panfrost_fence_from_fd(pan_context(pctx), fd, type); } static void panfrost_fence_server_sync(struct pipe_context *pctx, struct pipe_fence_handle *f) { - struct panfrost_device *dev = pan_device(pctx->screen); - struct panfrost_context *ctx = pan_context(pctx); - int fd = -1, ret; + struct panfrost_device *dev = pan_device(pctx->screen); + struct panfrost_context *ctx = pan_context(pctx); + int fd = -1, ret; - ret = drmSyncobjExportSyncFile(dev->fd, f->syncobj, &fd); - assert(!ret); + ret = drmSyncobjExportSyncFile(dev->fd, f->syncobj, &fd); + assert(!ret); - sync_accumulate("panfrost", &ctx->in_sync_fd, fd); - close(fd); + sync_accumulate("panfrost", &ctx->in_sync_fd, fd); + close(fd); } struct pipe_context * panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) { - struct panfrost_context *ctx = rzalloc(NULL, struct panfrost_context); - struct pipe_context *gallium = (struct pipe_context *) ctx; - struct panfrost_device *dev = pan_device(screen); + struct panfrost_context *ctx = rzalloc(NULL, struct panfrost_context); + struct pipe_context *gallium = (struct pipe_context *)ctx; + struct panfrost_device *dev = pan_device(screen); - gallium->screen = screen; + gallium->screen = screen; - gallium->destroy = panfrost_destroy; + gallium->destroy = panfrost_destroy; - gallium->set_framebuffer_state = panfrost_set_framebuffer_state; - gallium->set_debug_callback = u_default_set_debug_callback; + gallium->set_framebuffer_state = panfrost_set_framebuffer_state; + gallium->set_debug_callback = u_default_set_debug_callback; - gallium->create_fence_fd = panfrost_create_fence_fd; - gallium->fence_server_sync = panfrost_fence_server_sync; + gallium->create_fence_fd = panfrost_create_fence_fd; + gallium->fence_server_sync = panfrost_fence_server_sync; - gallium->flush = panfrost_flush; - gallium->clear = panfrost_clear; - gallium->clear_texture = util_clear_texture; - gallium->texture_barrier = panfrost_texture_barrier; - gallium->set_frontend_noop = panfrost_set_frontend_noop; + gallium->flush = panfrost_flush; + gallium->clear = panfrost_clear; + gallium->clear_texture = util_clear_texture; + gallium->texture_barrier = panfrost_texture_barrier; + gallium->set_frontend_noop = panfrost_set_frontend_noop; - gallium->set_vertex_buffers = panfrost_set_vertex_buffers; - gallium->set_constant_buffer = panfrost_set_constant_buffer; - gallium->set_shader_buffers = panfrost_set_shader_buffers; - gallium->set_shader_images = 
panfrost_set_shader_images; + gallium->set_vertex_buffers = panfrost_set_vertex_buffers; + gallium->set_constant_buffer = panfrost_set_constant_buffer; + gallium->set_shader_buffers = panfrost_set_shader_buffers; + gallium->set_shader_images = panfrost_set_shader_images; - gallium->set_stencil_ref = panfrost_set_stencil_ref; + gallium->set_stencil_ref = panfrost_set_stencil_ref; - gallium->set_sampler_views = panfrost_set_sampler_views; + gallium->set_sampler_views = panfrost_set_sampler_views; - gallium->bind_rasterizer_state = panfrost_bind_rasterizer_state; - gallium->delete_rasterizer_state = panfrost_generic_cso_delete; + gallium->bind_rasterizer_state = panfrost_bind_rasterizer_state; + gallium->delete_rasterizer_state = panfrost_generic_cso_delete; - gallium->bind_vertex_elements_state = panfrost_bind_vertex_elements_state; - gallium->delete_vertex_elements_state = panfrost_generic_cso_delete; + gallium->bind_vertex_elements_state = panfrost_bind_vertex_elements_state; + gallium->delete_vertex_elements_state = panfrost_generic_cso_delete; - gallium->delete_sampler_state = panfrost_generic_cso_delete; - gallium->bind_sampler_states = panfrost_bind_sampler_states; + gallium->delete_sampler_state = panfrost_generic_cso_delete; + gallium->bind_sampler_states = panfrost_bind_sampler_states; - gallium->bind_depth_stencil_alpha_state = panfrost_bind_depth_stencil_state; - gallium->delete_depth_stencil_alpha_state = panfrost_generic_cso_delete; + gallium->bind_depth_stencil_alpha_state = panfrost_bind_depth_stencil_state; + gallium->delete_depth_stencil_alpha_state = panfrost_generic_cso_delete; - gallium->set_sample_mask = panfrost_set_sample_mask; - gallium->set_min_samples = panfrost_set_min_samples; + gallium->set_sample_mask = panfrost_set_sample_mask; + gallium->set_min_samples = panfrost_set_min_samples; - gallium->set_clip_state = panfrost_set_clip_state; - gallium->set_viewport_states = panfrost_set_viewport_states; - gallium->set_scissor_states = panfrost_set_scissor_states; - gallium->set_polygon_stipple = panfrost_set_polygon_stipple; - gallium->set_active_query_state = panfrost_set_active_query_state; - gallium->render_condition = panfrost_render_condition; + gallium->set_clip_state = panfrost_set_clip_state; + gallium->set_viewport_states = panfrost_set_viewport_states; + gallium->set_scissor_states = panfrost_set_scissor_states; + gallium->set_polygon_stipple = panfrost_set_polygon_stipple; + gallium->set_active_query_state = panfrost_set_active_query_state; + gallium->render_condition = panfrost_render_condition; - gallium->create_query = panfrost_create_query; - gallium->destroy_query = panfrost_destroy_query; - gallium->begin_query = panfrost_begin_query; - gallium->end_query = panfrost_end_query; - gallium->get_query_result = panfrost_get_query_result; + gallium->create_query = panfrost_create_query; + gallium->destroy_query = panfrost_destroy_query; + gallium->begin_query = panfrost_begin_query; + gallium->end_query = panfrost_end_query; + gallium->get_query_result = panfrost_get_query_result; - gallium->create_stream_output_target = panfrost_create_stream_output_target; - gallium->stream_output_target_destroy = panfrost_stream_output_target_destroy; - gallium->set_stream_output_targets = panfrost_set_stream_output_targets; + gallium->create_stream_output_target = panfrost_create_stream_output_target; + gallium->stream_output_target_destroy = + panfrost_stream_output_target_destroy; + gallium->set_stream_output_targets = panfrost_set_stream_output_targets; - 
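
As a side note on the panfrost_set_global_binding hunk above: each handle is typed as uint32_t * but the caller reserves 64 bits behind it, so the driver memcpy's the value out into a mali_ptr, adds the BO's GPU address, and memcpy's the result back. A standalone sketch of that read-modify-write-through-a-narrow-pointer pattern (hypothetical names, values chosen only for the example):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Patch a 64-bit base address into storage that is only *typed* as
 * uint32_t but actually has 8 bytes reserved behind it. memcpy avoids
 * the alignment and strict-aliasing hazards a direct cast would have. */
static void patch_handle(uint32_t *handle, uint64_t gpu_base)
{
   uint64_t addr = 0;

   memcpy(&addr, handle, sizeof(addr)); /* read the caller-provided offset */
   addr += gpu_base;                    /* relocate against the BO address */
   memcpy(handle, &addr, sizeof(addr)); /* write the absolute address back */
}

int main(void)
{
   /* The caller allocates 64 bits per handle, as the interface expects. */
   uint64_t storage = 0x100; /* offset within the buffer */

   patch_handle((uint32_t *)&storage, 0xdead0000ull);
   printf("0x%llx\n", (unsigned long long)storage); /* prints 0xdead0100 */
   return 0;
}
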
gallium->bind_blend_state = panfrost_bind_blend_state; - gallium->delete_blend_state = panfrost_generic_cso_delete; + gallium->bind_blend_state = panfrost_bind_blend_state; + gallium->delete_blend_state = panfrost_generic_cso_delete; - gallium->set_blend_color = panfrost_set_blend_color; + gallium->set_blend_color = panfrost_set_blend_color; - gallium->set_global_binding = panfrost_set_global_binding; - gallium->memory_barrier = panfrost_memory_barrier; + gallium->set_global_binding = panfrost_set_global_binding; + gallium->memory_barrier = panfrost_memory_barrier; - pan_screen(screen)->vtbl.context_init(gallium); + pan_screen(screen)->vtbl.context_init(gallium); - panfrost_resource_context_init(gallium); - panfrost_shader_context_init(gallium); + panfrost_resource_context_init(gallium); + panfrost_shader_context_init(gallium); - gallium->stream_uploader = u_upload_create_default(gallium); - gallium->const_uploader = gallium->stream_uploader; + gallium->stream_uploader = u_upload_create_default(gallium); + gallium->const_uploader = gallium->stream_uploader; - panfrost_pool_init(&ctx->descs, ctx, dev, - 0, 4096, "Descriptors", true, false); + panfrost_pool_init(&ctx->descs, ctx, dev, 0, 4096, "Descriptors", true, + false); - panfrost_pool_init(&ctx->shaders, ctx, dev, - PAN_BO_EXECUTE, 4096, "Shaders", true, false); + panfrost_pool_init(&ctx->shaders, ctx, dev, PAN_BO_EXECUTE, 4096, "Shaders", + true, false); - ctx->blitter = util_blitter_create(gallium); + ctx->blitter = util_blitter_create(gallium); - ctx->writers = _mesa_hash_table_create(gallium, _mesa_hash_pointer, - _mesa_key_pointer_equal); + ctx->writers = _mesa_hash_table_create(gallium, _mesa_hash_pointer, + _mesa_key_pointer_equal); - assert(ctx->blitter); + assert(ctx->blitter); - /* Prepare for render! */ + /* Prepare for render! */ - /* By default mask everything on */ - ctx->sample_mask = ~0; - ctx->active_queries = true; + /* By default mask everything on */ + ctx->sample_mask = ~0; + ctx->active_queries = true; - int ASSERTED ret; + int ASSERTED ret; - /* Create a syncobj in a signaled state. Will be updated to point to the - * last queued job out_sync every time we submit a new job. - */ - ret = drmSyncobjCreate(dev->fd, DRM_SYNCOBJ_CREATE_SIGNALED, &ctx->syncobj); - assert(!ret && ctx->syncobj); + /* Create a syncobj in a signaled state. Will be updated to point to the + * last queued job out_sync every time we submit a new job. + */ + ret = drmSyncobjCreate(dev->fd, DRM_SYNCOBJ_CREATE_SIGNALED, &ctx->syncobj); + assert(!ret && ctx->syncobj); - /* Sync object/FD used for NATIVE_FENCE_FD. */ - ctx->in_sync_fd = -1; - ret = drmSyncobjCreate(dev->fd, 0, &ctx->in_sync_obj); - assert(!ret); + /* Sync object/FD used for NATIVE_FENCE_FD. 
*/ + ctx->in_sync_fd = -1; + ret = drmSyncobjCreate(dev->fd, 0, &ctx->in_sync_obj); + assert(!ret); - return gallium; + return gallium; } diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index e202371c42e..4bc57521649 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -26,206 +26,207 @@ #define __BUILDER_H__ #define _LARGEFILE64_SOURCE 1 -#include #include -#include "pan_resource.h" -#include "pan_job.h" +#include #include "pan_blend_cso.h" -#include "pan_encoder.h" -#include "pan_texture.h" #include "pan_earlyzs.h" +#include "pan_encoder.h" +#include "pan_job.h" +#include "pan_resource.h" +#include "pan_texture.h" #include "pipe/p_compiler.h" -#include "util/detect.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "util/format/u_formats.h" #include "pipe/p_screen.h" #include "pipe/p_state.h" -#include "util/u_blitter.h" +#include "util/detect.h" +#include "util/format/u_formats.h" #include "util/hash_table.h" #include "util/simple_mtx.h" +#include "util/u_blitter.h" -#include "midgard/midgard_compile.h" #include "compiler/shader_enums.h" +#include "midgard/midgard_compile.h" -#define SET_BIT(lval, bit, cond) \ - if (cond) \ - lval |= (bit); \ - else \ - lval &= ~(bit); +#define SET_BIT(lval, bit, cond) \ + if (cond) \ + lval |= (bit); \ + else \ + lval &= ~(bit); /* Dirty tracking flags. 3D is for general 3D state. Shader flags are * per-stage. Renderer refers to Renderer State Descriptors. Vertex refers to * vertex attributes/elements. */ enum pan_dirty_3d { - PAN_DIRTY_VIEWPORT = BITFIELD_BIT(0), - PAN_DIRTY_SCISSOR = BITFIELD_BIT(1), - PAN_DIRTY_VERTEX = BITFIELD_BIT(2), - PAN_DIRTY_PARAMS = BITFIELD_BIT(3), - PAN_DIRTY_DRAWID = BITFIELD_BIT(4), - PAN_DIRTY_TLS_SIZE = BITFIELD_BIT(5), - PAN_DIRTY_ZS = BITFIELD_BIT(6), - PAN_DIRTY_BLEND = BITFIELD_BIT(7), - PAN_DIRTY_MSAA = BITFIELD_BIT(8), - PAN_DIRTY_OQ = BITFIELD_BIT(9), - PAN_DIRTY_RASTERIZER = BITFIELD_BIT(10), - PAN_DIRTY_POINTS = BITFIELD_BIT(11), - PAN_DIRTY_SO = BITFIELD_BIT(12), + PAN_DIRTY_VIEWPORT = BITFIELD_BIT(0), + PAN_DIRTY_SCISSOR = BITFIELD_BIT(1), + PAN_DIRTY_VERTEX = BITFIELD_BIT(2), + PAN_DIRTY_PARAMS = BITFIELD_BIT(3), + PAN_DIRTY_DRAWID = BITFIELD_BIT(4), + PAN_DIRTY_TLS_SIZE = BITFIELD_BIT(5), + PAN_DIRTY_ZS = BITFIELD_BIT(6), + PAN_DIRTY_BLEND = BITFIELD_BIT(7), + PAN_DIRTY_MSAA = BITFIELD_BIT(8), + PAN_DIRTY_OQ = BITFIELD_BIT(9), + PAN_DIRTY_RASTERIZER = BITFIELD_BIT(10), + PAN_DIRTY_POINTS = BITFIELD_BIT(11), + PAN_DIRTY_SO = BITFIELD_BIT(12), }; enum pan_dirty_shader { - PAN_DIRTY_STAGE_SHADER = BITFIELD_BIT(0), - PAN_DIRTY_STAGE_TEXTURE = BITFIELD_BIT(1), - PAN_DIRTY_STAGE_SAMPLER = BITFIELD_BIT(2), - PAN_DIRTY_STAGE_IMAGE = BITFIELD_BIT(3), - PAN_DIRTY_STAGE_CONST = BITFIELD_BIT(4), - PAN_DIRTY_STAGE_SSBO = BITFIELD_BIT(5), + PAN_DIRTY_STAGE_SHADER = BITFIELD_BIT(0), + PAN_DIRTY_STAGE_TEXTURE = BITFIELD_BIT(1), + PAN_DIRTY_STAGE_SAMPLER = BITFIELD_BIT(2), + PAN_DIRTY_STAGE_IMAGE = BITFIELD_BIT(3), + PAN_DIRTY_STAGE_CONST = BITFIELD_BIT(4), + PAN_DIRTY_STAGE_SSBO = BITFIELD_BIT(5), }; struct panfrost_constant_buffer { - struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS]; - uint32_t enabled_mask; + struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS]; + uint32_t enabled_mask; }; struct panfrost_query { - /* Passthrough from Gallium */ - unsigned type; - unsigned index; + /* Passthrough from Gallium */ + unsigned type; + unsigned index; - /* For computed queries. 
64-bit to prevent overflow */ - struct { - uint64_t start; - uint64_t end; - }; + /* For computed queries. 64-bit to prevent overflow */ + struct { + uint64_t start; + uint64_t end; + }; - /* Memory for the GPU to writeback the value of the query */ - struct pipe_resource *rsrc; + /* Memory for the GPU to writeback the value of the query */ + struct pipe_resource *rsrc; - /* Whether an occlusion query is for a MSAA framebuffer */ - bool msaa; + /* Whether an occlusion query is for a MSAA framebuffer */ + bool msaa; }; struct panfrost_streamout_target { - struct pipe_stream_output_target base; - uint32_t offset; + struct pipe_stream_output_target base; + uint32_t offset; }; struct panfrost_streamout { - struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS]; - unsigned num_targets; + struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS]; + unsigned num_targets; }; struct panfrost_context { - /* Gallium context */ - struct pipe_context base; + /* Gallium context */ + struct pipe_context base; - /* Dirty global state */ - enum pan_dirty_3d dirty; + /* Dirty global state */ + enum pan_dirty_3d dirty; - /* Per shader stage dirty state */ - enum pan_dirty_shader dirty_shader[PIPE_SHADER_TYPES]; + /* Per shader stage dirty state */ + enum pan_dirty_shader dirty_shader[PIPE_SHADER_TYPES]; - /* Unowned pools, so manage yourself. */ - struct panfrost_pool descs, shaders; + /* Unowned pools, so manage yourself. */ + struct panfrost_pool descs, shaders; - /* Sync obj used to keep track of in-flight jobs. */ - uint32_t syncobj; + /* Sync obj used to keep track of in-flight jobs. */ + uint32_t syncobj; - /* Set of 32 batches. When the set is full, the LRU entry (the batch - * with the smallest seqnum) is flushed to free a slot. - */ - struct { - uint64_t seqnum; - struct panfrost_batch slots[PAN_MAX_BATCHES]; + /* Set of 32 batches. When the set is full, the LRU entry (the batch + * with the smallest seqnum) is flushed to free a slot. + */ + struct { + uint64_t seqnum; + struct panfrost_batch slots[PAN_MAX_BATCHES]; - /** Set of active batches for faster traversal */ - BITSET_DECLARE(active, PAN_MAX_BATCHES); - } batches; + /** Set of active batches for faster traversal */ + BITSET_DECLARE(active, PAN_MAX_BATCHES); + } batches; - /* Map from resources to panfrost_batches */ - struct hash_table *writers; + /* Map from resources to panfrost_batches */ + struct hash_table *writers; - /* Bound job batch */ - struct panfrost_batch *batch; + /* Bound job batch */ + struct panfrost_batch *batch; - /* Within a launch_grid call.. */ - const struct pipe_grid_info *compute_grid; + /* Within a launch_grid call.. 
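
The batch-set comment above describes flushing the LRU slot, i.e. the occupied slot with the smallest seqnum, once all PAN_MAX_BATCHES slots are in use. A compilable sketch of that selection logic, under the assumption (for the sketch only) that a zero seqnum marks a free slot; the toy_* names are hypothetical:

#include <stddef.h>
#include <stdint.h>

#define TOY_MAX_BATCHES 32

struct toy_batch {
   uint64_t seqnum; /* 0 = unused slot in this sketch */
};

/* Return a free slot if one exists, otherwise the LRU slot: the occupied
 * slot with the smallest seqnum (the oldest batch). When the returned
 * slot was occupied, the caller is expected to flush it before reuse. */
static struct toy_batch *
toy_pick_slot(struct toy_batch slots[TOY_MAX_BATCHES])
{
   struct toy_batch *lru = NULL;

   for (unsigned i = 0; i < TOY_MAX_BATCHES; ++i) {
      if (!slots[i].seqnum)
         return &slots[i]; /* free slot, use it directly */

      if (!lru || slots[i].seqnum < lru->seqnum)
         lru = &slots[i]; /* remember the oldest occupied slot */
   }

   return lru; /* all slots busy: evict the oldest */
}
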
*/ + const struct pipe_grid_info *compute_grid; - struct pipe_framebuffer_state pipe_framebuffer; - struct panfrost_streamout streamout; + struct pipe_framebuffer_state pipe_framebuffer; + struct panfrost_streamout streamout; - bool active_queries; - uint64_t prims_generated; - uint64_t tf_prims_generated; - uint64_t draw_calls; - struct panfrost_query *occlusion_query; + bool active_queries; + uint64_t prims_generated; + uint64_t tf_prims_generated; + uint64_t draw_calls; + struct panfrost_query *occlusion_query; - unsigned drawid; - unsigned vertex_count; - unsigned instance_count; - unsigned offset_start; - unsigned base_vertex; - unsigned base_instance; - enum pipe_prim_type active_prim; + unsigned drawid; + unsigned vertex_count; + unsigned instance_count; + unsigned offset_start; + unsigned base_vertex; + unsigned base_instance; + enum pipe_prim_type active_prim; - /* If instancing is enabled, vertex count padded for instance; if - * it is disabled, just equal to plain vertex count */ - unsigned padded_count; + /* If instancing is enabled, vertex count padded for instance; if + * it is disabled, just equal to plain vertex count */ + unsigned padded_count; - struct panfrost_constant_buffer constant_buffer[PIPE_SHADER_TYPES]; - struct panfrost_rasterizer *rasterizer; - struct panfrost_vertex_state *vertex; + struct panfrost_constant_buffer constant_buffer[PIPE_SHADER_TYPES]; + struct panfrost_rasterizer *rasterizer; + struct panfrost_vertex_state *vertex; - struct panfrost_uncompiled_shader *uncompiled[PIPE_SHADER_TYPES]; - struct panfrost_compiled_shader *prog[PIPE_SHADER_TYPES]; + struct panfrost_uncompiled_shader *uncompiled[PIPE_SHADER_TYPES]; + struct panfrost_compiled_shader *prog[PIPE_SHADER_TYPES]; - struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; - uint32_t vb_mask; + struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; + uint32_t vb_mask; - struct pipe_shader_buffer ssbo[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS]; - uint32_t ssbo_mask[PIPE_SHADER_TYPES]; + struct pipe_shader_buffer ssbo[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS]; + uint32_t ssbo_mask[PIPE_SHADER_TYPES]; - struct pipe_image_view images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES]; - uint32_t image_mask[PIPE_SHADER_TYPES]; + struct pipe_image_view images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES]; + uint32_t image_mask[PIPE_SHADER_TYPES]; - struct panfrost_sampler_state *samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; - unsigned sampler_count[PIPE_SHADER_TYPES]; - uint32_t valid_samplers[PIPE_SHADER_TYPES]; + struct panfrost_sampler_state *samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; + unsigned sampler_count[PIPE_SHADER_TYPES]; + uint32_t valid_samplers[PIPE_SHADER_TYPES]; - struct panfrost_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS]; - unsigned sampler_view_count[PIPE_SHADER_TYPES]; + struct panfrost_sampler_view + *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS]; + unsigned sampler_view_count[PIPE_SHADER_TYPES]; - struct blitter_context *blitter; + struct blitter_context *blitter; - struct panfrost_blend_state *blend; + struct panfrost_blend_state *blend; - /* On Valhall, does the current blend state use a blend shader for any - * output? We need this information in a hot path to decide if - * per-sample shading should be enabled. - */ - bool valhall_has_blend_shader; + /* On Valhall, does the current blend state use a blend shader for any + * output? 
We need this information in a hot path to decide if + * per-sample shading should be enabled. + */ + bool valhall_has_blend_shader; - struct pipe_viewport_state pipe_viewport; - struct pipe_scissor_state scissor; - struct pipe_blend_color blend_color; - struct panfrost_zsa_state *depth_stencil; - struct pipe_stencil_ref stencil_ref; - uint16_t sample_mask; - unsigned min_samples; + struct pipe_viewport_state pipe_viewport; + struct pipe_scissor_state scissor; + struct pipe_blend_color blend_color; + struct panfrost_zsa_state *depth_stencil; + struct pipe_stencil_ref stencil_ref; + uint16_t sample_mask; + unsigned min_samples; - struct panfrost_query *cond_query; - bool cond_cond; - enum pipe_render_cond_flag cond_mode; + struct panfrost_query *cond_query; + bool cond_cond; + enum pipe_render_cond_flag cond_mode; - bool is_noop; + bool is_noop; - /* Mask of active render targets */ - uint8_t fb_rt_mask; + /* Mask of active render targets */ + uint8_t fb_rt_mask; - int in_sync_fd; - uint32_t in_sync_obj; + int in_sync_fd; + uint32_t in_sync_obj; }; /* Corresponds to the CSO */ @@ -234,19 +235,19 @@ struct panfrost_rasterizer; /* Linked varyings */ struct pan_linkage { - /* If the upload is owned by the CSO instead - * of the pool, the referenced BO. Else, - * NULL. */ - struct panfrost_bo *bo; + /* If the upload is owned by the CSO instead + * of the pool, the referenced BO. Else, + * NULL. */ + struct panfrost_bo *bo; - /* Uploaded attribute descriptors */ - mali_ptr producer, consumer; + /* Uploaded attribute descriptors */ + mali_ptr producer, consumer; - /* Varyings buffers required */ - uint32_t present; + /* Varyings buffers required */ + uint32_t present; - /* Per-vertex stride for general varying buffer */ - uint32_t stride; + /* Per-vertex stride for general varying buffer */ + uint32_t stride; }; #define RSD_WORDS 16 @@ -255,89 +256,89 @@ struct pan_linkage { * shaders with varying emulated features baked in */ struct panfrost_fs_key { - /* Number of colour buffers if gl_FragColor is written */ - unsigned nr_cbufs_for_fragcolor; + /* Number of colour buffers if gl_FragColor is written */ + unsigned nr_cbufs_for_fragcolor; - /* On Valhall, fixed_varying_mask of the linked vertex shader */ - uint32_t fixed_varying_mask; + /* On Valhall, fixed_varying_mask of the linked vertex shader */ + uint32_t fixed_varying_mask; - /* Midgard shaders that read the tilebuffer must be keyed for - * non-blendable formats - */ - enum pipe_format rt_formats[8]; + /* Midgard shaders that read the tilebuffer must be keyed for + * non-blendable formats + */ + enum pipe_format rt_formats[8]; - /* From rasterize state, to lower point sprites */ - uint16_t sprite_coord_enable; + /* From rasterize state, to lower point sprites */ + uint16_t sprite_coord_enable; - /* User clip plane lowering */ - uint8_t clip_plane_enable; + /* User clip plane lowering */ + uint8_t clip_plane_enable; }; struct panfrost_shader_key { - union { - /* Vertex shaders do not use shader keys. However, we have a - * special "transform feedback" vertex program derived from a - * vertex shader. If vs_is_xfb is set on a vertex shader, this - * is a transform feedback shader, else it is a regular - * (unkeyed) vertex shader. - */ - bool vs_is_xfb; + union { + /* Vertex shaders do not use shader keys. However, we have a + * special "transform feedback" vertex program derived from a + * vertex shader. If vs_is_xfb is set on a vertex shader, this + * is a transform feedback shader, else it is a regular + * (unkeyed) vertex shader. 
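
The panfrost_shader_key union above keys fragment variants (and the special transform-feedback vertex program) off an uncompiled shader CSO, with compiled variants collected in a per-CSO array. As a generic illustration of how such a key is typically consumed, here is a hedged sketch of a lookup-or-compile variant cache; it is not the driver's actual lookup, and all toy_* names are hypothetical:

#include <stdlib.h>
#include <string.h>

/* Illustrative only: a tiny variant cache keyed by memcmp over a
 * plain-old-data key struct (zero-initialized so padding compares equal). */
struct toy_key {
   unsigned nr_cbufs;
   unsigned sprite_coord_enable;
};

struct toy_variant {
   struct toy_key key;
   void *binary; /* compiled code, elided in this sketch */
};

struct toy_shader {
   struct toy_variant *variants;
   unsigned variant_count;
};

static struct toy_variant *
toy_get_variant(struct toy_shader *s, const struct toy_key *key)
{
   for (unsigned i = 0; i < s->variant_count; ++i) {
      if (!memcmp(&s->variants[i].key, key, sizeof(*key)))
         return &s->variants[i]; /* cache hit: reuse compiled variant */
   }

   /* Miss: "compile" and append a new variant (compilation and realloc
    * error handling elided for brevity). */
   s->variants = realloc(s->variants,
                         (s->variant_count + 1) * sizeof(*s->variants));
   struct toy_variant *v = &s->variants[s->variant_count++];
   memset(v, 0, sizeof(*v));
   v->key = *key;
   return v;
}
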
+ */ + bool vs_is_xfb; - /* Fragment shaders use regular shader keys */ - struct panfrost_fs_key fs; - }; + /* Fragment shaders use regular shader keys */ + struct panfrost_fs_key fs; + }; }; struct panfrost_compiled_shader { - /* Respectively, shader binary and Renderer State Descriptor */ - struct panfrost_pool_ref bin, state; + /* Respectively, shader binary and Renderer State Descriptor */ + struct panfrost_pool_ref bin, state; - /* For fragment shaders, a prepared (but not uploaded RSD) */ - uint32_t partial_rsd[RSD_WORDS]; + /* For fragment shaders, a prepared (but not uploaded RSD) */ + uint32_t partial_rsd[RSD_WORDS]; - struct pan_shader_info info; + struct pan_shader_info info; - struct pan_earlyzs_lut earlyzs; + struct pan_earlyzs_lut earlyzs; - /* Linked varyings, for non-separable programs */ - struct pan_linkage linkage; + /* Linked varyings, for non-separable programs */ + struct pan_linkage linkage; - struct pipe_stream_output_info stream_output; + struct pipe_stream_output_info stream_output; - struct panfrost_shader_key key; + struct panfrost_shader_key key; - /* Mask of state that dirties the sysvals */ - unsigned dirty_3d, dirty_shader; + /* Mask of state that dirties the sysvals */ + unsigned dirty_3d, dirty_shader; }; /* Shader CSO */ struct panfrost_uncompiled_shader { - /* NIR for the shader. For graphics, this will be non-NULL even for - * TGSI. For compute, this will be NULL after the shader is compiled, - * as we don't need any compute variants. - */ - const nir_shader *nir; + /* NIR for the shader. For graphics, this will be non-NULL even for + * TGSI. For compute, this will be NULL after the shader is compiled, + * as we don't need any compute variants. + */ + const nir_shader *nir; - /* A SHA1 of the serialized NIR for the disk cache. */ - unsigned char nir_sha1[20]; + /* A SHA1 of the serialized NIR for the disk cache. */ + unsigned char nir_sha1[20]; - /* Stream output information */ - struct pipe_stream_output_info stream_output; + /* Stream output information */ + struct pipe_stream_output_info stream_output; - /** Lock for the variants array */ - simple_mtx_t lock; + /** Lock for the variants array */ + simple_mtx_t lock; - /* Array of panfrost_compiled_shader */ - struct util_dynarray variants; + /* Array of panfrost_compiled_shader */ + struct util_dynarray variants; - /* Compiled transform feedback program, if one is required */ - struct panfrost_compiled_shader *xfb; + /* Compiled transform feedback program, if one is required */ + struct panfrost_compiled_shader *xfb; - /* On vertex shaders, bit mask of special desktop-only varyings to link - * with the fragment shader. Used on Valhall to implement separable - * shaders for desktop GL. - */ - uint32_t fixed_varying_mask; + /* On vertex shaders, bit mask of special desktop-only varyings to link + * with the fragment shader. Used on Valhall to implement separable + * shaders for desktop GL. + */ + uint32_t fixed_varying_mask; }; /* The binary artefacts of compiling a shader. This differs from @@ -347,11 +348,11 @@ struct panfrost_uncompiled_shader { * This structure is serialized for the shader disk cache. 
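
The structure defined just below is what the pan_disk_cache.c hunk further down writes to the cache: a u32 binary size, then the binary bytes, then the shader info, using Mesa's util/blob.h helpers. A compressed sketch of that round trip using the same blob calls that appear in the hunk; it assumes it is built inside the Mesa tree, and toy_info stands in for struct pan_shader_info:

#include <stdbool.h>
#include <stdint.h>
#include "util/blob.h"

struct toy_info {
   uint32_t work_reg_count; /* placeholder for real shader info fields */
};

static void
toy_serialize(struct blob *blob, const void *binary, uint32_t binary_size,
              const struct toy_info *info)
{
   blob_write_uint32(blob, binary_size);        /* 1. size of program binary */
   blob_write_bytes(blob, binary, binary_size); /* 2. program binary */
   blob_write_bytes(blob, info, sizeof(*info)); /* 3. shader info */
}

static bool
toy_deserialize(struct blob_reader *blob, void *binary_out, uint32_t max_size,
                struct toy_info *info_out)
{
   uint32_t binary_size = blob_read_uint32(blob);
   if (binary_size > max_size)
      return false;

   blob_copy_bytes(blob, binary_out, binary_size);
   blob_copy_bytes(blob, info_out, sizeof(*info_out));
   return true; /* overrun handling elided in this sketch */
}
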
*/ struct panfrost_shader_binary { - /* Collected information about the compiled shader */ - struct pan_shader_info info; + /* Collected information about the compiled shader */ + struct pan_shader_info info; - /* The binary itself */ - struct util_dynarray binary; + /* The binary itself */ + struct util_dynarray binary; }; void @@ -360,28 +361,25 @@ panfrost_disk_cache_store(struct disk_cache *cache, const struct panfrost_shader_key *key, const struct panfrost_shader_binary *binary); -bool -panfrost_disk_cache_retrieve(struct disk_cache *cache, - const struct panfrost_uncompiled_shader *uncompiled, - const struct panfrost_shader_key *key, - struct panfrost_shader_binary *binary); +bool panfrost_disk_cache_retrieve( + struct disk_cache *cache, + const struct panfrost_uncompiled_shader *uncompiled, + const struct panfrost_shader_key *key, + struct panfrost_shader_binary *binary); -void -panfrost_disk_cache_init(struct panfrost_screen *screen); +void panfrost_disk_cache_init(struct panfrost_screen *screen); /** (Vertex buffer index, divisor) tuple that will become an Attribute Buffer * Descriptor at draw-time on Midgard */ struct pan_vertex_buffer { - unsigned vbi; - unsigned divisor; + unsigned vbi; + unsigned divisor; }; -unsigned -pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers, - unsigned *nr_bufs, - unsigned vbi, - unsigned divisor); +unsigned pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers, + unsigned *nr_bufs, unsigned vbi, + unsigned divisor); struct panfrost_zsa_state; struct panfrost_sampler_state; @@ -391,39 +389,32 @@ struct panfrost_vertex_state; static inline struct panfrost_context * pan_context(struct pipe_context *pcontext) { - return (struct panfrost_context *) pcontext; + return (struct panfrost_context *)pcontext; } static inline struct panfrost_streamout_target * pan_so_target(struct pipe_stream_output_target *target) { - return (struct panfrost_streamout_target *)target; + return (struct panfrost_streamout_target *)target; } -struct pipe_context * -panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags); +struct pipe_context *panfrost_create_context(struct pipe_screen *screen, + void *priv, unsigned flags); -bool -panfrost_writes_point_size(struct panfrost_context *ctx); +bool panfrost_writes_point_size(struct panfrost_context *ctx); -struct panfrost_ptr -panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler); +struct panfrost_ptr panfrost_vertex_tiler_job(struct panfrost_context *ctx, + bool is_tiler); -void -panfrost_flush( - struct pipe_context *pipe, - struct pipe_fence_handle **fence, - unsigned flags); +void panfrost_flush(struct pipe_context *pipe, struct pipe_fence_handle **fence, + unsigned flags); -bool -panfrost_render_condition_check(struct panfrost_context *ctx); +bool panfrost_render_condition_check(struct panfrost_context *ctx); -void -panfrost_update_shader_variant(struct panfrost_context *ctx, - enum pipe_shader_type type); +void panfrost_update_shader_variant(struct panfrost_context *ctx, + enum pipe_shader_type type); -void -panfrost_analyze_sysvals(struct panfrost_compiled_shader *ss); +void panfrost_analyze_sysvals(struct panfrost_compiled_shader *ss); mali_ptr panfrost_get_index_buffer(struct panfrost_batch *batch, @@ -438,41 +429,37 @@ panfrost_get_index_buffer_bounded(struct panfrost_batch *batch, /* Instancing */ -mali_ptr -panfrost_vertex_buffer_address(struct panfrost_context *ctx, unsigned i); +mali_ptr panfrost_vertex_buffer_address(struct panfrost_context *ctx, + unsigned i); 
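
pan_assign_vertex_buffer, declared just above and implemented in the pan_helpers.c hunk below, deduplicates (vertex buffer index, divisor) pairs so that each unique pair occupies exactly one attribute buffer slot. A standalone sketch of that look-up-or-append pattern, with hypothetical toy_* names; the caller is assumed to size the array generously:

#include <stdio.h>

struct toy_vertex_buffer {
   unsigned vbi;     /* vertex buffer index */
   unsigned divisor; /* instance divisor */
};

/* Return the slot for (vbi, divisor), creating it if it does not exist
 * yet. Mirrors the shape of pan_assign_vertex_buffer in the hunk below. */
static unsigned
toy_assign_vertex_buffer(struct toy_vertex_buffer *buffers, unsigned *nr_bufs,
                         unsigned vbi, unsigned divisor)
{
   for (unsigned i = 0; i < *nr_bufs; ++i) {
      if (buffers[i].vbi == vbi && buffers[i].divisor == divisor)
         return i; /* this pair already has a slot */
   }

   unsigned idx = (*nr_bufs)++;
   buffers[idx].vbi = vbi;
   buffers[idx].divisor = divisor;
   return idx;
}

int main(void)
{
   struct toy_vertex_buffer bufs[8];
   unsigned nr = 0;

   unsigned a = toy_assign_vertex_buffer(bufs, &nr, 0, 0);
   unsigned b = toy_assign_vertex_buffer(bufs, &nr, 0, 4); /* new: divisor differs */
   unsigned c = toy_assign_vertex_buffer(bufs, &nr, 0, 0); /* reuses slot a */

   printf("%u %u %u (nr=%u)\n", a, b, c, nr); /* prints: 0 1 0 (nr=2) */
   return 0;
}
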
-void -panfrost_shader_context_init(struct pipe_context *pctx); +void panfrost_shader_context_init(struct pipe_context *pctx); static inline void panfrost_dirty_state_all(struct panfrost_context *ctx) { - ctx->dirty = ~0; + ctx->dirty = ~0; - for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) - ctx->dirty_shader[i] = ~0; + for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) + ctx->dirty_shader[i] = ~0; } static inline void panfrost_clean_state_3d(struct panfrost_context *ctx) { - ctx->dirty = 0; + ctx->dirty = 0; - for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) { - if (i != PIPE_SHADER_COMPUTE) - ctx->dirty_shader[i] = 0; - } + for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) { + if (i != PIPE_SHADER_COMPUTE) + ctx->dirty_shader[i] = 0; + } } -void -panfrost_set_batch_masks_blend(struct panfrost_batch *batch); +void panfrost_set_batch_masks_blend(struct panfrost_batch *batch); -void -panfrost_set_batch_masks_zs(struct panfrost_batch *batch); +void panfrost_set_batch_masks_zs(struct panfrost_batch *batch); -void -panfrost_track_image_access(struct panfrost_batch *batch, - enum pipe_shader_type stage, - struct pipe_image_view *image); +void panfrost_track_image_access(struct panfrost_batch *batch, + enum pipe_shader_type stage, + struct pipe_image_view *image); #endif diff --git a/src/gallium/drivers/panfrost/pan_disk_cache.c b/src/gallium/drivers/panfrost/pan_disk_cache.c index 056825dc7d6..dfe57dd6bab 100644 --- a/src/gallium/drivers/panfrost/pan_disk_cache.c +++ b/src/gallium/drivers/panfrost/pan_disk_cache.c @@ -21,9 +21,9 @@ * DEALINGS IN THE SOFTWARE. */ -#include -#include #include +#include +#include #include #include "compiler/nir/nir.h" @@ -43,17 +43,17 @@ extern int bifrost_debug; * Compute a disk cache key for the given uncompiled shader and shader key. */ static void -panfrost_disk_cache_compute_key(struct disk_cache *cache, - const struct panfrost_uncompiled_shader *uncompiled, - const struct panfrost_shader_key *shader_key, - cache_key cache_key) +panfrost_disk_cache_compute_key( + struct disk_cache *cache, + const struct panfrost_uncompiled_shader *uncompiled, + const struct panfrost_shader_key *shader_key, cache_key cache_key) { - uint8_t data[sizeof(uncompiled->nir_sha1) + sizeof(*shader_key)]; + uint8_t data[sizeof(uncompiled->nir_sha1) + sizeof(*shader_key)]; - memcpy(data, uncompiled->nir_sha1, sizeof(uncompiled->nir_sha1)); - memcpy(data + sizeof(uncompiled->nir_sha1), shader_key, sizeof(*shader_key)); + memcpy(data, uncompiled->nir_sha1, sizeof(uncompiled->nir_sha1)); + memcpy(data + sizeof(uncompiled->nir_sha1), shader_key, sizeof(*shader_key)); - disk_cache_compute_key(cache, data, sizeof(data), cache_key); + disk_cache_compute_key(cache, data, sizeof(data), cache_key); } /** @@ -69,33 +69,33 @@ panfrost_disk_cache_store(struct disk_cache *cache, const struct panfrost_shader_binary *binary) { #ifdef ENABLE_SHADER_CACHE - if (!cache) - return; + if (!cache) + return; - cache_key cache_key; - panfrost_disk_cache_compute_key(cache, uncompiled, key, cache_key); + cache_key cache_key; + panfrost_disk_cache_compute_key(cache, uncompiled, key, cache_key); - if (debug) { - char sha1[41]; - _mesa_sha1_format(sha1, cache_key); - fprintf(stderr, "[mesa disk cache] storing %s\n", sha1); - } + if (debug) { + char sha1[41]; + _mesa_sha1_format(sha1, cache_key); + fprintf(stderr, "[mesa disk cache] storing %s\n", sha1); + } - struct blob blob; - blob_init(&blob); + struct blob blob; + blob_init(&blob); - /* We write the following data to the cache blob: - * - * 1. 
Size of program binary - * 2. Program binary - * 3. Shader info - */ - blob_write_uint32(&blob, binary->binary.size); - blob_write_bytes(&blob, binary->binary.data, binary->binary.size); - blob_write_bytes(&blob, &binary->info, sizeof(binary->info)); + /* We write the following data to the cache blob: + * + * 1. Size of program binary + * 2. Program binary + * 3. Shader info + */ + blob_write_uint32(&blob, binary->binary.size); + blob_write_bytes(&blob, binary->binary.data, binary->binary.size); + blob_write_bytes(&blob, &binary->info, sizeof(binary->info)); - disk_cache_put(cache, cache_key, blob.data, blob.size, NULL); - blob_finish(&blob); + disk_cache_put(cache, cache_key, blob.data, blob.size, NULL); + blob_finish(&blob); #endif } @@ -109,43 +109,43 @@ panfrost_disk_cache_retrieve(struct disk_cache *cache, struct panfrost_shader_binary *binary) { #ifdef ENABLE_SHADER_CACHE - if (!cache) - return false; + if (!cache) + return false; - cache_key cache_key; - panfrost_disk_cache_compute_key(cache, uncompiled, key, cache_key); + cache_key cache_key; + panfrost_disk_cache_compute_key(cache, uncompiled, key, cache_key); - if (debug) { - char sha1[41]; - _mesa_sha1_format(sha1, cache_key); - fprintf(stderr, "[mesa disk cache] retrieving %s: ", sha1); - } + if (debug) { + char sha1[41]; + _mesa_sha1_format(sha1, cache_key); + fprintf(stderr, "[mesa disk cache] retrieving %s: ", sha1); + } - size_t size; - void *buffer = disk_cache_get(cache, cache_key, &size); + size_t size; + void *buffer = disk_cache_get(cache, cache_key, &size); - if (debug) - fprintf(stderr, "%s\n", buffer ? "found" : "missing"); + if (debug) + fprintf(stderr, "%s\n", buffer ? "found" : "missing"); - if (!buffer) - return false; + if (!buffer) + return false; - struct blob_reader blob; - blob_reader_init(&blob, buffer, size); + struct blob_reader blob; + blob_reader_init(&blob, buffer, size); - util_dynarray_init(&binary->binary, NULL); + util_dynarray_init(&binary->binary, NULL); - uint32_t binary_size = blob_read_uint32(&blob); - void *ptr = util_dynarray_resize_bytes(&binary->binary, binary_size, 1); + uint32_t binary_size = blob_read_uint32(&blob); + void *ptr = util_dynarray_resize_bytes(&binary->binary, binary_size, 1); - blob_copy_bytes(&blob, ptr, binary_size); - blob_copy_bytes(&blob, &binary->info, sizeof(binary->info)); + blob_copy_bytes(&blob, ptr, binary_size); + blob_copy_bytes(&blob, &binary->info, sizeof(binary->info)); - free(buffer); + free(buffer); - return true; + return true; #else - return false; + return false; #endif } @@ -156,22 +156,22 @@ void panfrost_disk_cache_init(struct panfrost_screen *screen) { #ifdef ENABLE_SHADER_CACHE - const char *renderer = screen->base.get_name(&screen->base); + const char *renderer = screen->base.get_name(&screen->base); - const struct build_id_note *note = - build_id_find_nhdr_for_addr(panfrost_disk_cache_init); - assert(note && build_id_length(note) == 20); /* sha1 */ + const struct build_id_note *note = + build_id_find_nhdr_for_addr(panfrost_disk_cache_init); + assert(note && build_id_length(note) == 20); /* sha1 */ - const uint8_t *id_sha1 = build_id_data(note); - assert(id_sha1); + const uint8_t *id_sha1 = build_id_data(note); + assert(id_sha1); - char timestamp[41]; - _mesa_sha1_format(timestamp, id_sha1); + char timestamp[41]; + _mesa_sha1_format(timestamp, id_sha1); - /* Consider any flags affecting the compile when caching */ - uint64_t driver_flags = screen->dev.debug; - driver_flags |= ((uint64_t) (midgard_debug | bifrost_debug) << 32); + /* Consider any 
flags affecting the compile when caching */ + uint64_t driver_flags = screen->dev.debug; + driver_flags |= ((uint64_t)(midgard_debug | bifrost_debug) << 32); - screen->disk_cache = disk_cache_create(renderer, timestamp, driver_flags); + screen->disk_cache = disk_cache_create(renderer, timestamp, driver_flags); #endif } diff --git a/src/gallium/drivers/panfrost/pan_fence.c b/src/gallium/drivers/panfrost/pan_fence.c index 655644ec495..792550371f8 100644 --- a/src/gallium/drivers/panfrost/pan_fence.c +++ b/src/gallium/drivers/panfrost/pan_fence.c @@ -26,8 +26,8 @@ * SOFTWARE. */ -#include "pan_context.h" #include "pan_fence.h" +#include "pan_context.h" #include "pan_screen.h" #include "util/os_time.h" @@ -38,117 +38,112 @@ panfrost_fence_reference(struct pipe_screen *pscreen, struct pipe_fence_handle **ptr, struct pipe_fence_handle *fence) { - struct panfrost_device *dev = pan_device(pscreen); - struct pipe_fence_handle *old = *ptr; + struct panfrost_device *dev = pan_device(pscreen); + struct pipe_fence_handle *old = *ptr; - if (pipe_reference(&old->reference, &fence->reference)) { - drmSyncobjDestroy(dev->fd, old->syncobj); - free(old); - } + if (pipe_reference(&old->reference, &fence->reference)) { + drmSyncobjDestroy(dev->fd, old->syncobj); + free(old); + } - *ptr = fence; + *ptr = fence; } bool -panfrost_fence_finish(struct pipe_screen *pscreen, - struct pipe_context *ctx, - struct pipe_fence_handle *fence, - uint64_t timeout) +panfrost_fence_finish(struct pipe_screen *pscreen, struct pipe_context *ctx, + struct pipe_fence_handle *fence, uint64_t timeout) { - struct panfrost_device *dev = pan_device(pscreen); - int ret; + struct panfrost_device *dev = pan_device(pscreen); + int ret; - if (fence->signaled) - return true; + if (fence->signaled) + return true; - uint64_t abs_timeout = os_time_get_absolute_timeout(timeout); - if (abs_timeout == OS_TIMEOUT_INFINITE) - abs_timeout = INT64_MAX; + uint64_t abs_timeout = os_time_get_absolute_timeout(timeout); + if (abs_timeout == OS_TIMEOUT_INFINITE) + abs_timeout = INT64_MAX; - ret = drmSyncobjWait(dev->fd, &fence->syncobj, - 1, - abs_timeout, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, - NULL); + ret = drmSyncobjWait(dev->fd, &fence->syncobj, 1, abs_timeout, + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, NULL); - fence->signaled = (ret >= 0); - return fence->signaled; + fence->signaled = (ret >= 0); + return fence->signaled; } int -panfrost_fence_get_fd(struct pipe_screen *screen, - struct pipe_fence_handle *f) +panfrost_fence_get_fd(struct pipe_screen *screen, struct pipe_fence_handle *f) { - struct panfrost_device *dev = pan_device(screen); - int fd = -1; + struct panfrost_device *dev = pan_device(screen); + int fd = -1; - drmSyncobjExportSyncFile(dev->fd, f->syncobj, &fd); - return fd; + drmSyncobjExportSyncFile(dev->fd, f->syncobj, &fd); + return fd; } struct pipe_fence_handle * panfrost_fence_from_fd(struct panfrost_context *ctx, int fd, enum pipe_fd_type type) { - struct panfrost_device *dev = pan_device(ctx->base.screen); - int ret; + struct panfrost_device *dev = pan_device(ctx->base.screen); + int ret; - struct pipe_fence_handle *f = calloc(1, sizeof(*f)); - if (!f) - return NULL; + struct pipe_fence_handle *f = calloc(1, sizeof(*f)); + if (!f) + return NULL; - if (type == PIPE_FD_TYPE_NATIVE_SYNC) { - ret = drmSyncobjCreate(dev->fd, 0, &f->syncobj); - if (ret) { - fprintf(stderr, "create syncobj failed\n"); - goto err_free_fence; - } + if (type == PIPE_FD_TYPE_NATIVE_SYNC) { + ret = drmSyncobjCreate(dev->fd, 0, &f->syncobj); + if (ret) { + fprintf(stderr, 
"create syncobj failed\n"); + goto err_free_fence; + } - ret = drmSyncobjImportSyncFile(dev->fd, f->syncobj, fd); - if (ret) { - fprintf(stderr, "import syncfile failed\n"); - goto err_destroy_syncobj; - } - } else { - assert(type == PIPE_FD_TYPE_SYNCOBJ); - ret = drmSyncobjFDToHandle(dev->fd, fd, &f->syncobj); - if (ret) { - fprintf(stderr, "import syncobj FD failed\n"); - goto err_free_fence; - } - } + ret = drmSyncobjImportSyncFile(dev->fd, f->syncobj, fd); + if (ret) { + fprintf(stderr, "import syncfile failed\n"); + goto err_destroy_syncobj; + } + } else { + assert(type == PIPE_FD_TYPE_SYNCOBJ); + ret = drmSyncobjFDToHandle(dev->fd, fd, &f->syncobj); + if (ret) { + fprintf(stderr, "import syncobj FD failed\n"); + goto err_free_fence; + } + } - pipe_reference_init(&f->reference, 1); + pipe_reference_init(&f->reference, 1); - return f; + return f; err_destroy_syncobj: - drmSyncobjDestroy(dev->fd, f->syncobj); + drmSyncobjDestroy(dev->fd, f->syncobj); err_free_fence: - free(f); - return NULL; + free(f); + return NULL; } struct pipe_fence_handle * panfrost_fence_create(struct panfrost_context *ctx) { - struct panfrost_device *dev = pan_device(ctx->base.screen); - int fd = -1, ret; + struct panfrost_device *dev = pan_device(ctx->base.screen); + int fd = -1, ret; - /* Snapshot the last rendering out fence. We'd rather have another - * syncobj instead of a sync file, but this is all we get. - * (HandleToFD/FDToHandle just gives you another syncobj ID for the - * same syncobj). - */ - ret = drmSyncobjExportSyncFile(dev->fd, ctx->syncobj, &fd); - if (ret || fd == -1) { - fprintf(stderr, "export failed\n"); - return NULL; - } + /* Snapshot the last rendering out fence. We'd rather have another + * syncobj instead of a sync file, but this is all we get. + * (HandleToFD/FDToHandle just gives you another syncobj ID for the + * same syncobj). 
+ */ + ret = drmSyncobjExportSyncFile(dev->fd, ctx->syncobj, &fd); + if (ret || fd == -1) { + fprintf(stderr, "export failed\n"); + return NULL; + } - struct pipe_fence_handle *f = - panfrost_fence_from_fd(ctx, fd, PIPE_FD_TYPE_NATIVE_SYNC); + struct pipe_fence_handle *f = + panfrost_fence_from_fd(ctx, fd, PIPE_FD_TYPE_NATIVE_SYNC); - close(fd); + close(fd); - return f; + return f; } diff --git a/src/gallium/drivers/panfrost/pan_fence.h b/src/gallium/drivers/panfrost/pan_fence.h index 350f3682343..6a8cc74dc95 100644 --- a/src/gallium/drivers/panfrost/pan_fence.h +++ b/src/gallium/drivers/panfrost/pan_fence.h @@ -30,29 +30,24 @@ struct panfrost_context; struct pipe_fence_handle { - struct pipe_reference reference; - uint32_t syncobj; - bool signaled; + struct pipe_reference reference; + uint32_t syncobj; + bool signaled; }; -void -panfrost_fence_reference(struct pipe_screen *pscreen, - struct pipe_fence_handle **ptr, - struct pipe_fence_handle *fence); +void panfrost_fence_reference(struct pipe_screen *pscreen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence); -bool -panfrost_fence_finish(struct pipe_screen *pscreen, - struct pipe_context *ctx, - struct pipe_fence_handle *fence, - uint64_t timeout); +bool panfrost_fence_finish(struct pipe_screen *pscreen, + struct pipe_context *ctx, + struct pipe_fence_handle *fence, uint64_t timeout); -int -panfrost_fence_get_fd(struct pipe_screen *screen, - struct pipe_fence_handle *f); +int panfrost_fence_get_fd(struct pipe_screen *screen, + struct pipe_fence_handle *f); -struct pipe_fence_handle * -panfrost_fence_from_fd(struct panfrost_context *ctx, int fd, - enum pipe_fd_type type); +struct pipe_fence_handle *panfrost_fence_from_fd(struct panfrost_context *ctx, + int fd, + enum pipe_fd_type type); -struct pipe_fence_handle * -panfrost_fence_create(struct panfrost_context *ctx); +struct pipe_fence_handle *panfrost_fence_create(struct panfrost_context *ctx); diff --git a/src/gallium/drivers/panfrost/pan_helpers.c b/src/gallium/drivers/panfrost/pan_helpers.c index 2e2b9a6189e..fb27e102fc5 100644 --- a/src/gallium/drivers/panfrost/pan_helpers.c +++ b/src/gallium/drivers/panfrost/pan_helpers.c @@ -21,66 +21,66 @@ * SOFTWARE. 
*/ -#include "pan_context.h" #include "util/u_vbuf.h" +#include "pan_context.h" void panfrost_analyze_sysvals(struct panfrost_compiled_shader *ss) { - unsigned dirty = 0; - unsigned dirty_shader = PAN_DIRTY_STAGE_SHADER | PAN_DIRTY_STAGE_CONST; + unsigned dirty = 0; + unsigned dirty_shader = PAN_DIRTY_STAGE_SHADER | PAN_DIRTY_STAGE_CONST; - for (unsigned i = 0; i < ss->info.sysvals.sysval_count; ++i) { - switch (PAN_SYSVAL_TYPE(ss->info.sysvals.sysvals[i])) { - case PAN_SYSVAL_VIEWPORT_SCALE: - case PAN_SYSVAL_VIEWPORT_OFFSET: - dirty |= PAN_DIRTY_VIEWPORT; - break; + for (unsigned i = 0; i < ss->info.sysvals.sysval_count; ++i) { + switch (PAN_SYSVAL_TYPE(ss->info.sysvals.sysvals[i])) { + case PAN_SYSVAL_VIEWPORT_SCALE: + case PAN_SYSVAL_VIEWPORT_OFFSET: + dirty |= PAN_DIRTY_VIEWPORT; + break; - case PAN_SYSVAL_TEXTURE_SIZE: - dirty_shader |= PAN_DIRTY_STAGE_TEXTURE; - break; + case PAN_SYSVAL_TEXTURE_SIZE: + dirty_shader |= PAN_DIRTY_STAGE_TEXTURE; + break; - case PAN_SYSVAL_SSBO: - dirty_shader |= PAN_DIRTY_STAGE_SSBO; - break; + case PAN_SYSVAL_SSBO: + dirty_shader |= PAN_DIRTY_STAGE_SSBO; + break; - case PAN_SYSVAL_XFB: - dirty |= PAN_DIRTY_SO; - break; + case PAN_SYSVAL_XFB: + dirty |= PAN_DIRTY_SO; + break; - case PAN_SYSVAL_SAMPLER: - dirty_shader |= PAN_DIRTY_STAGE_SAMPLER; - break; + case PAN_SYSVAL_SAMPLER: + dirty_shader |= PAN_DIRTY_STAGE_SAMPLER; + break; - case PAN_SYSVAL_IMAGE_SIZE: - dirty_shader |= PAN_DIRTY_STAGE_IMAGE; - break; + case PAN_SYSVAL_IMAGE_SIZE: + dirty_shader |= PAN_DIRTY_STAGE_IMAGE; + break; - case PAN_SYSVAL_NUM_WORK_GROUPS: - case PAN_SYSVAL_LOCAL_GROUP_SIZE: - case PAN_SYSVAL_WORK_DIM: - case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS: - case PAN_SYSVAL_NUM_VERTICES: - dirty |= PAN_DIRTY_PARAMS; - break; + case PAN_SYSVAL_NUM_WORK_GROUPS: + case PAN_SYSVAL_LOCAL_GROUP_SIZE: + case PAN_SYSVAL_WORK_DIM: + case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS: + case PAN_SYSVAL_NUM_VERTICES: + dirty |= PAN_DIRTY_PARAMS; + break; - case PAN_SYSVAL_DRAWID: - dirty |= PAN_DIRTY_DRAWID; - break; + case PAN_SYSVAL_DRAWID: + dirty |= PAN_DIRTY_DRAWID; + break; - case PAN_SYSVAL_SAMPLE_POSITIONS: - case PAN_SYSVAL_MULTISAMPLED: - case PAN_SYSVAL_RT_CONVERSION: - /* Nothing beyond the batch itself */ - break; - default: - unreachable("Invalid sysval"); - } - } + case PAN_SYSVAL_SAMPLE_POSITIONS: + case PAN_SYSVAL_MULTISAMPLED: + case PAN_SYSVAL_RT_CONVERSION: + /* Nothing beyond the batch itself */ + break; + default: + unreachable("Invalid sysval"); + } + } - ss->dirty_3d = dirty; - ss->dirty_shader = dirty_shader; + ss->dirty_3d = dirty; + ss->dirty_shader = dirty_shader; } /* @@ -93,25 +93,22 @@ panfrost_get_index_buffer(struct panfrost_batch *batch, const struct pipe_draw_info *info, const struct pipe_draw_start_count_bias *draw) { - struct panfrost_resource *rsrc = pan_resource(info->index.resource); - off_t offset = draw->start * info->index_size; + struct panfrost_resource *rsrc = pan_resource(info->index.resource); + off_t offset = draw->start * info->index_size; - if (!info->has_user_indices) { - /* Only resources can be directly mapped */ - panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX); - return rsrc->image.data.bo->ptr.gpu + offset; - } else { - /* Otherwise, we need to upload to transient memory */ - const uint8_t *ibuf8 = (const uint8_t *) info->index.user; - struct panfrost_ptr T = - pan_pool_alloc_aligned(&batch->pool.base, - draw->count * - info->index_size, - info->index_size); + if (!info->has_user_indices) { + /* Only resources can be directly mapped */ + 
panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX); + return rsrc->image.data.bo->ptr.gpu + offset; + } else { + /* Otherwise, we need to upload to transient memory */ + const uint8_t *ibuf8 = (const uint8_t *)info->index.user; + struct panfrost_ptr T = pan_pool_alloc_aligned( + &batch->pool.base, draw->count * info->index_size, info->index_size); - memcpy(T.cpu, ibuf8 + offset, draw->count * info->index_size); - return T.gpu; - } + memcpy(T.cpu, ibuf8 + offset, draw->count * info->index_size); + return T.gpu; + } } /* Gets a GPU address for the associated index buffer. Only gauranteed to be @@ -126,34 +123,30 @@ panfrost_get_index_buffer_bounded(struct panfrost_batch *batch, const struct pipe_draw_start_count_bias *draw, unsigned *min_index, unsigned *max_index) { - struct panfrost_resource *rsrc = pan_resource(info->index.resource); - struct panfrost_context *ctx = batch->ctx; - bool needs_indices = true; + struct panfrost_resource *rsrc = pan_resource(info->index.resource); + struct panfrost_context *ctx = batch->ctx; + bool needs_indices = true; - if (info->index_bounds_valid) { - *min_index = info->min_index; - *max_index = info->max_index; - needs_indices = false; - } else if (!info->has_user_indices) { - /* Check the cache */ - needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache, - draw->start, - draw->count, - min_index, - max_index); - } + if (info->index_bounds_valid) { + *min_index = info->min_index; + *max_index = info->max_index; + needs_indices = false; + } else if (!info->has_user_indices) { + /* Check the cache */ + needs_indices = !panfrost_minmax_cache_get( + rsrc->index_cache, draw->start, draw->count, min_index, max_index); + } - if (needs_indices) { - /* Fallback */ - u_vbuf_get_minmax_index(&ctx->base, info, draw, min_index, max_index); + if (needs_indices) { + /* Fallback */ + u_vbuf_get_minmax_index(&ctx->base, info, draw, min_index, max_index); - if (!info->has_user_indices) - panfrost_minmax_cache_add(rsrc->index_cache, - draw->start, draw->count, - *min_index, *max_index); - } + if (!info->has_user_indices) + panfrost_minmax_cache_add(rsrc->index_cache, draw->start, draw->count, + *min_index, *max_index); + } - return panfrost_get_index_buffer(batch, info, draw); + return panfrost_get_index_buffer(batch, info, draw); } /** @@ -163,26 +156,24 @@ panfrost_get_index_buffer_bounded(struct panfrost_batch *batch, * elements CSO create time, not at draw time. 
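
panfrost_get_index_buffer_bounded, in the hunk just above, first consults a min/max cache keyed by the (start, count) range and only falls back to u_vbuf_get_minmax_index on a miss, filling the cache afterwards. A standalone sketch of that check-cache / compute / fill-cache flow with a trivial one-entry cache; the real panfrost_minmax_cache is more elaborate, and the toy_* names are hypothetical:

#include <stdbool.h>
#include <stdint.h>

/* One-entry memo of previously computed index bounds, keyed by the
 * (start, count) range that was scanned. */
struct toy_minmax_cache {
   bool valid;
   unsigned start, count;
   unsigned min_index, max_index;
};

static void
toy_get_bounds(struct toy_minmax_cache *cache, const uint16_t *indices,
               unsigned start, unsigned count, unsigned *min_out,
               unsigned *max_out)
{
   if (cache->valid && cache->start == start && cache->count == count) {
      *min_out = cache->min_index; /* cache hit: skip the scan entirely */
      *max_out = cache->max_index;
      return;
   }

   /* Miss: scan the range (the driver delegates to u_vbuf_get_minmax_index
    * here), then remember the result for the next identical draw. */
   unsigned lo = ~0u, hi = 0;
   for (unsigned i = 0; i < count; ++i) {
      unsigned v = indices[start + i];
      if (v < lo)
         lo = v;
      if (v > hi)
         hi = v;
   }

   *min_out = cache->min_index = lo;
   *max_out = cache->max_index = hi;
   cache->start = start;
   cache->count = count;
   cache->valid = true;
}
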
*/ unsigned -pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers, - unsigned *nr_bufs, - unsigned vbi, - unsigned divisor) +pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers, unsigned *nr_bufs, + unsigned vbi, unsigned divisor) { - /* Look up the buffer */ - for (unsigned i = 0; i < (*nr_bufs); ++i) { - if (buffers[i].vbi == vbi && buffers[i].divisor == divisor) - return i; - } + /* Look up the buffer */ + for (unsigned i = 0; i < (*nr_bufs); ++i) { + if (buffers[i].vbi == vbi && buffers[i].divisor == divisor) + return i; + } - /* Else, create a new buffer */ - unsigned idx = (*nr_bufs)++; + /* Else, create a new buffer */ + unsigned idx = (*nr_bufs)++; - buffers[idx] = (struct pan_vertex_buffer) { - .vbi = vbi, - .divisor = divisor, - }; + buffers[idx] = (struct pan_vertex_buffer){ + .vbi = vbi, + .divisor = divisor, + }; - return idx; + return idx; } /* @@ -194,8 +185,8 @@ pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers, static void panfrost_draw_target(struct panfrost_batch *batch, unsigned target) { - batch->draws |= target; - batch->resolve |= target; + batch->draws |= target; + batch->resolve |= target; } /* @@ -206,34 +197,34 @@ panfrost_draw_target(struct panfrost_batch *batch, unsigned target) void panfrost_set_batch_masks_blend(struct panfrost_batch *batch) { - struct panfrost_context *ctx = batch->ctx; - struct panfrost_blend_state *blend = ctx->blend; + struct panfrost_context *ctx = batch->ctx; + struct panfrost_blend_state *blend = ctx->blend; - for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) { - if (blend->info[i].enabled && batch->key.cbufs[i]) - panfrost_draw_target(batch, PIPE_CLEAR_COLOR0 << i); - } + for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) { + if (blend->info[i].enabled && batch->key.cbufs[i]) + panfrost_draw_target(batch, PIPE_CLEAR_COLOR0 << i); + } } void panfrost_set_batch_masks_zs(struct panfrost_batch *batch) { - struct panfrost_context *ctx = batch->ctx; - struct pipe_depth_stencil_alpha_state *zsa = (void *) ctx->depth_stencil; + struct panfrost_context *ctx = batch->ctx; + struct pipe_depth_stencil_alpha_state *zsa = (void *)ctx->depth_stencil; - /* Assume depth is read (TODO: perf) */ - if (zsa->depth_enabled) - batch->read |= PIPE_CLEAR_DEPTH; + /* Assume depth is read (TODO: perf) */ + if (zsa->depth_enabled) + batch->read |= PIPE_CLEAR_DEPTH; - if (zsa->depth_writemask) - panfrost_draw_target(batch, PIPE_CLEAR_DEPTH); + if (zsa->depth_writemask) + panfrost_draw_target(batch, PIPE_CLEAR_DEPTH); - if (zsa->stencil[0].enabled) { - panfrost_draw_target(batch, PIPE_CLEAR_STENCIL); + if (zsa->stencil[0].enabled) { + panfrost_draw_target(batch, PIPE_CLEAR_STENCIL); - /* Assume stencil is read (TODO: perf) */ - batch->read |= PIPE_CLEAR_STENCIL; - } + /* Assume stencil is read (TODO: perf) */ + batch->read |= PIPE_CLEAR_STENCIL; + } } void @@ -241,21 +232,20 @@ panfrost_track_image_access(struct panfrost_batch *batch, enum pipe_shader_type stage, struct pipe_image_view *image) { - struct panfrost_resource *rsrc = pan_resource(image->resource); + struct panfrost_resource *rsrc = pan_resource(image->resource); - if (image->shader_access & PIPE_IMAGE_ACCESS_WRITE) { - panfrost_batch_write_rsrc(batch, rsrc, stage); + if (image->shader_access & PIPE_IMAGE_ACCESS_WRITE) { + panfrost_batch_write_rsrc(batch, rsrc, stage); - bool is_buffer = rsrc->base.target == PIPE_BUFFER; - unsigned level = is_buffer ? 
0 : image->u.tex.level; - BITSET_SET(rsrc->valid.data, level); + bool is_buffer = rsrc->base.target == PIPE_BUFFER; + unsigned level = is_buffer ? 0 : image->u.tex.level; + BITSET_SET(rsrc->valid.data, level); - if (is_buffer) { - util_range_add(&rsrc->base, &rsrc->valid_buffer_range, - 0, rsrc->base.width0); - } - } else { - panfrost_batch_read_rsrc(batch, rsrc, stage); - } + if (is_buffer) { + util_range_add(&rsrc->base, &rsrc->valid_buffer_range, 0, + rsrc->base.width0); + } + } else { + panfrost_batch_read_rsrc(batch, rsrc, stage); + } } - diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c index 0736ea41492..16516f11d28 100644 --- a/src/gallium/drivers/panfrost/pan_job.c +++ b/src/gallium/drivers/panfrost/pan_job.c @@ -28,35 +28,36 @@ #include "drm-uapi/panfrost_drm.h" -#include "pan_bo.h" -#include "pan_context.h" +#include "util/format/u_format.h" #include "util/hash_table.h" #include "util/ralloc.h" -#include "util/format/u_format.h" -#include "util/u_pack_color.h" #include "util/rounding.h" #include "util/u_framebuffer.h" -#include "pan_util.h" +#include "util/u_pack_color.h" #include "decode.h" +#include "pan_bo.h" +#include "pan_context.h" +#include "pan_util.h" -#define foreach_batch(ctx, idx) \ - BITSET_FOREACH_SET(idx, ctx->batches.active, PAN_MAX_BATCHES) +#define foreach_batch(ctx, idx) \ + BITSET_FOREACH_SET(idx, ctx->batches.active, PAN_MAX_BATCHES) static unsigned panfrost_batch_idx(struct panfrost_batch *batch) { - return batch - batch->ctx->batches.slots; + return batch - batch->ctx->batches.slots; } /* Adds the BO backing surface to a batch if the surface is non-null */ static void -panfrost_batch_add_surface(struct panfrost_batch *batch, struct pipe_surface *surf) +panfrost_batch_add_surface(struct panfrost_batch *batch, + struct pipe_surface *surf) { - if (surf) { - struct panfrost_resource *rsrc = pan_resource(surf->texture); - panfrost_batch_write_rsrc(batch, rsrc, PIPE_SHADER_FRAGMENT); - } + if (surf) { + struct panfrost_resource *rsrc = pan_resource(surf->texture); + panfrost_batch_write_rsrc(batch, rsrc, PIPE_SHADER_FRAGMENT); + } } static void @@ -64,115 +65,116 @@ panfrost_batch_init(struct panfrost_context *ctx, const struct pipe_framebuffer_state *key, struct panfrost_batch *batch) { - struct pipe_screen *pscreen = ctx->base.screen; - struct panfrost_screen *screen = pan_screen(pscreen); - struct panfrost_device *dev = &screen->dev; + struct pipe_screen *pscreen = ctx->base.screen; + struct panfrost_screen *screen = pan_screen(pscreen); + struct panfrost_device *dev = &screen->dev; - batch->ctx = ctx; + batch->ctx = ctx; - batch->seqnum = ++ctx->batches.seqnum; + batch->seqnum = ++ctx->batches.seqnum; - util_dynarray_init(&batch->bos, NULL); + util_dynarray_init(&batch->bos, NULL); - batch->minx = batch->miny = ~0; - batch->maxx = batch->maxy = 0; + batch->minx = batch->miny = ~0; + batch->maxx = batch->maxy = 0; - util_copy_framebuffer_state(&batch->key, key); + util_copy_framebuffer_state(&batch->key, key); - /* Preallocate the main pool, since every batch has at least one job - * structure so it will be used */ - panfrost_pool_init(&batch->pool, NULL, dev, 0, 65536, "Batch pool", true, true); + /* Preallocate the main pool, since every batch has at least one job + * structure so it will be used */ + panfrost_pool_init(&batch->pool, NULL, dev, 0, 65536, "Batch pool", true, + true); - /* Don't preallocate the invisible pool, since not every batch will use - * the pre-allocation, particularly if the varyings are 
larger than the - * preallocation and a reallocation is needed after anyway. */ - panfrost_pool_init(&batch->invisible_pool, NULL, dev, - PAN_BO_INVISIBLE, 65536, "Varyings", false, true); + /* Don't preallocate the invisible pool, since not every batch will use + * the pre-allocation, particularly if the varyings are larger than the + * preallocation and a reallocation is needed after anyway. */ + panfrost_pool_init(&batch->invisible_pool, NULL, dev, PAN_BO_INVISIBLE, + 65536, "Varyings", false, true); - for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) - panfrost_batch_add_surface(batch, batch->key.cbufs[i]); + for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) + panfrost_batch_add_surface(batch, batch->key.cbufs[i]); - panfrost_batch_add_surface(batch, batch->key.zsbuf); + panfrost_batch_add_surface(batch, batch->key.zsbuf); - screen->vtbl.init_batch(batch); + screen->vtbl.init_batch(batch); } static void -panfrost_batch_cleanup(struct panfrost_context *ctx, struct panfrost_batch *batch) +panfrost_batch_cleanup(struct panfrost_context *ctx, + struct panfrost_batch *batch) { - struct panfrost_device *dev = pan_device(ctx->base.screen); + struct panfrost_device *dev = pan_device(ctx->base.screen); - assert(batch->seqnum); + assert(batch->seqnum); - if (ctx->batch == batch) - ctx->batch = NULL; + if (ctx->batch == batch) + ctx->batch = NULL; - unsigned batch_idx = panfrost_batch_idx(batch); + unsigned batch_idx = panfrost_batch_idx(batch); - pan_bo_access *flags = util_dynarray_begin(&batch->bos); - unsigned end_bo = util_dynarray_num_elements(&batch->bos, pan_bo_access); + pan_bo_access *flags = util_dynarray_begin(&batch->bos); + unsigned end_bo = util_dynarray_num_elements(&batch->bos, pan_bo_access); - for (int i = 0; i < end_bo; ++i) { - if (!flags[i]) - continue; + for (int i = 0; i < end_bo; ++i) { + if (!flags[i]) + continue; - struct panfrost_bo *bo = pan_lookup_bo(dev, i); - panfrost_bo_unreference(bo); - } + struct panfrost_bo *bo = pan_lookup_bo(dev, i); + panfrost_bo_unreference(bo); + } - /* There is no more writer for anything we wrote */ - hash_table_foreach(ctx->writers, ent) { - if (ent->data == batch) - _mesa_hash_table_remove(ctx->writers, ent); - } + /* There is no more writer for anything we wrote */ + hash_table_foreach(ctx->writers, ent) { + if (ent->data == batch) + _mesa_hash_table_remove(ctx->writers, ent); + } - panfrost_pool_cleanup(&batch->pool); - panfrost_pool_cleanup(&batch->invisible_pool); + panfrost_pool_cleanup(&batch->pool); + panfrost_pool_cleanup(&batch->invisible_pool); - util_unreference_framebuffer_state(&batch->key); + util_unreference_framebuffer_state(&batch->key); - util_dynarray_fini(&batch->bos); + util_dynarray_fini(&batch->bos); - memset(batch, 0, sizeof(*batch)); - BITSET_CLEAR(ctx->batches.active, batch_idx); + memset(batch, 0, sizeof(*batch)); + BITSET_CLEAR(ctx->batches.active, batch_idx); } -static void -panfrost_batch_submit(struct panfrost_context *ctx, - struct panfrost_batch *batch); +static void panfrost_batch_submit(struct panfrost_context *ctx, + struct panfrost_batch *batch); static struct panfrost_batch * panfrost_get_batch(struct panfrost_context *ctx, const struct pipe_framebuffer_state *key) { - struct panfrost_batch *batch = NULL; + struct panfrost_batch *batch = NULL; - for (unsigned i = 0; i < PAN_MAX_BATCHES; i++) { - if (ctx->batches.slots[i].seqnum && - util_framebuffer_state_equal(&ctx->batches.slots[i].key, key)) { - /* We found a match, increase the seqnum for the LRU - * eviction logic. 
- */ - ctx->batches.slots[i].seqnum = ++ctx->batches.seqnum; - return &ctx->batches.slots[i]; - } + for (unsigned i = 0; i < PAN_MAX_BATCHES; i++) { + if (ctx->batches.slots[i].seqnum && + util_framebuffer_state_equal(&ctx->batches.slots[i].key, key)) { + /* We found a match, increase the seqnum for the LRU + * eviction logic. + */ + ctx->batches.slots[i].seqnum = ++ctx->batches.seqnum; + return &ctx->batches.slots[i]; + } - if (!batch || batch->seqnum > ctx->batches.slots[i].seqnum) - batch = &ctx->batches.slots[i]; - } + if (!batch || batch->seqnum > ctx->batches.slots[i].seqnum) + batch = &ctx->batches.slots[i]; + } - assert(batch); + assert(batch); - /* The selected slot is used, we need to flush the batch */ - if (batch->seqnum) - panfrost_batch_submit(ctx, batch); + /* The selected slot is used, we need to flush the batch */ + if (batch->seqnum) + panfrost_batch_submit(ctx, batch); - panfrost_batch_init(ctx, key, batch); + panfrost_batch_init(ctx, key, batch); - unsigned batch_idx = panfrost_batch_idx(batch); - BITSET_SET(ctx->batches.active, batch_idx); + unsigned batch_idx = panfrost_batch_idx(batch); + BITSET_SET(ctx->batches.active, batch_idx); - return batch; + return batch; } /* Get the job corresponding to the FBO we're currently rendering into */ @@ -180,152 +182,151 @@ panfrost_get_batch(struct panfrost_context *ctx, struct panfrost_batch * panfrost_get_batch_for_fbo(struct panfrost_context *ctx) { - /* If we already began rendering, use that */ + /* If we already began rendering, use that */ - if (ctx->batch) { - assert(util_framebuffer_state_equal(&ctx->batch->key, - &ctx->pipe_framebuffer)); - return ctx->batch; - } + if (ctx->batch) { + assert(util_framebuffer_state_equal(&ctx->batch->key, + &ctx->pipe_framebuffer)); + return ctx->batch; + } - /* If not, look up the job */ - struct panfrost_batch *batch = panfrost_get_batch(ctx, - &ctx->pipe_framebuffer); + /* If not, look up the job */ + struct panfrost_batch *batch = + panfrost_get_batch(ctx, &ctx->pipe_framebuffer); - /* Set this job as the current FBO job. Will be reset when updating the - * FB state and when submitting or releasing a job. - */ - ctx->batch = batch; - panfrost_dirty_state_all(ctx); - return batch; + /* Set this job as the current FBO job. Will be reset when updating the + * FB state and when submitting or releasing a job. + */ + ctx->batch = batch; + panfrost_dirty_state_all(ctx); + return batch; } struct panfrost_batch * -panfrost_get_fresh_batch_for_fbo(struct panfrost_context *ctx, const char *reason) +panfrost_get_fresh_batch_for_fbo(struct panfrost_context *ctx, + const char *reason) { - struct panfrost_batch *batch; + struct panfrost_batch *batch; - batch = panfrost_get_batch(ctx, &ctx->pipe_framebuffer); - panfrost_dirty_state_all(ctx); + batch = panfrost_get_batch(ctx, &ctx->pipe_framebuffer); + panfrost_dirty_state_all(ctx); - /* We only need to submit and get a fresh batch if there is no - * draw/clear queued. Otherwise we may reuse the batch. */ + /* We only need to submit and get a fresh batch if there is no + * draw/clear queued. Otherwise we may reuse the batch. 
*/ - if (batch->scoreboard.first_job) { - perf_debug_ctx(ctx, "Flushing the current FBO due to: %s", reason); - panfrost_batch_submit(ctx, batch); - batch = panfrost_get_batch(ctx, &ctx->pipe_framebuffer); - } + if (batch->scoreboard.first_job) { + perf_debug_ctx(ctx, "Flushing the current FBO due to: %s", reason); + panfrost_batch_submit(ctx, batch); + batch = panfrost_get_batch(ctx, &ctx->pipe_framebuffer); + } - ctx->batch = batch; - return batch; + ctx->batch = batch; + return batch; } -static bool -panfrost_batch_uses_resource(struct panfrost_batch *batch, - struct panfrost_resource *rsrc); +static bool panfrost_batch_uses_resource(struct panfrost_batch *batch, + struct panfrost_resource *rsrc); static void panfrost_batch_update_access(struct panfrost_batch *batch, struct panfrost_resource *rsrc, bool writes) { - struct panfrost_context *ctx = batch->ctx; - uint32_t batch_idx = panfrost_batch_idx(batch); - struct hash_entry *entry = _mesa_hash_table_search(ctx->writers, rsrc); - struct panfrost_batch *writer = entry ? entry->data : NULL; + struct panfrost_context *ctx = batch->ctx; + uint32_t batch_idx = panfrost_batch_idx(batch); + struct hash_entry *entry = _mesa_hash_table_search(ctx->writers, rsrc); + struct panfrost_batch *writer = entry ? entry->data : NULL; - /* Both reads and writes flush the existing writer */ - if (writer != NULL && writer != batch) - panfrost_batch_submit(ctx, writer); + /* Both reads and writes flush the existing writer */ + if (writer != NULL && writer != batch) + panfrost_batch_submit(ctx, writer); - /* Writes (only) flush readers too */ - if (writes) { - unsigned i; - foreach_batch(ctx, i) { - struct panfrost_batch *batch = &ctx->batches.slots[i]; + /* Writes (only) flush readers too */ + if (writes) { + unsigned i; + foreach_batch(ctx, i) { + struct panfrost_batch *batch = &ctx->batches.slots[i]; - /* Skip the entry if this our batch. */ - if (i == batch_idx) - continue; + /* Skip the entry if this our batch. 
*/ + if (i == batch_idx) + continue; - /* Submit if it's a user */ - if (panfrost_batch_uses_resource(batch, rsrc)) - panfrost_batch_submit(ctx, batch); - } - } + /* Submit if it's a user */ + if (panfrost_batch_uses_resource(batch, rsrc)) + panfrost_batch_submit(ctx, batch); + } + } - if (writes) { - _mesa_hash_table_insert(ctx->writers, rsrc, batch); - } + if (writes) { + _mesa_hash_table_insert(ctx->writers, rsrc, batch); + } } static pan_bo_access * panfrost_batch_get_bo_access(struct panfrost_batch *batch, unsigned handle) { - unsigned size = util_dynarray_num_elements(&batch->bos, pan_bo_access); + unsigned size = util_dynarray_num_elements(&batch->bos, pan_bo_access); - if (handle >= size) { - unsigned grow = handle + 1 - size; + if (handle >= size) { + unsigned grow = handle + 1 - size; - memset(util_dynarray_grow(&batch->bos, pan_bo_access, grow), - 0, grow * sizeof(pan_bo_access)); - } + memset(util_dynarray_grow(&batch->bos, pan_bo_access, grow), 0, + grow * sizeof(pan_bo_access)); + } - return util_dynarray_element(&batch->bos, pan_bo_access, handle); + return util_dynarray_element(&batch->bos, pan_bo_access, handle); } static bool panfrost_batch_uses_resource(struct panfrost_batch *batch, struct panfrost_resource *rsrc) { - /* A resource is used iff its current BO is used */ - uint32_t handle = rsrc->image.data.bo->gem_handle; - unsigned size = util_dynarray_num_elements(&batch->bos, pan_bo_access); + /* A resource is used iff its current BO is used */ + uint32_t handle = rsrc->image.data.bo->gem_handle; + unsigned size = util_dynarray_num_elements(&batch->bos, pan_bo_access); - /* If out of bounds, certainly not used */ - if (handle >= size) - return false; + /* If out of bounds, certainly not used */ + if (handle >= size) + return false; - /* Otherwise check if nonzero access */ - return !!(*util_dynarray_element(&batch->bos, pan_bo_access, handle)); + /* Otherwise check if nonzero access */ + return !!(*util_dynarray_element(&batch->bos, pan_bo_access, handle)); } static void -panfrost_batch_add_bo_old(struct panfrost_batch *batch, - struct panfrost_bo *bo, uint32_t flags) +panfrost_batch_add_bo_old(struct panfrost_batch *batch, struct panfrost_bo *bo, + uint32_t flags) { - if (!bo) - return; + if (!bo) + return; - pan_bo_access *entry = - panfrost_batch_get_bo_access(batch, bo->gem_handle); - pan_bo_access old_flags = *entry; + pan_bo_access *entry = panfrost_batch_get_bo_access(batch, bo->gem_handle); + pan_bo_access old_flags = *entry; - if (!old_flags) { - batch->num_bos++; - panfrost_bo_reference(bo); - } + if (!old_flags) { + batch->num_bos++; + panfrost_bo_reference(bo); + } - if (old_flags == flags) - return; + if (old_flags == flags) + return; - flags |= old_flags; - *entry = flags; + flags |= old_flags; + *entry = flags; } static uint32_t panfrost_access_for_stage(enum pipe_shader_type stage) { - return (stage == PIPE_SHADER_FRAGMENT) ? - PAN_BO_ACCESS_FRAGMENT : PAN_BO_ACCESS_VERTEX_TILER; + return (stage == PIPE_SHADER_FRAGMENT) ? 
PAN_BO_ACCESS_FRAGMENT + : PAN_BO_ACCESS_VERTEX_TILER; } void -panfrost_batch_add_bo(struct panfrost_batch *batch, - struct panfrost_bo *bo, enum pipe_shader_type stage) +panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo, + enum pipe_shader_type stage) { - panfrost_batch_add_bo_old(batch, bo, PAN_BO_ACCESS_READ | - panfrost_access_for_stage(stage)); + panfrost_batch_add_bo_old( + batch, bo, PAN_BO_ACCESS_READ | panfrost_access_for_stage(stage)); } void @@ -333,31 +334,31 @@ panfrost_batch_read_rsrc(struct panfrost_batch *batch, struct panfrost_resource *rsrc, enum pipe_shader_type stage) { - uint32_t access = PAN_BO_ACCESS_READ | - panfrost_access_for_stage(stage); + uint32_t access = PAN_BO_ACCESS_READ | panfrost_access_for_stage(stage); - panfrost_batch_add_bo_old(batch, rsrc->image.data.bo, access); + panfrost_batch_add_bo_old(batch, rsrc->image.data.bo, access); - if (rsrc->separate_stencil) - panfrost_batch_add_bo_old(batch, rsrc->separate_stencil->image.data.bo, access); + if (rsrc->separate_stencil) + panfrost_batch_add_bo_old(batch, rsrc->separate_stencil->image.data.bo, + access); - panfrost_batch_update_access(batch, rsrc, false); + panfrost_batch_update_access(batch, rsrc, false); } void panfrost_batch_write_rsrc(struct panfrost_batch *batch, - struct panfrost_resource *rsrc, - enum pipe_shader_type stage) + struct panfrost_resource *rsrc, + enum pipe_shader_type stage) { - uint32_t access = PAN_BO_ACCESS_WRITE | - panfrost_access_for_stage(stage); + uint32_t access = PAN_BO_ACCESS_WRITE | panfrost_access_for_stage(stage); - panfrost_batch_add_bo_old(batch, rsrc->image.data.bo, access); + panfrost_batch_add_bo_old(batch, rsrc->image.data.bo, access); - if (rsrc->separate_stencil) - panfrost_batch_add_bo_old(batch, rsrc->separate_stencil->image.data.bo, access); + if (rsrc->separate_stencil) + panfrost_batch_add_bo_old(batch, rsrc->separate_stencil->image.data.bo, + access); - panfrost_batch_update_access(batch, rsrc, true); + panfrost_batch_update_access(batch, rsrc, true); } struct panfrost_bo * @@ -365,324 +366,321 @@ panfrost_batch_create_bo(struct panfrost_batch *batch, size_t size, uint32_t create_flags, enum pipe_shader_type stage, const char *label) { - struct panfrost_bo *bo; + struct panfrost_bo *bo; - bo = panfrost_bo_create(pan_device(batch->ctx->base.screen), size, - create_flags, label); - panfrost_batch_add_bo(batch, bo, stage); + bo = panfrost_bo_create(pan_device(batch->ctx->base.screen), size, + create_flags, label); + panfrost_batch_add_bo(batch, bo, stage); - /* panfrost_batch_add_bo() has retained a reference and - * panfrost_bo_create() initialize the refcnt to 1, so let's - * unreference the BO here so it gets released when the batch is - * destroyed (unless it's retained by someone else in the meantime). - */ - panfrost_bo_unreference(bo); - return bo; + /* panfrost_batch_add_bo() has retained a reference and + * panfrost_bo_create() initialize the refcnt to 1, so let's + * unreference the BO here so it gets released when the batch is + * destroyed (unless it's retained by someone else in the meantime). 
+ */ + panfrost_bo_unreference(bo); + return bo; } struct panfrost_bo * panfrost_batch_get_scratchpad(struct panfrost_batch *batch, - unsigned size_per_thread, - unsigned thread_tls_alloc, - unsigned core_id_range) + unsigned size_per_thread, + unsigned thread_tls_alloc, unsigned core_id_range) { - unsigned size = panfrost_get_total_stack_size(size_per_thread, - thread_tls_alloc, - core_id_range); + unsigned size = panfrost_get_total_stack_size( + size_per_thread, thread_tls_alloc, core_id_range); - if (batch->scratchpad) { - assert(batch->scratchpad->size >= size); - } else { - batch->scratchpad = panfrost_batch_create_bo(batch, size, - PAN_BO_INVISIBLE, - PIPE_SHADER_VERTEX, - "Thread local storage"); + if (batch->scratchpad) { + assert(batch->scratchpad->size >= size); + } else { + batch->scratchpad = + panfrost_batch_create_bo(batch, size, PAN_BO_INVISIBLE, + PIPE_SHADER_VERTEX, "Thread local storage"); - panfrost_batch_add_bo(batch, batch->scratchpad, - PIPE_SHADER_FRAGMENT); - } + panfrost_batch_add_bo(batch, batch->scratchpad, PIPE_SHADER_FRAGMENT); + } - return batch->scratchpad; + return batch->scratchpad; } struct panfrost_bo * -panfrost_batch_get_shared_memory(struct panfrost_batch *batch, - unsigned size, - unsigned workgroup_count) +panfrost_batch_get_shared_memory(struct panfrost_batch *batch, unsigned size, + unsigned workgroup_count) { - if (batch->shared_memory) { - assert(batch->shared_memory->size >= size); - } else { - batch->shared_memory = panfrost_batch_create_bo(batch, size, - PAN_BO_INVISIBLE, - PIPE_SHADER_VERTEX, - "Workgroup shared memory"); - } + if (batch->shared_memory) { + assert(batch->shared_memory->size >= size); + } else { + batch->shared_memory = panfrost_batch_create_bo( + batch, size, PAN_BO_INVISIBLE, PIPE_SHADER_VERTEX, + "Workgroup shared memory"); + } - return batch->shared_memory; + return batch->shared_memory; } static void panfrost_batch_to_fb_info(const struct panfrost_batch *batch, - struct pan_fb_info *fb, - struct pan_image_view *rts, - struct pan_image_view *zs, - struct pan_image_view *s, + struct pan_fb_info *fb, struct pan_image_view *rts, + struct pan_image_view *zs, struct pan_image_view *s, bool reserve) { - memset(fb, 0, sizeof(*fb)); - memset(rts, 0, sizeof(*rts) * 8); - memset(zs, 0, sizeof(*zs)); - memset(s, 0, sizeof(*s)); + memset(fb, 0, sizeof(*fb)); + memset(rts, 0, sizeof(*rts) * 8); + memset(zs, 0, sizeof(*zs)); + memset(s, 0, sizeof(*s)); - fb->width = batch->key.width; - fb->height = batch->key.height; - fb->extent.minx = batch->minx; - fb->extent.miny = batch->miny; - fb->extent.maxx = batch->maxx - 1; - fb->extent.maxy = batch->maxy - 1; - fb->nr_samples = util_framebuffer_get_num_samples(&batch->key); - fb->rt_count = batch->key.nr_cbufs; - fb->sprite_coord_origin = pan_tristate_get(batch->sprite_coord_origin); - fb->first_provoking_vertex = pan_tristate_get(batch->first_provoking_vertex); + fb->width = batch->key.width; + fb->height = batch->key.height; + fb->extent.minx = batch->minx; + fb->extent.miny = batch->miny; + fb->extent.maxx = batch->maxx - 1; + fb->extent.maxy = batch->maxy - 1; + fb->nr_samples = util_framebuffer_get_num_samples(&batch->key); + fb->rt_count = batch->key.nr_cbufs; + fb->sprite_coord_origin = pan_tristate_get(batch->sprite_coord_origin); + fb->first_provoking_vertex = pan_tristate_get(batch->first_provoking_vertex); - static const unsigned char id_swz[] = { - PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W, - }; + static const unsigned char id_swz[] = { + PIPE_SWIZZLE_X, + 
PIPE_SWIZZLE_Y, + PIPE_SWIZZLE_Z, + PIPE_SWIZZLE_W, + }; - for (unsigned i = 0; i < fb->rt_count; i++) { - struct pipe_surface *surf = batch->key.cbufs[i]; + for (unsigned i = 0; i < fb->rt_count; i++) { + struct pipe_surface *surf = batch->key.cbufs[i]; - if (!surf) - continue; + if (!surf) + continue; - struct panfrost_resource *prsrc = pan_resource(surf->texture); - unsigned mask = PIPE_CLEAR_COLOR0 << i; + struct panfrost_resource *prsrc = pan_resource(surf->texture); + unsigned mask = PIPE_CLEAR_COLOR0 << i; - if (batch->clear & mask) { - fb->rts[i].clear = true; - memcpy(fb->rts[i].clear_value, batch->clear_color[i], - sizeof((fb->rts[i].clear_value))); - } + if (batch->clear & mask) { + fb->rts[i].clear = true; + memcpy(fb->rts[i].clear_value, batch->clear_color[i], + sizeof((fb->rts[i].clear_value))); + } - fb->rts[i].discard = !reserve && !(batch->resolve & mask); + fb->rts[i].discard = !reserve && !(batch->resolve & mask); - rts[i].format = surf->format; - rts[i].dim = MALI_TEXTURE_DIMENSION_2D; - rts[i].last_level = rts[i].first_level = surf->u.tex.level; - rts[i].first_layer = surf->u.tex.first_layer; - rts[i].last_layer = surf->u.tex.last_layer; - rts[i].image = &prsrc->image; - rts[i].nr_samples = surf->nr_samples ? : MAX2(surf->texture->nr_samples, 1); - memcpy(rts[i].swizzle, id_swz, sizeof(rts[i].swizzle)); - fb->rts[i].crc_valid = &prsrc->valid.crc; - fb->rts[i].view = &rts[i]; + rts[i].format = surf->format; + rts[i].dim = MALI_TEXTURE_DIMENSION_2D; + rts[i].last_level = rts[i].first_level = surf->u.tex.level; + rts[i].first_layer = surf->u.tex.first_layer; + rts[i].last_layer = surf->u.tex.last_layer; + rts[i].image = &prsrc->image; + rts[i].nr_samples = + surf->nr_samples ?: MAX2(surf->texture->nr_samples, 1); + memcpy(rts[i].swizzle, id_swz, sizeof(rts[i].swizzle)); + fb->rts[i].crc_valid = &prsrc->valid.crc; + fb->rts[i].view = &rts[i]; - /* Preload if the RT is read or updated */ - if (!(batch->clear & mask) && - ((batch->read & mask) || - ((batch->draws & mask) && - BITSET_TEST(prsrc->valid.data, fb->rts[i].view->first_level)))) - fb->rts[i].preload = true; + /* Preload if the RT is read or updated */ + if (!(batch->clear & mask) && + ((batch->read & mask) || + ((batch->draws & mask) && + BITSET_TEST(prsrc->valid.data, fb->rts[i].view->first_level)))) + fb->rts[i].preload = true; + } - } + const struct pan_image_view *s_view = NULL, *z_view = NULL; + struct panfrost_resource *z_rsrc = NULL, *s_rsrc = NULL; - const struct pan_image_view *s_view = NULL, *z_view = NULL; - struct panfrost_resource *z_rsrc = NULL, *s_rsrc = NULL; + if (batch->key.zsbuf) { + struct pipe_surface *surf = batch->key.zsbuf; + z_rsrc = pan_resource(surf->texture); - if (batch->key.zsbuf) { - struct pipe_surface *surf = batch->key.zsbuf; - z_rsrc = pan_resource(surf->texture); + zs->format = surf->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT + ? PIPE_FORMAT_Z32_FLOAT + : surf->format; + zs->dim = MALI_TEXTURE_DIMENSION_2D; + zs->last_level = zs->first_level = surf->u.tex.level; + zs->first_layer = surf->u.tex.first_layer; + zs->last_layer = surf->u.tex.last_layer; + zs->image = &z_rsrc->image; + zs->nr_samples = surf->nr_samples ?: MAX2(surf->texture->nr_samples, 1); + memcpy(zs->swizzle, id_swz, sizeof(zs->swizzle)); + fb->zs.view.zs = zs; + z_view = zs; + if (util_format_is_depth_and_stencil(zs->format)) { + s_view = zs; + s_rsrc = z_rsrc; + } - zs->format = surf->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ? 
- PIPE_FORMAT_Z32_FLOAT : surf->format; - zs->dim = MALI_TEXTURE_DIMENSION_2D; - zs->last_level = zs->first_level = surf->u.tex.level; - zs->first_layer = surf->u.tex.first_layer; - zs->last_layer = surf->u.tex.last_layer; - zs->image = &z_rsrc->image; - zs->nr_samples = surf->nr_samples ? : MAX2(surf->texture->nr_samples, 1); - memcpy(zs->swizzle, id_swz, sizeof(zs->swizzle)); - fb->zs.view.zs = zs; - z_view = zs; - if (util_format_is_depth_and_stencil(zs->format)) { - s_view = zs; - s_rsrc = z_rsrc; - } + if (z_rsrc->separate_stencil) { + s_rsrc = z_rsrc->separate_stencil; + s->format = PIPE_FORMAT_S8_UINT; + s->dim = MALI_TEXTURE_DIMENSION_2D; + s->last_level = s->first_level = surf->u.tex.level; + s->first_layer = surf->u.tex.first_layer; + s->last_layer = surf->u.tex.last_layer; + s->image = &s_rsrc->image; + s->nr_samples = surf->nr_samples ?: MAX2(surf->texture->nr_samples, 1); + memcpy(s->swizzle, id_swz, sizeof(s->swizzle)); + fb->zs.view.s = s; + s_view = s; + } + } - if (z_rsrc->separate_stencil) { - s_rsrc = z_rsrc->separate_stencil; - s->format = PIPE_FORMAT_S8_UINT; - s->dim = MALI_TEXTURE_DIMENSION_2D; - s->last_level = s->first_level = surf->u.tex.level; - s->first_layer = surf->u.tex.first_layer; - s->last_layer = surf->u.tex.last_layer; - s->image = &s_rsrc->image; - s->nr_samples = surf->nr_samples ? : MAX2(surf->texture->nr_samples, 1); - memcpy(s->swizzle, id_swz, sizeof(s->swizzle)); - fb->zs.view.s = s; - s_view = s; - } - } + if (batch->clear & PIPE_CLEAR_DEPTH) { + fb->zs.clear.z = true; + fb->zs.clear_value.depth = batch->clear_depth; + } - if (batch->clear & PIPE_CLEAR_DEPTH) { - fb->zs.clear.z = true; - fb->zs.clear_value.depth = batch->clear_depth; - } + if (batch->clear & PIPE_CLEAR_STENCIL) { + fb->zs.clear.s = true; + fb->zs.clear_value.stencil = batch->clear_stencil; + } - if (batch->clear & PIPE_CLEAR_STENCIL) { - fb->zs.clear.s = true; - fb->zs.clear_value.stencil = batch->clear_stencil; - } + fb->zs.discard.z = !reserve && !(batch->resolve & PIPE_CLEAR_DEPTH); + fb->zs.discard.s = !reserve && !(batch->resolve & PIPE_CLEAR_STENCIL); - fb->zs.discard.z = !reserve && !(batch->resolve & PIPE_CLEAR_DEPTH); - fb->zs.discard.s = !reserve && !(batch->resolve & PIPE_CLEAR_STENCIL); + if (!fb->zs.clear.z && z_rsrc && + ((batch->read & PIPE_CLEAR_DEPTH) || + ((batch->draws & PIPE_CLEAR_DEPTH) && + BITSET_TEST(z_rsrc->valid.data, z_view->first_level)))) + fb->zs.preload.z = true; - if (!fb->zs.clear.z && z_rsrc && - ((batch->read & PIPE_CLEAR_DEPTH) || - ((batch->draws & PIPE_CLEAR_DEPTH) && - BITSET_TEST(z_rsrc->valid.data, z_view->first_level)))) - fb->zs.preload.z = true; + if (!fb->zs.clear.s && s_rsrc && + ((batch->read & PIPE_CLEAR_STENCIL) || + ((batch->draws & PIPE_CLEAR_STENCIL) && + BITSET_TEST(s_rsrc->valid.data, s_view->first_level)))) + fb->zs.preload.s = true; - if (!fb->zs.clear.s && s_rsrc && - ((batch->read & PIPE_CLEAR_STENCIL) || - ((batch->draws & PIPE_CLEAR_STENCIL) && - BITSET_TEST(s_rsrc->valid.data, s_view->first_level)))) - fb->zs.preload.s = true; + /* Preserve both component if we have a combined ZS view and + * one component needs to be preserved. + */ + if (z_view && z_view == s_view && fb->zs.discard.z != fb->zs.discard.s) { + bool valid = BITSET_TEST(z_rsrc->valid.data, z_view->first_level); - /* Preserve both component if we have a combined ZS view and - * one component needs to be preserved. 
- */ - if (z_view && z_view == s_view && fb->zs.discard.z != fb->zs.discard.s) { - bool valid = BITSET_TEST(z_rsrc->valid.data, z_view->first_level); - - fb->zs.discard.z = false; - fb->zs.discard.s = false; - fb->zs.preload.z = !fb->zs.clear.z && valid; - fb->zs.preload.s = !fb->zs.clear.s && valid; - } + fb->zs.discard.z = false; + fb->zs.discard.s = false; + fb->zs.preload.z = !fb->zs.clear.z && valid; + fb->zs.preload.s = !fb->zs.clear.s && valid; + } } static int panfrost_batch_submit_ioctl(struct panfrost_batch *batch, - mali_ptr first_job_desc, - uint32_t reqs, - uint32_t in_sync, - uint32_t out_sync) + mali_ptr first_job_desc, uint32_t reqs, + uint32_t in_sync, uint32_t out_sync) { - struct panfrost_context *ctx = batch->ctx; - struct pipe_context *gallium = (struct pipe_context *) ctx; - struct panfrost_device *dev = pan_device(gallium->screen); - struct drm_panfrost_submit submit = {0,}; - uint32_t in_syncs[2]; - uint32_t *bo_handles; - int ret; + struct panfrost_context *ctx = batch->ctx; + struct pipe_context *gallium = (struct pipe_context *)ctx; + struct panfrost_device *dev = pan_device(gallium->screen); + struct drm_panfrost_submit submit = { + 0, + }; + uint32_t in_syncs[2]; + uint32_t *bo_handles; + int ret; - /* If we trace, we always need a syncobj, so make one of our own if we - * weren't given one to use. Remember that we did so, so we can free it - * after we're done but preventing double-frees if we were given a - * syncobj */ + /* If we trace, we always need a syncobj, so make one of our own if we + * weren't given one to use. Remember that we did so, so we can free it + * after we're done but preventing double-frees if we were given a + * syncobj */ - if (!out_sync && dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) - out_sync = ctx->syncobj; + if (!out_sync && dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) + out_sync = ctx->syncobj; - submit.out_sync = out_sync; - submit.jc = first_job_desc; - submit.requirements = reqs; + submit.out_sync = out_sync; + submit.jc = first_job_desc; + submit.requirements = reqs; - if (in_sync) - in_syncs[submit.in_sync_count++] = in_sync; + if (in_sync) + in_syncs[submit.in_sync_count++] = in_sync; - if (ctx->in_sync_fd >= 0) { - ret = drmSyncobjImportSyncFile(dev->fd, ctx->in_sync_obj, - ctx->in_sync_fd); - assert(!ret); + if (ctx->in_sync_fd >= 0) { + ret = + drmSyncobjImportSyncFile(dev->fd, ctx->in_sync_obj, ctx->in_sync_fd); + assert(!ret); - in_syncs[submit.in_sync_count++] = ctx->in_sync_obj; - close(ctx->in_sync_fd); - ctx->in_sync_fd = -1; - } + in_syncs[submit.in_sync_count++] = ctx->in_sync_obj; + close(ctx->in_sync_fd); + ctx->in_sync_fd = -1; + } - if (submit.in_sync_count) - submit.in_syncs = (uintptr_t)in_syncs; + if (submit.in_sync_count) + submit.in_syncs = (uintptr_t)in_syncs; - bo_handles = calloc(panfrost_pool_num_bos(&batch->pool) + - panfrost_pool_num_bos(&batch->invisible_pool) + - batch->num_bos + 2, - sizeof(*bo_handles)); - assert(bo_handles); + bo_handles = calloc(panfrost_pool_num_bos(&batch->pool) + + panfrost_pool_num_bos(&batch->invisible_pool) + + batch->num_bos + 2, + sizeof(*bo_handles)); + assert(bo_handles); - pan_bo_access *flags = util_dynarray_begin(&batch->bos); - unsigned end_bo = util_dynarray_num_elements(&batch->bos, pan_bo_access); + pan_bo_access *flags = util_dynarray_begin(&batch->bos); + unsigned end_bo = util_dynarray_num_elements(&batch->bos, pan_bo_access); - for (int i = 0; i < end_bo; ++i) { - if (!flags[i]) - continue; + for (int i = 0; i < end_bo; ++i) { + if (!flags[i]) + continue; 
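/* Editorial sketch (not part of the patch): the hunk above reflows the loop in
 * panfrost_batch_submit_ioctl() that walks batch->bos.  That util_dynarray is
 * indexed directly by GEM handle and holds pan_bo_access flags, so "this batch
 * uses the BO" is simply "its slot is non-zero", and the submit path compacts
 * those sparse slots into the dense handle list the kernel expects.  The
 * stand-alone C sketch below illustrates that compaction with simplified,
 * hypothetical types (access_flags, collect_bo_handles); it is an assumption
 * for illustration only, not the driver's actual helper.
 */
#include <stdint.h>
#include <stdlib.h>

typedef uint32_t access_flags; /* simplified stand-in for pan_bo_access */

/* Turn a sparse per-handle flag array into a dense handle list, suitable in
 * spirit for something like drm_panfrost_submit::bo_handles.  A zero entry
 * means the batch never touched that BO, so it is skipped.  Returns the
 * number of handles written; the caller owns and frees *out. */
static unsigned
collect_bo_handles(const access_flags *flags, unsigned count, uint32_t **out)
{
   uint32_t *handles = calloc(count, sizeof(*handles));
   unsigned n = 0;

   for (unsigned handle = 0; handle < count; ++handle) {
      if (!flags[handle])
         continue; /* unused slot: no access recorded for this handle */

      handles[n++] = handle; /* the array index itself is the GEM handle */
   }

   *out = handles;
   return n;
}
/* End of editorial sketch. */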
- assert(submit.bo_handle_count < batch->num_bos); - bo_handles[submit.bo_handle_count++] = i; + assert(submit.bo_handle_count < batch->num_bos); + bo_handles[submit.bo_handle_count++] = i; - /* Update the BO access flags so that panfrost_bo_wait() knows - * about all pending accesses. - * We only keep the READ/WRITE info since this is all the BO - * wait logic cares about. - * We also preserve existing flags as this batch might not - * be the first one to access the BO. - */ - struct panfrost_bo *bo = pan_lookup_bo(dev, i); + /* Update the BO access flags so that panfrost_bo_wait() knows + * about all pending accesses. + * We only keep the READ/WRITE info since this is all the BO + * wait logic cares about. + * We also preserve existing flags as this batch might not + * be the first one to access the BO. + */ + struct panfrost_bo *bo = pan_lookup_bo(dev, i); - bo->gpu_access |= flags[i] & (PAN_BO_ACCESS_RW); - } + bo->gpu_access |= flags[i] & (PAN_BO_ACCESS_RW); + } - panfrost_pool_get_bo_handles(&batch->pool, bo_handles + submit.bo_handle_count); - submit.bo_handle_count += panfrost_pool_num_bos(&batch->pool); - panfrost_pool_get_bo_handles(&batch->invisible_pool, bo_handles + submit.bo_handle_count); - submit.bo_handle_count += panfrost_pool_num_bos(&batch->invisible_pool); + panfrost_pool_get_bo_handles(&batch->pool, + bo_handles + submit.bo_handle_count); + submit.bo_handle_count += panfrost_pool_num_bos(&batch->pool); + panfrost_pool_get_bo_handles(&batch->invisible_pool, + bo_handles + submit.bo_handle_count); + submit.bo_handle_count += panfrost_pool_num_bos(&batch->invisible_pool); - /* Add the tiler heap to the list of accessed BOs if the batch has at - * least one tiler job. Tiler heap is written by tiler jobs and read - * by fragment jobs (the polygon list is coming from this heap). - */ - if (batch->scoreboard.first_tiler) - bo_handles[submit.bo_handle_count++] = dev->tiler_heap->gem_handle; + /* Add the tiler heap to the list of accessed BOs if the batch has at + * least one tiler job. Tiler heap is written by tiler jobs and read + * by fragment jobs (the polygon list is coming from this heap). 
+ */ + if (batch->scoreboard.first_tiler) + bo_handles[submit.bo_handle_count++] = dev->tiler_heap->gem_handle; - /* Always used on Bifrost, occassionally used on Midgard */ - bo_handles[submit.bo_handle_count++] = dev->sample_positions->gem_handle; + /* Always used on Bifrost, occassionally used on Midgard */ + bo_handles[submit.bo_handle_count++] = dev->sample_positions->gem_handle; - submit.bo_handles = (u64) (uintptr_t) bo_handles; - if (ctx->is_noop) - ret = 0; - else - ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit); - free(bo_handles); + submit.bo_handles = (u64)(uintptr_t)bo_handles; + if (ctx->is_noop) + ret = 0; + else + ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit); + free(bo_handles); - if (ret) - return errno; + if (ret) + return errno; - /* Trace the job if we're doing that */ - if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) { - /* Wait so we can get errors reported back */ - drmSyncobjWait(dev->fd, &out_sync, 1, - INT64_MAX, 0, NULL); + /* Trace the job if we're doing that */ + if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) { + /* Wait so we can get errors reported back */ + drmSyncobjWait(dev->fd, &out_sync, 1, INT64_MAX, 0, NULL); - if (dev->debug & PAN_DBG_TRACE) - pandecode_jc(submit.jc, dev->gpu_id); + if (dev->debug & PAN_DBG_TRACE) + pandecode_jc(submit.jc, dev->gpu_id); - if (dev->debug & PAN_DBG_DUMP) - pandecode_dump_mappings(); + if (dev->debug & PAN_DBG_DUMP) + pandecode_dump_mappings(); - /* Jobs won't be complete if blackhole rendering, that's ok */ - if (!ctx->is_noop && dev->debug & PAN_DBG_SYNC) - pandecode_abort_on_fault(submit.jc, dev->gpu_id); - } + /* Jobs won't be complete if blackhole rendering, that's ok */ + if (!ctx->is_noop && dev->debug & PAN_DBG_SYNC) + pandecode_abort_on_fault(submit.jc, dev->gpu_id); + } - return 0; + return 0; } static bool panfrost_has_fragment_job(struct panfrost_batch *batch) { - return batch->scoreboard.first_tiler || batch->clear; + return batch->scoreboard.first_tiler || batch->clear; } /* Submit both vertex/tiler and fragment jobs for a batch, possibly with an @@ -691,141 +689,137 @@ panfrost_has_fragment_job(struct panfrost_batch *batch) static int panfrost_batch_submit_jobs(struct panfrost_batch *batch, - const struct pan_fb_info *fb, - uint32_t in_sync, uint32_t out_sync) + const struct pan_fb_info *fb, uint32_t in_sync, + uint32_t out_sync) { - struct pipe_screen *pscreen = batch->ctx->base.screen; - struct panfrost_screen *screen = pan_screen(pscreen); - struct panfrost_device *dev = pan_device(pscreen); - bool has_draws = batch->scoreboard.first_job; - bool has_tiler = batch->scoreboard.first_tiler; - bool has_frag = panfrost_has_fragment_job(batch); - int ret = 0; + struct pipe_screen *pscreen = batch->ctx->base.screen; + struct panfrost_screen *screen = pan_screen(pscreen); + struct panfrost_device *dev = pan_device(pscreen); + bool has_draws = batch->scoreboard.first_job; + bool has_tiler = batch->scoreboard.first_tiler; + bool has_frag = panfrost_has_fragment_job(batch); + int ret = 0; - /* Take the submit lock to make sure no tiler jobs from other context - * are inserted between our tiler and fragment jobs, failing to do that - * might result in tiler heap corruption. - */ - if (has_tiler) - pthread_mutex_lock(&dev->submit_lock); + /* Take the submit lock to make sure no tiler jobs from other context + * are inserted between our tiler and fragment jobs, failing to do that + * might result in tiler heap corruption. 
+ */ + if (has_tiler) + pthread_mutex_lock(&dev->submit_lock); - if (has_draws) { - ret = panfrost_batch_submit_ioctl(batch, batch->scoreboard.first_job, - 0, in_sync, has_frag ? 0 : out_sync); + if (has_draws) { + ret = panfrost_batch_submit_ioctl(batch, batch->scoreboard.first_job, 0, + in_sync, has_frag ? 0 : out_sync); - if (ret) - goto done; - } + if (ret) + goto done; + } - if (has_frag) { - mali_ptr fragjob = screen->vtbl.emit_fragment_job(batch, fb); - ret = panfrost_batch_submit_ioctl(batch, fragjob, - PANFROST_JD_REQ_FS, 0, - out_sync); - if (ret) - goto done; - } + if (has_frag) { + mali_ptr fragjob = screen->vtbl.emit_fragment_job(batch, fb); + ret = panfrost_batch_submit_ioctl(batch, fragjob, PANFROST_JD_REQ_FS, 0, + out_sync); + if (ret) + goto done; + } done: - if (has_tiler) - pthread_mutex_unlock(&dev->submit_lock); + if (has_tiler) + pthread_mutex_unlock(&dev->submit_lock); - return ret; + return ret; } static void panfrost_emit_tile_map(struct panfrost_batch *batch, struct pan_fb_info *fb) { - if (batch->key.nr_cbufs < 1 || !batch->key.cbufs[0]) - return; + if (batch->key.nr_cbufs < 1 || !batch->key.cbufs[0]) + return; - struct pipe_surface *surf = batch->key.cbufs[0]; - struct panfrost_resource *pres = surf ? pan_resource(surf->texture) : NULL; + struct pipe_surface *surf = batch->key.cbufs[0]; + struct panfrost_resource *pres = surf ? pan_resource(surf->texture) : NULL; - if (pres && pres->damage.tile_map.enable) { - fb->tile_map.base = - pan_pool_upload_aligned(&batch->pool.base, - pres->damage.tile_map.data, - pres->damage.tile_map.size, - 64); - fb->tile_map.stride = pres->damage.tile_map.stride; - } + if (pres && pres->damage.tile_map.enable) { + fb->tile_map.base = + pan_pool_upload_aligned(&batch->pool.base, pres->damage.tile_map.data, + pres->damage.tile_map.size, 64); + fb->tile_map.stride = pres->damage.tile_map.stride; + } } static void panfrost_batch_submit(struct panfrost_context *ctx, struct panfrost_batch *batch) { - struct pipe_screen *pscreen = ctx->base.screen; - struct panfrost_screen *screen = pan_screen(pscreen); - int ret; + struct pipe_screen *pscreen = ctx->base.screen; + struct panfrost_screen *screen = pan_screen(pscreen); + int ret; - /* Nothing to do! */ - if (!batch->scoreboard.first_job && !batch->clear) - goto out; + /* Nothing to do! */ + if (!batch->scoreboard.first_job && !batch->clear) + goto out; - if (batch->key.zsbuf && panfrost_has_fragment_job(batch)) { - struct pipe_surface *surf = batch->key.zsbuf; - struct panfrost_resource *z_rsrc = pan_resource(surf->texture); + if (batch->key.zsbuf && panfrost_has_fragment_job(batch)) { + struct pipe_surface *surf = batch->key.zsbuf; + struct panfrost_resource *z_rsrc = pan_resource(surf->texture); - /* Shared depth/stencil resources are not supported, and would - * break this optimisation. */ - assert(!(z_rsrc->base.bind & PAN_BIND_SHARED_MASK)); + /* Shared depth/stencil resources are not supported, and would + * break this optimisation. 
*/ + assert(!(z_rsrc->base.bind & PAN_BIND_SHARED_MASK)); - if (batch->clear & PIPE_CLEAR_STENCIL) { - z_rsrc->stencil_value = batch->clear_stencil; - z_rsrc->constant_stencil = true; - } else if (z_rsrc->constant_stencil) { - batch->clear_stencil = z_rsrc->stencil_value; - batch->clear |= PIPE_CLEAR_STENCIL; - } + if (batch->clear & PIPE_CLEAR_STENCIL) { + z_rsrc->stencil_value = batch->clear_stencil; + z_rsrc->constant_stencil = true; + } else if (z_rsrc->constant_stencil) { + batch->clear_stencil = z_rsrc->stencil_value; + batch->clear |= PIPE_CLEAR_STENCIL; + } - if (batch->draws & PIPE_CLEAR_STENCIL) - z_rsrc->constant_stencil = false; - } + if (batch->draws & PIPE_CLEAR_STENCIL) + z_rsrc->constant_stencil = false; + } - struct pan_fb_info fb; - struct pan_image_view rts[8], zs, s; + struct pan_fb_info fb; + struct pan_image_view rts[8], zs, s; - panfrost_batch_to_fb_info(batch, &fb, rts, &zs, &s, false); + panfrost_batch_to_fb_info(batch, &fb, rts, &zs, &s, false); - screen->vtbl.preload(batch, &fb); - screen->vtbl.init_polygon_list(batch); + screen->vtbl.preload(batch, &fb); + screen->vtbl.init_polygon_list(batch); - /* Now that all draws are in, we can finally prepare the - * FBD for the batch (if there is one). */ + /* Now that all draws are in, we can finally prepare the + * FBD for the batch (if there is one). */ - screen->vtbl.emit_tls(batch); - panfrost_emit_tile_map(batch, &fb); + screen->vtbl.emit_tls(batch); + panfrost_emit_tile_map(batch, &fb); - if (batch->scoreboard.first_tiler || batch->clear) - screen->vtbl.emit_fbd(batch, &fb); + if (batch->scoreboard.first_tiler || batch->clear) + screen->vtbl.emit_fbd(batch, &fb); - ret = panfrost_batch_submit_jobs(batch, &fb, 0, ctx->syncobj); + ret = panfrost_batch_submit_jobs(batch, &fb, 0, ctx->syncobj); - if (ret) - fprintf(stderr, "panfrost_batch_submit failed: %d\n", ret); + if (ret) + fprintf(stderr, "panfrost_batch_submit failed: %d\n", ret); - /* We must reset the damage info of our render targets here even - * though a damage reset normally happens when the DRI layer swaps - * buffers. That's because there can be implicit flushes the GL - * app is not aware of, and those might impact the damage region: if - * part of the damaged portion is drawn during those implicit flushes, - * you have to reload those areas before next draws are pushed, and - * since the driver can't easily know what's been modified by the draws - * it flushed, the easiest solution is to reload everything. - */ - for (unsigned i = 0; i < batch->key.nr_cbufs; i++) { - if (!batch->key.cbufs[i]) - continue; + /* We must reset the damage info of our render targets here even + * though a damage reset normally happens when the DRI layer swaps + * buffers. That's because there can be implicit flushes the GL + * app is not aware of, and those might impact the damage region: if + * part of the damaged portion is drawn during those implicit flushes, + * you have to reload those areas before next draws are pushed, and + * since the driver can't easily know what's been modified by the draws + * it flushed, the easiest solution is to reload everything. 
+ */ + for (unsigned i = 0; i < batch->key.nr_cbufs; i++) { + if (!batch->key.cbufs[i]) + continue; - panfrost_resource_set_damage_region(ctx->base.screen, - batch->key.cbufs[i]->texture, - 0, NULL); - } + panfrost_resource_set_damage_region( + ctx->base.screen, batch->key.cbufs[i]->texture, 0, NULL); + } out: - panfrost_batch_cleanup(ctx, batch); + panfrost_batch_cleanup(ctx, batch); } /* Submit all batches */ @@ -833,30 +827,29 @@ out: void panfrost_flush_all_batches(struct panfrost_context *ctx, const char *reason) { - struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); - panfrost_batch_submit(ctx, batch); + struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); + panfrost_batch_submit(ctx, batch); - for (unsigned i = 0; i < PAN_MAX_BATCHES; i++) { - if (ctx->batches.slots[i].seqnum) { - if (reason) - perf_debug_ctx(ctx, "Flushing everything due to: %s", reason); + for (unsigned i = 0; i < PAN_MAX_BATCHES; i++) { + if (ctx->batches.slots[i].seqnum) { + if (reason) + perf_debug_ctx(ctx, "Flushing everything due to: %s", reason); - panfrost_batch_submit(ctx, &ctx->batches.slots[i]); - } - } + panfrost_batch_submit(ctx, &ctx->batches.slots[i]); + } + } } void panfrost_flush_writer(struct panfrost_context *ctx, - struct panfrost_resource *rsrc, - const char *reason) + struct panfrost_resource *rsrc, const char *reason) { - struct hash_entry *entry = _mesa_hash_table_search(ctx->writers, rsrc); + struct hash_entry *entry = _mesa_hash_table_search(ctx->writers, rsrc); - if (entry) { - perf_debug_ctx(ctx, "Flushing writer due to: %s", reason); - panfrost_batch_submit(ctx, entry->data); - } + if (entry) { + perf_debug_ctx(ctx, "Flushing writer due to: %s", reason); + panfrost_batch_submit(ctx, entry->data); + } } void @@ -864,106 +857,103 @@ panfrost_flush_batches_accessing_rsrc(struct panfrost_context *ctx, struct panfrost_resource *rsrc, const char *reason) { - unsigned i; - foreach_batch(ctx, i) { - struct panfrost_batch *batch = &ctx->batches.slots[i]; + unsigned i; + foreach_batch(ctx, i) { + struct panfrost_batch *batch = &ctx->batches.slots[i]; - if (!panfrost_batch_uses_resource(batch, rsrc)) - continue; + if (!panfrost_batch_uses_resource(batch, rsrc)) + continue; - perf_debug_ctx(ctx, "Flushing user due to: %s", reason); - panfrost_batch_submit(ctx, batch); - } + perf_debug_ctx(ctx, "Flushing user due to: %s", reason); + panfrost_batch_submit(ctx, batch); + } } bool panfrost_any_batch_reads_rsrc(struct panfrost_context *ctx, struct panfrost_resource *rsrc) { - unsigned i; - foreach_batch(ctx, i) { - struct panfrost_batch *batch = &ctx->batches.slots[i]; + unsigned i; + foreach_batch(ctx, i) { + struct panfrost_batch *batch = &ctx->batches.slots[i]; - if (panfrost_batch_uses_resource(batch, rsrc)) - return true; - } + if (panfrost_batch_uses_resource(batch, rsrc)) + return true; + } - return false; + return false; } bool panfrost_any_batch_writes_rsrc(struct panfrost_context *ctx, struct panfrost_resource *rsrc) { - return _mesa_hash_table_search(ctx->writers, rsrc) != NULL; + return _mesa_hash_table_search(ctx->writers, rsrc) != NULL; } void panfrost_batch_adjust_stack_size(struct panfrost_batch *batch) { - struct panfrost_context *ctx = batch->ctx; + struct panfrost_context *ctx = batch->ctx; - for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) { - struct panfrost_compiled_shader *ss = ctx->prog[i]; + for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) { + struct panfrost_compiled_shader *ss = ctx->prog[i]; - if (!ss) - continue; + if (!ss) + continue; - 
batch->stack_size = MAX2(batch->stack_size, ss->info.tls_size); - } + batch->stack_size = MAX2(batch->stack_size, ss->info.tls_size); + } } void -panfrost_batch_clear(struct panfrost_batch *batch, - unsigned buffers, - const union pipe_color_union *color, - double depth, unsigned stencil) +panfrost_batch_clear(struct panfrost_batch *batch, unsigned buffers, + const union pipe_color_union *color, double depth, + unsigned stencil) { - struct panfrost_context *ctx = batch->ctx; + struct panfrost_context *ctx = batch->ctx; - if (buffers & PIPE_CLEAR_COLOR) { - for (unsigned i = 0; i < ctx->pipe_framebuffer.nr_cbufs; ++i) { - if (!(buffers & (PIPE_CLEAR_COLOR0 << i))) - continue; + if (buffers & PIPE_CLEAR_COLOR) { + for (unsigned i = 0; i < ctx->pipe_framebuffer.nr_cbufs; ++i) { + if (!(buffers & (PIPE_CLEAR_COLOR0 << i))) + continue; - enum pipe_format format = ctx->pipe_framebuffer.cbufs[i]->format; - pan_pack_color(batch->clear_color[i], color, format, false); - } - } + enum pipe_format format = ctx->pipe_framebuffer.cbufs[i]->format; + pan_pack_color(batch->clear_color[i], color, format, false); + } + } - if (buffers & PIPE_CLEAR_DEPTH) { - batch->clear_depth = depth; - } + if (buffers & PIPE_CLEAR_DEPTH) { + batch->clear_depth = depth; + } - if (buffers & PIPE_CLEAR_STENCIL) { - batch->clear_stencil = stencil; - } + if (buffers & PIPE_CLEAR_STENCIL) { + batch->clear_stencil = stencil; + } - batch->clear |= buffers; - batch->resolve |= buffers; + batch->clear |= buffers; + batch->resolve |= buffers; - /* Clearing affects the entire framebuffer (by definition -- this is - * the Gallium clear callback, which clears the whole framebuffer. If - * the scissor test were enabled from the GL side, the gallium frontend - * would emit a quad instead and we wouldn't go down this code path) */ + /* Clearing affects the entire framebuffer (by definition -- this is + * the Gallium clear callback, which clears the whole framebuffer. 
If + * the scissor test were enabled from the GL side, the gallium frontend + * would emit a quad instead and we wouldn't go down this code path) */ - panfrost_batch_union_scissor(batch, 0, 0, - ctx->pipe_framebuffer.width, - ctx->pipe_framebuffer.height); + panfrost_batch_union_scissor(batch, 0, 0, ctx->pipe_framebuffer.width, + ctx->pipe_framebuffer.height); } /* Given a new bounding rectangle (scissor), let the job cover the union of the * new and old bounding rectangles */ void -panfrost_batch_union_scissor(struct panfrost_batch *batch, - unsigned minx, unsigned miny, - unsigned maxx, unsigned maxy) +panfrost_batch_union_scissor(struct panfrost_batch *batch, unsigned minx, + unsigned miny, unsigned maxx, unsigned maxy) { - batch->minx = MIN2(batch->minx, minx); - batch->miny = MIN2(batch->miny, miny); - batch->maxx = MAX2(batch->maxx, maxx); - batch->maxy = MAX2(batch->maxy, maxy); + batch->minx = MIN2(batch->minx, minx); + batch->miny = MIN2(batch->miny, miny); + batch->maxx = MAX2(batch->maxx, maxx); + batch->maxy = MAX2(batch->maxy, maxy); } /** @@ -976,10 +966,9 @@ panfrost_batch_union_scissor(struct panfrost_batch *batch, bool panfrost_batch_skip_rasterization(struct panfrost_batch *batch) { - struct panfrost_context *ctx = batch->ctx; - struct pipe_rasterizer_state *rast = (void *) ctx->rasterizer; + struct panfrost_context *ctx = batch->ctx; + struct pipe_rasterizer_state *rast = (void *)ctx->rasterizer; - return (rast->rasterizer_discard || - batch->scissor_culls_everything || - !batch->rsd[PIPE_SHADER_VERTEX]); + return (rast->rasterizer_discard || batch->scissor_culls_everything || + !batch->rsd[PIPE_SHADER_VERTEX]); } diff --git a/src/gallium/drivers/panfrost/pan_job.h b/src/gallium/drivers/panfrost/pan_job.h index ed7d970a9b3..49953221f3d 100644 --- a/src/gallium/drivers/panfrost/pan_job.h +++ b/src/gallium/drivers/panfrost/pan_job.h @@ -26,8 +26,8 @@ #ifndef __PAN_JOB_H__ #define __PAN_JOB_H__ -#include "util/u_dynarray.h" #include "pipe/p_state.h" +#include "util/u_dynarray.h" #include "pan_cs.h" #include "pan_mempool.h" #include "pan_resource.h" @@ -39,11 +39,11 @@ * error. The getter needs to be used instead. */ struct pan_tristate { - enum { - PAN_TRISTATE_DONTCARE, - PAN_TRISTATE_FALSE, - PAN_TRISTATE_TRUE, - } v; + enum { + PAN_TRISTATE_DONTCARE, + PAN_TRISTATE_FALSE, + PAN_TRISTATE_TRUE, + } v; }; /* @@ -53,20 +53,20 @@ struct pan_tristate { static bool pan_tristate_set(struct pan_tristate *state, bool value) { - switch (state->v) { - case PAN_TRISTATE_DONTCARE: - state->v = value ? PAN_TRISTATE_TRUE : PAN_TRISTATE_FALSE; - return true; + switch (state->v) { + case PAN_TRISTATE_DONTCARE: + state->v = value ? PAN_TRISTATE_TRUE : PAN_TRISTATE_FALSE; + return true; - case PAN_TRISTATE_FALSE: - return (value == false); + case PAN_TRISTATE_FALSE: + return (value == false); - case PAN_TRISTATE_TRUE: - return (value == true); + case PAN_TRISTATE_TRUE: + return (value == true); - default: - unreachable("Invalid tristate value"); - } + default: + unreachable("Invalid tristate value"); + } } /* @@ -76,189 +76,179 @@ pan_tristate_set(struct pan_tristate *state, bool value) static bool pan_tristate_get(struct pan_tristate state) { - return (state.v == PAN_TRISTATE_TRUE); + return (state.v == PAN_TRISTATE_TRUE); } /* A panfrost_batch corresponds to a bound FBO we're rendering to, * collecting over multiple draws. 
*/ struct panfrost_batch { - struct panfrost_context *ctx; - struct pipe_framebuffer_state key; + struct panfrost_context *ctx; + struct pipe_framebuffer_state key; - /* Sequence number used to implement LRU eviction when all batch slots are used */ - uint64_t seqnum; + /* Sequence number used to implement LRU eviction when all batch slots are + * used */ + uint64_t seqnum; - /* Buffers cleared (PIPE_CLEAR_* bitmask) */ - unsigned clear; + /* Buffers cleared (PIPE_CLEAR_* bitmask) */ + unsigned clear; - /* Buffers drawn */ - unsigned draws; + /* Buffers drawn */ + unsigned draws; - /* Buffers read */ - unsigned read; + /* Buffers read */ + unsigned read; - /* Buffers needing resolve to memory */ - unsigned resolve; + /* Buffers needing resolve to memory */ + unsigned resolve; - /* Packed clear values, indexed by both render target as well as word. - * Essentially, a single pixel is packed, with some padding to bring it - * up to a 32-bit interval; that pixel is then duplicated over to fill - * all 16-bytes */ + /* Packed clear values, indexed by both render target as well as word. + * Essentially, a single pixel is packed, with some padding to bring it + * up to a 32-bit interval; that pixel is then duplicated over to fill + * all 16-bytes */ - uint32_t clear_color[PIPE_MAX_COLOR_BUFS][4]; - float clear_depth; - unsigned clear_stencil; + uint32_t clear_color[PIPE_MAX_COLOR_BUFS][4]; + float clear_depth; + unsigned clear_stencil; - /* Amount of thread local storage required per thread */ - unsigned stack_size; + /* Amount of thread local storage required per thread */ + unsigned stack_size; - /* Amount of shared memory needed per workgroup (for compute) */ - unsigned shared_size; + /* Amount of shared memory needed per workgroup (for compute) */ + unsigned shared_size; - /* The bounding box covered by this job, taking scissors into account. - * Basically, the bounding box we have to run fragment shaders for */ + /* The bounding box covered by this job, taking scissors into account. 
+ * Basically, the bounding box we have to run fragment shaders for */ - unsigned minx, miny; - unsigned maxx, maxy; + unsigned minx, miny; + unsigned maxx, maxy; - /* Acts as a rasterizer discard */ - bool scissor_culls_everything; + /* Acts as a rasterizer discard */ + bool scissor_culls_everything; - /* BOs referenced not in the pool */ - unsigned num_bos; - struct util_dynarray bos; + /* BOs referenced not in the pool */ + unsigned num_bos; + struct util_dynarray bos; - /* Pool owned by this batch (released when the batch is released) used for temporary descriptors */ - struct panfrost_pool pool; + /* Pool owned by this batch (released when the batch is released) used for + * temporary descriptors */ + struct panfrost_pool pool; - /* Pool also owned by this batch that is not CPU mapped (created as - * INVISIBLE) used for private GPU-internal structures, particularly - * varyings */ - struct panfrost_pool invisible_pool; + /* Pool also owned by this batch that is not CPU mapped (created as + * INVISIBLE) used for private GPU-internal structures, particularly + * varyings */ + struct panfrost_pool invisible_pool; - /* Job scoreboarding state */ - struct pan_scoreboard scoreboard; + /* Job scoreboarding state */ + struct pan_scoreboard scoreboard; - /* Polygon list bound to the batch, or NULL if none bound yet */ - struct panfrost_bo *polygon_list; + /* Polygon list bound to the batch, or NULL if none bound yet */ + struct panfrost_bo *polygon_list; - /* Scratchpad BO bound to the batch, or NULL if none bound yet */ - struct panfrost_bo *scratchpad; + /* Scratchpad BO bound to the batch, or NULL if none bound yet */ + struct panfrost_bo *scratchpad; - /* Shared memory BO bound to the batch, or NULL if none bound yet */ - struct panfrost_bo *shared_memory; + /* Shared memory BO bound to the batch, or NULL if none bound yet */ + struct panfrost_bo *shared_memory; - /* Framebuffer descriptor. */ - struct panfrost_ptr framebuffer; + /* Framebuffer descriptor. */ + struct panfrost_ptr framebuffer; - /* Thread local storage descriptor. */ - struct panfrost_ptr tls; + /* Thread local storage descriptor. */ + struct panfrost_ptr tls; - /* Tiler context */ - struct pan_tiler_context tiler_ctx; + /* Tiler context */ + struct pan_tiler_context tiler_ctx; - /* Keep the num_work_groups sysval around for indirect dispatch */ - mali_ptr num_wg_sysval[3]; + /* Keep the num_work_groups sysval around for indirect dispatch */ + mali_ptr num_wg_sysval[3]; - /* Cached descriptors */ - mali_ptr viewport; - mali_ptr rsd[PIPE_SHADER_TYPES]; - mali_ptr textures[PIPE_SHADER_TYPES]; - mali_ptr samplers[PIPE_SHADER_TYPES]; - mali_ptr attribs[PIPE_SHADER_TYPES]; - mali_ptr attrib_bufs[PIPE_SHADER_TYPES]; - mali_ptr uniform_buffers[PIPE_SHADER_TYPES]; - mali_ptr push_uniforms[PIPE_SHADER_TYPES]; - mali_ptr depth_stencil; - mali_ptr blend; + /* Cached descriptors */ + mali_ptr viewport; + mali_ptr rsd[PIPE_SHADER_TYPES]; + mali_ptr textures[PIPE_SHADER_TYPES]; + mali_ptr samplers[PIPE_SHADER_TYPES]; + mali_ptr attribs[PIPE_SHADER_TYPES]; + mali_ptr attrib_bufs[PIPE_SHADER_TYPES]; + mali_ptr uniform_buffers[PIPE_SHADER_TYPES]; + mali_ptr push_uniforms[PIPE_SHADER_TYPES]; + mali_ptr depth_stencil; + mali_ptr blend; - /* Valhall: struct mali_scissor_packed */ - unsigned scissor[2]; - float minimum_z, maximum_z; + /* Valhall: struct mali_scissor_packed */ + unsigned scissor[2]; + float minimum_z, maximum_z; - /* Used on Valhall only. Midgard includes attributes in-band with - * attributes, wildly enough. 
- */ - mali_ptr images[PIPE_SHADER_TYPES]; + /* Used on Valhall only. Midgard includes attributes in-band with + * attributes, wildly enough. + */ + mali_ptr images[PIPE_SHADER_TYPES]; - /* On Valhall, these are properties of the batch. On Bifrost, they are - * per draw. - */ - struct pan_tristate sprite_coord_origin; - struct pan_tristate first_provoking_vertex; + /* On Valhall, these are properties of the batch. On Bifrost, they are + * per draw. + */ + struct pan_tristate sprite_coord_origin; + struct pan_tristate first_provoking_vertex; }; /* Functions for managing the above */ -struct panfrost_batch * -panfrost_get_batch_for_fbo(struct panfrost_context *ctx); +struct panfrost_batch *panfrost_get_batch_for_fbo(struct panfrost_context *ctx); struct panfrost_batch * -panfrost_get_fresh_batch_for_fbo(struct panfrost_context *ctx, const char *reason); +panfrost_get_fresh_batch_for_fbo(struct panfrost_context *ctx, + const char *reason); -void -panfrost_batch_add_bo(struct panfrost_batch *batch, - struct panfrost_bo *bo, - enum pipe_shader_type stage); +void panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo, + enum pipe_shader_type stage); -void -panfrost_batch_read_rsrc(struct panfrost_batch *batch, - struct panfrost_resource *rsrc, - enum pipe_shader_type stage); +void panfrost_batch_read_rsrc(struct panfrost_batch *batch, + struct panfrost_resource *rsrc, + enum pipe_shader_type stage); -void -panfrost_batch_write_rsrc(struct panfrost_batch *batch, - struct panfrost_resource *rsrc, - enum pipe_shader_type stage); +void panfrost_batch_write_rsrc(struct panfrost_batch *batch, + struct panfrost_resource *rsrc, + enum pipe_shader_type stage); -bool -panfrost_any_batch_reads_rsrc(struct panfrost_context *ctx, - struct panfrost_resource *rsrc); +bool panfrost_any_batch_reads_rsrc(struct panfrost_context *ctx, + struct panfrost_resource *rsrc); -bool -panfrost_any_batch_writes_rsrc(struct panfrost_context *ctx, - struct panfrost_resource *rsrc); +bool panfrost_any_batch_writes_rsrc(struct panfrost_context *ctx, + struct panfrost_resource *rsrc); + +struct panfrost_bo *panfrost_batch_create_bo(struct panfrost_batch *batch, + size_t size, uint32_t create_flags, + enum pipe_shader_type stage, + const char *label); + +void panfrost_flush_all_batches(struct panfrost_context *ctx, + const char *reason); + +void panfrost_flush_batches_accessing_rsrc(struct panfrost_context *ctx, + struct panfrost_resource *rsrc, + const char *reason); + +void panfrost_flush_writer(struct panfrost_context *ctx, + struct panfrost_resource *rsrc, const char *reason); + +void panfrost_batch_adjust_stack_size(struct panfrost_batch *batch); + +struct panfrost_bo *panfrost_batch_get_scratchpad(struct panfrost_batch *batch, + unsigned size, + unsigned thread_tls_alloc, + unsigned core_id_range); struct panfrost_bo * -panfrost_batch_create_bo(struct panfrost_batch *batch, size_t size, - uint32_t create_flags, enum pipe_shader_type stage, - const char *label); +panfrost_batch_get_shared_memory(struct panfrost_batch *batch, unsigned size, + unsigned workgroup_count); -void -panfrost_flush_all_batches(struct panfrost_context *ctx, const char *reason); +void panfrost_batch_clear(struct panfrost_batch *batch, unsigned buffers, + const union pipe_color_union *color, double depth, + unsigned stencil); -void -panfrost_flush_batches_accessing_rsrc(struct panfrost_context *ctx, - struct panfrost_resource *rsrc, - const char *reason); +void panfrost_batch_union_scissor(struct panfrost_batch *batch, unsigned minx, 
+ unsigned miny, unsigned maxx, unsigned maxy); -void -panfrost_flush_writer(struct panfrost_context *ctx, - struct panfrost_resource *rsrc, - const char *reason); - -void -panfrost_batch_adjust_stack_size(struct panfrost_batch *batch); - -struct panfrost_bo * -panfrost_batch_get_scratchpad(struct panfrost_batch *batch, unsigned size, unsigned thread_tls_alloc, unsigned core_id_range); - -struct panfrost_bo * -panfrost_batch_get_shared_memory(struct panfrost_batch *batch, unsigned size, unsigned workgroup_count); - -void -panfrost_batch_clear(struct panfrost_batch *batch, - unsigned buffers, - const union pipe_color_union *color, - double depth, unsigned stencil); - -void -panfrost_batch_union_scissor(struct panfrost_batch *batch, - unsigned minx, unsigned miny, - unsigned maxx, unsigned maxy); - -bool -panfrost_batch_skip_rasterization(struct panfrost_batch *batch); +bool panfrost_batch_skip_rasterization(struct panfrost_batch *batch); #endif diff --git a/src/gallium/drivers/panfrost/pan_mempool.c b/src/gallium/drivers/panfrost/pan_mempool.c index 1757e99b87c..89797cc3935 100644 --- a/src/gallium/drivers/panfrost/pan_mempool.c +++ b/src/gallium/drivers/panfrost/pan_mempool.c @@ -46,124 +46,124 @@ static struct panfrost_bo * panfrost_pool_alloc_backing(struct panfrost_pool *pool, size_t bo_sz) { - /* We don't know what the BO will be used for, so let's flag it - * RW and attach it to both the fragment and vertex/tiler jobs. - * TODO: if we want fine grained BO assignment we should pass - * flags to this function and keep the read/write, - * fragment/vertex+tiler pools separate. - */ - struct panfrost_bo *bo = panfrost_bo_create(pool->base.dev, bo_sz, - pool->base.create_flags, pool->base.label); + /* We don't know what the BO will be used for, so let's flag it + * RW and attach it to both the fragment and vertex/tiler jobs. + * TODO: if we want fine grained BO assignment we should pass + * flags to this function and keep the read/write, + * fragment/vertex+tiler pools separate. 
+ */ + struct panfrost_bo *bo = panfrost_bo_create( + pool->base.dev, bo_sz, pool->base.create_flags, pool->base.label); - if (pool->owned) - util_dynarray_append(&pool->bos, struct panfrost_bo *, bo); - else - panfrost_bo_unreference(pool->transient_bo); + if (pool->owned) + util_dynarray_append(&pool->bos, struct panfrost_bo *, bo); + else + panfrost_bo_unreference(pool->transient_bo); - pool->transient_bo = bo; - pool->transient_offset = 0; + pool->transient_bo = bo; + pool->transient_offset = 0; - return bo; + return bo; } void panfrost_pool_init(struct panfrost_pool *pool, void *memctx, - struct panfrost_device *dev, - unsigned create_flags, size_t slab_size, const char *label, - bool prealloc, bool owned) + struct panfrost_device *dev, unsigned create_flags, + size_t slab_size, const char *label, bool prealloc, + bool owned) { - memset(pool, 0, sizeof(*pool)); - pan_pool_init(&pool->base, dev, create_flags, slab_size, label); - pool->owned = owned; + memset(pool, 0, sizeof(*pool)); + pan_pool_init(&pool->base, dev, create_flags, slab_size, label); + pool->owned = owned; - if (owned) - util_dynarray_init(&pool->bos, memctx); + if (owned) + util_dynarray_init(&pool->bos, memctx); - if (prealloc) - panfrost_pool_alloc_backing(pool, pool->base.slab_size); + if (prealloc) + panfrost_pool_alloc_backing(pool, pool->base.slab_size); } void panfrost_pool_cleanup(struct panfrost_pool *pool) { - if (!pool->owned) { - panfrost_bo_unreference(pool->transient_bo); - return; - } + if (!pool->owned) { + panfrost_bo_unreference(pool->transient_bo); + return; + } - util_dynarray_foreach(&pool->bos, struct panfrost_bo *, bo) - panfrost_bo_unreference(*bo); + util_dynarray_foreach(&pool->bos, struct panfrost_bo *, bo) + panfrost_bo_unreference(*bo); - util_dynarray_fini(&pool->bos); + util_dynarray_fini(&pool->bos); } void panfrost_pool_get_bo_handles(struct panfrost_pool *pool, uint32_t *handles) { - assert(pool->owned && "pool does not track BOs in unowned mode"); + assert(pool->owned && "pool does not track BOs in unowned mode"); - unsigned idx = 0; - util_dynarray_foreach(&pool->bos, struct panfrost_bo *, bo) { - assert((*bo)->gem_handle > 0); - handles[idx++] = (*bo)->gem_handle; + unsigned idx = 0; + util_dynarray_foreach(&pool->bos, struct panfrost_bo *, bo) { + assert((*bo)->gem_handle > 0); + handles[idx++] = (*bo)->gem_handle; - /* Update the BO access flags so that panfrost_bo_wait() knows - * about all pending accesses. - * We only keep the READ/WRITE info since this is all the BO - * wait logic cares about. - * We also preserve existing flags as this batch might not - * be the first one to access the BO. - */ - (*bo)->gpu_access |= PAN_BO_ACCESS_RW; - } + /* Update the BO access flags so that panfrost_bo_wait() knows + * about all pending accesses. + * We only keep the READ/WRITE info since this is all the BO + * wait logic cares about. + * We also preserve existing flags as this batch might not + * be the first one to access the BO. 
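/* Minimal sketch of the bump sub-allocation scheme implemented by
 * panfrost_pool_alloc_aligned() further down: round the running offset up to
 * a power-of-two alignment and, when the request no longer fits in the
 * current slab, start a new page-aligned backing of at least slab_size
 * bytes.  malloc() stands in here for the BO creation the driver performs;
 * the offset arithmetic mirrors the code below. */
#include <stdint.h>
#include <stdlib.h>

struct bump_pool_sketch {
   uint8_t *base;    /* current backing slab (a mapped BO in the driver) */
   size_t offset;    /* bytes already handed out from it */
   size_t slab_size; /* default backing size */
};

static uint8_t *
bump_alloc_sketch(struct bump_pool_sketch *p, size_t sz, size_t alignment)
{
   /* Alignment must be a power of two, as asserted in the driver. */
   size_t offset = (p->offset + alignment - 1) & ~(alignment - 1);

   if (p->base == NULL || offset + sz >= p->slab_size) {
      size_t backing = sz > p->slab_size ? sz : p->slab_size;
      backing = (backing + 4095) & ~(size_t)4095;

      /* Older slabs stay alive with whoever still references them. */
      p->base = malloc(backing);
      offset = 0;
   }

   p->offset = offset + sz;
   return p->base + offset;
}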
+ */ + (*bo)->gpu_access |= PAN_BO_ACCESS_RW; + } } #define PAN_GUARD_SIZE 4096 static struct panfrost_ptr -panfrost_pool_alloc_aligned(struct panfrost_pool *pool, size_t sz, unsigned alignment) +panfrost_pool_alloc_aligned(struct panfrost_pool *pool, size_t sz, + unsigned alignment) { - assert(alignment == util_next_power_of_two(alignment)); + assert(alignment == util_next_power_of_two(alignment)); - /* Find or create a suitable BO */ - struct panfrost_bo *bo = pool->transient_bo; - unsigned offset = ALIGN_POT(pool->transient_offset, alignment); + /* Find or create a suitable BO */ + struct panfrost_bo *bo = pool->transient_bo; + unsigned offset = ALIGN_POT(pool->transient_offset, alignment); #ifdef PAN_DBG_OVERFLOW - if (unlikely(pool->base.dev->debug & PAN_DBG_OVERFLOW) && - !(pool->base.create_flags & PAN_BO_INVISIBLE)) { - unsigned aligned = ALIGN_POT(sz, sysconf(_SC_PAGESIZE)); - unsigned bo_size = aligned + PAN_GUARD_SIZE; + if (unlikely(pool->base.dev->debug & PAN_DBG_OVERFLOW) && + !(pool->base.create_flags & PAN_BO_INVISIBLE)) { + unsigned aligned = ALIGN_POT(sz, sysconf(_SC_PAGESIZE)); + unsigned bo_size = aligned + PAN_GUARD_SIZE; - bo = panfrost_pool_alloc_backing(pool, bo_size); - memset(bo->ptr.cpu, 0xbb, bo_size); + bo = panfrost_pool_alloc_backing(pool, bo_size); + memset(bo->ptr.cpu, 0xbb, bo_size); - /* Place the object as close as possible to the protected - * region at the end of the buffer while keeping alignment. */ - offset = ROUND_DOWN_TO(aligned - sz, alignment); + /* Place the object as close as possible to the protected + * region at the end of the buffer while keeping alignment. */ + offset = ROUND_DOWN_TO(aligned - sz, alignment); - if (mprotect(bo->ptr.cpu + aligned, - PAN_GUARD_SIZE, PROT_NONE) == -1) - perror("mprotect"); + if (mprotect(bo->ptr.cpu + aligned, PAN_GUARD_SIZE, PROT_NONE) == -1) + perror("mprotect"); - pool->transient_bo = NULL; - } + pool->transient_bo = NULL; + } #endif - /* If we don't fit, allocate a new backing */ - if (unlikely(bo == NULL || (offset + sz) >= pool->base.slab_size)) { - bo = panfrost_pool_alloc_backing(pool, - ALIGN_POT(MAX2(pool->base.slab_size, sz), 4096)); - offset = 0; - } + /* If we don't fit, allocate a new backing */ + if (unlikely(bo == NULL || (offset + sz) >= pool->base.slab_size)) { + bo = panfrost_pool_alloc_backing( + pool, ALIGN_POT(MAX2(pool->base.slab_size, sz), 4096)); + offset = 0; + } - pool->transient_offset = offset + sz; + pool->transient_offset = offset + sz; - struct panfrost_ptr ret = { - .cpu = bo->ptr.cpu + offset, - .gpu = bo->ptr.gpu + offset, - }; + struct panfrost_ptr ret = { + .cpu = bo->ptr.cpu + offset, + .gpu = bo->ptr.gpu + offset, + }; - return ret; + return ret; } PAN_POOL_ALLOCATOR(struct panfrost_pool, panfrost_pool_alloc_aligned) diff --git a/src/gallium/drivers/panfrost/pan_mempool.h b/src/gallium/drivers/panfrost/pan_mempool.h index 5b75a744515..e864176f373 100644 --- a/src/gallium/drivers/panfrost/pan_mempool.h +++ b/src/gallium/drivers/panfrost/pan_mempool.h @@ -31,37 +31,37 @@ be unowned for persistent uploads. */ struct panfrost_pool { - /* Inherit from pan_pool */ - struct pan_pool base; + /* Inherit from pan_pool */ + struct pan_pool base; - /* BOs allocated by this pool */ - struct util_dynarray bos; + /* BOs allocated by this pool */ + struct util_dynarray bos; - /* Current transient BO */ - struct panfrost_bo *transient_bo; + /* Current transient BO */ + struct panfrost_bo *transient_bo; - /* Within the topmost transient BO, how much has been used? 
*/ - unsigned transient_offset; + /* Within the topmost transient BO, how much has been used? */ + unsigned transient_offset; - /* Mode of the pool. BO management is in the pool for owned mode, but - * the consumed for unowned mode. */ - bool owned; + /* Mode of the pool. BO management is in the pool for owned mode, but + * the consumed for unowned mode. */ + bool owned; }; static inline struct panfrost_pool * to_panfrost_pool(struct pan_pool *pool) { - return container_of(pool, struct panfrost_pool, base); + return container_of(pool, struct panfrost_pool, base); } /* Reference to pool allocated memory for an unowned pool */ struct panfrost_pool_ref { - /* Owning BO */ - struct panfrost_bo *bo; + /* Owning BO */ + struct panfrost_bo *bo; - /* Mapped GPU VA */ - mali_ptr gpu; + /* Mapped GPU VA */ + mali_ptr gpu; }; /* Take a reference to an allocation pool. Call directly after allocating from @@ -70,32 +70,30 @@ struct panfrost_pool_ref { static inline struct panfrost_pool_ref panfrost_pool_take_ref(struct panfrost_pool *pool, mali_ptr ptr) { - if (!pool->owned) - panfrost_bo_reference(pool->transient_bo); + if (!pool->owned) + panfrost_bo_reference(pool->transient_bo); - return (struct panfrost_pool_ref) { - .bo = pool->transient_bo, - .gpu = ptr, - }; + return (struct panfrost_pool_ref){ + .bo = pool->transient_bo, + .gpu = ptr, + }; } -void -panfrost_pool_init(struct panfrost_pool *pool, void *memctx, - struct panfrost_device *dev, unsigned create_flags, - size_t slab_size, const char *label, bool prealloc, bool - owned); +void panfrost_pool_init(struct panfrost_pool *pool, void *memctx, + struct panfrost_device *dev, unsigned create_flags, + size_t slab_size, const char *label, bool prealloc, + bool owned); -void -panfrost_pool_cleanup(struct panfrost_pool *pool); +void panfrost_pool_cleanup(struct panfrost_pool *pool); static inline unsigned panfrost_pool_num_bos(struct panfrost_pool *pool) { - assert(pool->owned && "pool does not track BOs in unowned mode"); - return util_dynarray_num_elements(&pool->bos, struct panfrost_bo *); + assert(pool->owned && "pool does not track BOs in unowned mode"); + return util_dynarray_num_elements(&pool->bos, struct panfrost_bo *); } -void -panfrost_pool_get_bo_handles(struct panfrost_pool *pool, uint32_t *handles); +void panfrost_pool_get_bo_handles(struct panfrost_pool *pool, + uint32_t *handles); #endif diff --git a/src/gallium/drivers/panfrost/pan_public.h b/src/gallium/drivers/panfrost/pan_public.h index c7e72f94246..ed21ccdda60 100644 --- a/src/gallium/drivers/panfrost/pan_public.h +++ b/src/gallium/drivers/panfrost/pan_public.h @@ -31,8 +31,7 @@ extern "C" { struct pipe_screen; struct renderonly; -struct pipe_screen * -panfrost_create_screen(int fd, struct renderonly *ro); +struct pipe_screen *panfrost_create_screen(int fd, struct renderonly *ro); #ifdef __cplusplus } diff --git a/src/gallium/drivers/panfrost/pan_resource.c b/src/gallium/drivers/panfrost/pan_resource.c index 52d44fc62bf..6e87fc95e48 100644 --- a/src/gallium/drivers/panfrost/pan_resource.c +++ b/src/gallium/drivers/panfrost/pan_resource.c @@ -30,286 +30,279 @@ * */ -#include #include +#include #include "drm-uapi/drm_fourcc.h" #include "frontend/winsys_handle.h" #include "util/format/u_format.h" +#include "util/u_drm.h" +#include "util/u_gen_mipmap.h" #include "util/u_memory.h" #include "util/u_surface.h" #include "util/u_transfer.h" #include "util/u_transfer_helper.h" -#include "util/u_gen_mipmap.h" -#include "util/u_drm.h" +#include "decode.h" #include "pan_bo.h" #include 
"pan_context.h" -#include "pan_screen.h" #include "pan_resource.h" -#include "pan_util.h" +#include "pan_screen.h" #include "pan_tiling.h" -#include "decode.h" +#include "pan_util.h" static void panfrost_clear_depth_stencil(struct pipe_context *pipe, - struct pipe_surface *dst, - unsigned clear_flags, - double depth, - unsigned stencil, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height, + struct pipe_surface *dst, unsigned clear_flags, + double depth, unsigned stencil, unsigned dstx, + unsigned dsty, unsigned width, unsigned height, bool render_condition_enabled) { - struct panfrost_context *ctx = pan_context(pipe); + struct panfrost_context *ctx = pan_context(pipe); - if (render_condition_enabled && - !panfrost_render_condition_check(ctx)) - return; + if (render_condition_enabled && !panfrost_render_condition_check(ctx)) + return; - panfrost_blitter_save(ctx, render_condition_enabled); - util_blitter_clear_depth_stencil(ctx->blitter, dst, - clear_flags, depth, stencil, - dstx, dsty, width, height); + panfrost_blitter_save(ctx, render_condition_enabled); + util_blitter_clear_depth_stencil(ctx->blitter, dst, clear_flags, depth, + stencil, dstx, dsty, width, height); } static void panfrost_clear_render_target(struct pipe_context *pipe, struct pipe_surface *dst, - const union pipe_color_union *color, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height, + const union pipe_color_union *color, unsigned dstx, + unsigned dsty, unsigned width, unsigned height, bool render_condition_enabled) { - struct panfrost_context *ctx = pan_context(pipe); + struct panfrost_context *ctx = pan_context(pipe); - if (render_condition_enabled && - !panfrost_render_condition_check(ctx)) - return; + if (render_condition_enabled && !panfrost_render_condition_check(ctx)) + return; - panfrost_blitter_save(ctx, render_condition_enabled); - util_blitter_clear_render_target(ctx->blitter, dst, color, - dstx, dsty, width, height); + panfrost_blitter_save(ctx, render_condition_enabled); + util_blitter_clear_render_target(ctx->blitter, dst, color, dstx, dsty, width, + height); } static struct pipe_resource * panfrost_resource_from_handle(struct pipe_screen *pscreen, const struct pipe_resource *templat, - struct winsys_handle *whandle, - unsigned usage) + struct winsys_handle *whandle, unsigned usage) { - struct panfrost_device *dev = pan_device(pscreen); - struct panfrost_resource *rsc; - struct pipe_resource *prsc; + struct panfrost_device *dev = pan_device(pscreen); + struct panfrost_resource *rsc; + struct pipe_resource *prsc; - assert(whandle->type == WINSYS_HANDLE_TYPE_FD); + assert(whandle->type == WINSYS_HANDLE_TYPE_FD); - rsc = CALLOC_STRUCT(panfrost_resource); - if (!rsc) - return NULL; + rsc = CALLOC_STRUCT(panfrost_resource); + if (!rsc) + return NULL; - prsc = &rsc->base; + prsc = &rsc->base; - *prsc = *templat; + *prsc = *templat; - pipe_reference_init(&prsc->reference, 1); - prsc->screen = pscreen; + pipe_reference_init(&prsc->reference, 1); + prsc->screen = pscreen; - uint64_t mod = whandle->modifier == DRM_FORMAT_MOD_INVALID ? - DRM_FORMAT_MOD_LINEAR : whandle->modifier; - enum mali_texture_dimension dim = - panfrost_translate_texture_dimension(templat->target); - struct pan_image_explicit_layout explicit_layout = { - .offset = whandle->offset, - .row_stride = panfrost_from_legacy_stride(whandle->stride, templat->format, mod), - }; + uint64_t mod = whandle->modifier == DRM_FORMAT_MOD_INVALID + ? 
DRM_FORMAT_MOD_LINEAR + : whandle->modifier; + enum mali_texture_dimension dim = + panfrost_translate_texture_dimension(templat->target); + struct pan_image_explicit_layout explicit_layout = { + .offset = whandle->offset, + .row_stride = + panfrost_from_legacy_stride(whandle->stride, templat->format, mod), + }; - rsc->image.layout = (struct pan_image_layout) { - .modifier = mod, - .format = templat->format, - .dim = dim, - .width = prsc->width0, - .height = prsc->height0, - .depth = prsc->depth0, - .array_size = prsc->array_size, - .nr_samples = MAX2(prsc->nr_samples, 1), - .nr_slices = 1, - }; + rsc->image.layout = (struct pan_image_layout){ + .modifier = mod, + .format = templat->format, + .dim = dim, + .width = prsc->width0, + .height = prsc->height0, + .depth = prsc->depth0, + .array_size = prsc->array_size, + .nr_samples = MAX2(prsc->nr_samples, 1), + .nr_slices = 1, + }; - bool valid = pan_image_layout_init(&rsc->image.layout, &explicit_layout); + bool valid = pan_image_layout_init(&rsc->image.layout, &explicit_layout); - if (!valid) { - FREE(rsc); - return NULL; - } + if (!valid) { + FREE(rsc); + return NULL; + } - rsc->image.data.bo = panfrost_bo_import(dev, whandle->handle); - /* Sometimes an import can fail e.g. on an invalid buffer fd, out of - * memory space to mmap it etc. - */ - if (!rsc->image.data.bo) { - FREE(rsc); - return NULL; - } + rsc->image.data.bo = panfrost_bo_import(dev, whandle->handle); + /* Sometimes an import can fail e.g. on an invalid buffer fd, out of + * memory space to mmap it etc. + */ + if (!rsc->image.data.bo) { + FREE(rsc); + return NULL; + } - rsc->modifier_constant = true; + rsc->modifier_constant = true; - BITSET_SET(rsc->valid.data, 0); - panfrost_resource_set_damage_region(pscreen, &rsc->base, 0, NULL); + BITSET_SET(rsc->valid.data, 0); + panfrost_resource_set_damage_region(pscreen, &rsc->base, 0, NULL); - if (dev->ro) { - rsc->scanout = - renderonly_create_gpu_import_for_resource(prsc, dev->ro, NULL); - /* failure is expected in some cases.. */ - } + if (dev->ro) { + rsc->scanout = + renderonly_create_gpu_import_for_resource(prsc, dev->ro, NULL); + /* failure is expected in some cases.. */ + } - return prsc; + return prsc; } static bool panfrost_resource_get_handle(struct pipe_screen *pscreen, - struct pipe_context *ctx, - struct pipe_resource *pt, - struct winsys_handle *handle, - unsigned usage) + struct pipe_context *ctx, struct pipe_resource *pt, + struct winsys_handle *handle, unsigned usage) { - struct panfrost_device *dev = pan_device(pscreen); - struct panfrost_resource *rsrc; - struct renderonly_scanout *scanout; - struct pipe_resource *cur = pt; + struct panfrost_device *dev = pan_device(pscreen); + struct panfrost_resource *rsrc; + struct renderonly_scanout *scanout; + struct pipe_resource *cur = pt; - /* Even though panfrost doesn't support multi-planar formats, we - * can get here through GBM, which does. Walk the list of planes - * to find the right one. - */ - for (int i = 0; i < handle->plane; i++) { - cur = cur->next; - if (!cur) - return false; - } - rsrc = pan_resource(cur); - scanout = rsrc->scanout; + /* Even though panfrost doesn't support multi-planar formats, we + * can get here through GBM, which does. Walk the list of planes + * to find the right one. 
+ */ + for (int i = 0; i < handle->plane; i++) { + cur = cur->next; + if (!cur) + return false; + } + rsrc = pan_resource(cur); + scanout = rsrc->scanout; - handle->modifier = rsrc->image.layout.modifier; - rsrc->modifier_constant = true; + handle->modifier = rsrc->image.layout.modifier; + rsrc->modifier_constant = true; - if (handle->type == WINSYS_HANDLE_TYPE_KMS && dev->ro) { - return renderonly_get_handle(scanout, handle); - } else if (handle->type == WINSYS_HANDLE_TYPE_KMS) { - handle->handle = rsrc->image.data.bo->gem_handle; - } else if (handle->type == WINSYS_HANDLE_TYPE_FD) { - int fd = panfrost_bo_export(rsrc->image.data.bo); + if (handle->type == WINSYS_HANDLE_TYPE_KMS && dev->ro) { + return renderonly_get_handle(scanout, handle); + } else if (handle->type == WINSYS_HANDLE_TYPE_KMS) { + handle->handle = rsrc->image.data.bo->gem_handle; + } else if (handle->type == WINSYS_HANDLE_TYPE_FD) { + int fd = panfrost_bo_export(rsrc->image.data.bo); - if (fd < 0) - return false; + if (fd < 0) + return false; - handle->handle = fd; - } else { - /* Other handle types not supported */ - return false; - } + handle->handle = fd; + } else { + /* Other handle types not supported */ + return false; + } - handle->stride = panfrost_get_legacy_stride(&rsrc->image.layout, 0); - handle->offset = rsrc->image.layout.slices[0].offset; - return true; + handle->stride = panfrost_get_legacy_stride(&rsrc->image.layout, 0); + handle->offset = rsrc->image.layout.slices[0].offset; + return true; } static bool panfrost_resource_get_param(struct pipe_screen *pscreen, - struct pipe_context *pctx, struct pipe_resource *prsc, - unsigned plane, unsigned layer, unsigned level, - enum pipe_resource_param param, - unsigned usage, uint64_t *value) + struct pipe_context *pctx, + struct pipe_resource *prsc, unsigned plane, + unsigned layer, unsigned level, + enum pipe_resource_param param, unsigned usage, + uint64_t *value) { - struct panfrost_resource *rsrc = (struct panfrost_resource *) prsc; - struct pipe_resource *cur; - unsigned count; + struct panfrost_resource *rsrc = (struct panfrost_resource *)prsc; + struct pipe_resource *cur; + unsigned count; - switch (param) { - case PIPE_RESOURCE_PARAM_STRIDE: - *value = panfrost_get_legacy_stride(&rsrc->image.layout, level); - return true; - case PIPE_RESOURCE_PARAM_OFFSET: - *value = rsrc->image.layout.slices[level].offset; - return true; - case PIPE_RESOURCE_PARAM_MODIFIER: - *value = rsrc->image.layout.modifier; - return true; - case PIPE_RESOURCE_PARAM_NPLANES: - /* Panfrost doesn't directly support multi-planar formats, - * but we should still handle this case for gbm users - * that might want to use resources shared with panfrost - * on video processing hardware that does. - */ - for (count = 0, cur = prsc; cur; cur = cur->next) - count++; - *value = count; - return true; - default: - return false; - } + switch (param) { + case PIPE_RESOURCE_PARAM_STRIDE: + *value = panfrost_get_legacy_stride(&rsrc->image.layout, level); + return true; + case PIPE_RESOURCE_PARAM_OFFSET: + *value = rsrc->image.layout.slices[level].offset; + return true; + case PIPE_RESOURCE_PARAM_MODIFIER: + *value = rsrc->image.layout.modifier; + return true; + case PIPE_RESOURCE_PARAM_NPLANES: + /* Panfrost doesn't directly support multi-planar formats, + * but we should still handle this case for gbm users + * that might want to use resources shared with panfrost + * on video processing hardware that does. 
+ */ + for (count = 0, cur = prsc; cur; cur = cur->next) + count++; + *value = count; + return true; + default: + return false; + } } static void panfrost_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc) { - /* TODO */ + /* TODO */ } static struct pipe_surface * -panfrost_create_surface(struct pipe_context *pipe, - struct pipe_resource *pt, +panfrost_create_surface(struct pipe_context *pipe, struct pipe_resource *pt, const struct pipe_surface *surf_tmpl) { - struct panfrost_context *ctx = pan_context(pipe); - struct pipe_surface *ps = NULL; + struct panfrost_context *ctx = pan_context(pipe); + struct pipe_surface *ps = NULL; - pan_legalize_afbc_format(ctx, pan_resource(pt), surf_tmpl->format); + pan_legalize_afbc_format(ctx, pan_resource(pt), surf_tmpl->format); - ps = CALLOC_STRUCT(pipe_surface); + ps = CALLOC_STRUCT(pipe_surface); - if (ps) { - pipe_reference_init(&ps->reference, 1); - pipe_resource_reference(&ps->texture, pt); - ps->context = pipe; - ps->format = surf_tmpl->format; + if (ps) { + pipe_reference_init(&ps->reference, 1); + pipe_resource_reference(&ps->texture, pt); + ps->context = pipe; + ps->format = surf_tmpl->format; - if (pt->target != PIPE_BUFFER) { - assert(surf_tmpl->u.tex.level <= pt->last_level); - ps->width = u_minify(pt->width0, surf_tmpl->u.tex.level); - ps->height = u_minify(pt->height0, surf_tmpl->u.tex.level); - ps->nr_samples = surf_tmpl->nr_samples; - ps->u.tex.level = surf_tmpl->u.tex.level; - ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer; - ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer; - } else { - /* setting width as number of elements should get us correct renderbuffer width */ - ps->width = surf_tmpl->u.buf.last_element - surf_tmpl->u.buf.first_element + 1; - ps->height = pt->height0; - ps->u.buf.first_element = surf_tmpl->u.buf.first_element; - ps->u.buf.last_element = surf_tmpl->u.buf.last_element; - assert(ps->u.buf.first_element <= ps->u.buf.last_element); - assert(ps->u.buf.last_element < ps->width); - } - } + if (pt->target != PIPE_BUFFER) { + assert(surf_tmpl->u.tex.level <= pt->last_level); + ps->width = u_minify(pt->width0, surf_tmpl->u.tex.level); + ps->height = u_minify(pt->height0, surf_tmpl->u.tex.level); + ps->nr_samples = surf_tmpl->nr_samples; + ps->u.tex.level = surf_tmpl->u.tex.level; + ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer; + ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer; + } else { + /* setting width as number of elements should get us correct + * renderbuffer width */ + ps->width = + surf_tmpl->u.buf.last_element - surf_tmpl->u.buf.first_element + 1; + ps->height = pt->height0; + ps->u.buf.first_element = surf_tmpl->u.buf.first_element; + ps->u.buf.last_element = surf_tmpl->u.buf.last_element; + assert(ps->u.buf.first_element <= ps->u.buf.last_element); + assert(ps->u.buf.last_element < ps->width); + } + } - return ps; + return ps; } static void -panfrost_surface_destroy(struct pipe_context *pipe, - struct pipe_surface *surf) +panfrost_surface_destroy(struct pipe_context *pipe, struct pipe_surface *surf) { - assert(surf->texture); - pipe_resource_reference(&surf->texture, NULL); - free(surf); + assert(surf->texture); + pipe_resource_reference(&surf->texture, NULL); + free(surf); } static inline bool panfrost_is_2d(const struct panfrost_resource *pres) { - return (pres->base.target == PIPE_TEXTURE_2D) - || (pres->base.target == PIPE_TEXTURE_RECT); + return (pres->base.target == PIPE_TEXTURE_2D) || + (pres->base.target == PIPE_TEXTURE_RECT); } /* Based on the usage, determine if 
it makes sense to use u-inteleaved tiling. @@ -321,67 +314,62 @@ panfrost_is_2d(const struct panfrost_resource *pres) static bool panfrost_should_afbc(struct panfrost_device *dev, - const struct panfrost_resource *pres, - enum pipe_format fmt) + const struct panfrost_resource *pres, enum pipe_format fmt) { - /* AFBC resources may be rendered to, textured from, or shared across - * processes, but may not be used as e.g buffers */ - const unsigned valid_binding = - PIPE_BIND_DEPTH_STENCIL | - PIPE_BIND_RENDER_TARGET | - PIPE_BIND_BLENDABLE | - PIPE_BIND_SAMPLER_VIEW | - PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED; + /* AFBC resources may be rendered to, textured from, or shared across + * processes, but may not be used as e.g buffers */ + const unsigned valid_binding = + PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_RENDER_TARGET | PIPE_BIND_BLENDABLE | + PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED; - if (pres->base.bind & ~valid_binding) - return false; + if (pres->base.bind & ~valid_binding) + return false; - /* AFBC support is optional */ - if (!dev->has_afbc) - return false; + /* AFBC support is optional */ + if (!dev->has_afbc) + return false; - /* AFBC<-->staging is expensive */ - if (pres->base.usage == PIPE_USAGE_STREAM) - return false; + /* AFBC<-->staging is expensive */ + if (pres->base.usage == PIPE_USAGE_STREAM) + return false; - /* Only a small selection of formats are AFBC'able */ - if (!panfrost_format_supports_afbc(dev, fmt)) - return false; + /* Only a small selection of formats are AFBC'able */ + if (!panfrost_format_supports_afbc(dev, fmt)) + return false; - /* AFBC does not support layered (GLES3 style) multisampling. Use - * EXT_multisampled_render_to_texture instead */ - if (pres->base.nr_samples > 1) - return false; + /* AFBC does not support layered (GLES3 style) multisampling. Use + * EXT_multisampled_render_to_texture instead */ + if (pres->base.nr_samples > 1) + return false; - switch (pres->base.target) { - case PIPE_TEXTURE_2D: - case PIPE_TEXTURE_RECT: - case PIPE_TEXTURE_2D_ARRAY: - case PIPE_TEXTURE_CUBE: - case PIPE_TEXTURE_CUBE_ARRAY: - break; + switch (pres->base.target) { + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + break; - case PIPE_TEXTURE_3D: - /* 3D AFBC is only supported on Bifrost v7+. It's supposed to - * be supported on Midgard but it doesn't seem to work */ - if (dev->arch != 7) - return false; + case PIPE_TEXTURE_3D: + /* 3D AFBC is only supported on Bifrost v7+. 
It's supposed to + * be supported on Midgard but it doesn't seem to work */ + if (dev->arch != 7) + return false; - break; + break; - default: - return false; - } + default: + return false; + } - /* For one tile, AFBC is a loss compared to u-interleaved */ - if (pres->base.width0 <= 16 && pres->base.height0 <= 16) - return false; + /* For one tile, AFBC is a loss compared to u-interleaved */ + if (pres->base.width0 <= 16 && pres->base.height0 <= 16) + return false; - /* Otherwise, we'd prefer AFBC as it is dramatically more efficient - * than linear or usually even u-interleaved */ - return true; + /* Otherwise, we'd prefer AFBC as it is dramatically more efficient + * than linear or usually even u-interleaved */ + return true; } /* @@ -393,37 +381,31 @@ static bool panfrost_should_tile_afbc(const struct panfrost_device *dev, const struct panfrost_resource *pres) { - return panfrost_afbc_can_tile(dev) && - pres->base.width0 >= 128 && - pres->base.height0 >= 128; + return panfrost_afbc_can_tile(dev) && pres->base.width0 >= 128 && + pres->base.height0 >= 128; } static bool panfrost_should_tile(struct panfrost_device *dev, - const struct panfrost_resource *pres, - enum pipe_format fmt) + const struct panfrost_resource *pres, enum pipe_format fmt) { - const unsigned valid_binding = - PIPE_BIND_DEPTH_STENCIL | - PIPE_BIND_RENDER_TARGET | - PIPE_BIND_BLENDABLE | - PIPE_BIND_SAMPLER_VIEW | - PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED; + const unsigned valid_binding = + PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_RENDER_TARGET | PIPE_BIND_BLENDABLE | + PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED; - /* The purpose of tiling is improving locality in both X- and - * Y-directions. If there is only a single pixel in either direction, - * tiling does not make sense; using a linear layout instead is optimal - * for both memory usage and performance. - */ - if (MIN2(pres->base.width0, pres->base.height0) < 2) - return false; + /* The purpose of tiling is improving locality in both X- and + * Y-directions. If there is only a single pixel in either direction, + * tiling does not make sense; using a linear layout instead is optimal + * for both memory usage and performance. 
+ */ + if (MIN2(pres->base.width0, pres->base.height0) < 2) + return false; - bool can_tile = (pres->base.target != PIPE_BUFFER) - && ((pres->base.bind & ~valid_binding) == 0); + bool can_tile = (pres->base.target != PIPE_BUFFER) && + ((pres->base.bind & ~valid_binding) == 0); - return can_tile && (pres->base.usage != PIPE_USAGE_STREAM); + return can_tile && (pres->base.usage != PIPE_USAGE_STREAM); } static uint64_t @@ -431,111 +413,107 @@ panfrost_best_modifier(struct panfrost_device *dev, const struct panfrost_resource *pres, enum pipe_format fmt) { - /* Force linear textures when debugging tiling/compression */ - if (unlikely(dev->debug & PAN_DBG_LINEAR)) - return DRM_FORMAT_MOD_LINEAR; + /* Force linear textures when debugging tiling/compression */ + if (unlikely(dev->debug & PAN_DBG_LINEAR)) + return DRM_FORMAT_MOD_LINEAR; - if (panfrost_should_afbc(dev, pres, fmt)) { - uint64_t afbc = - AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | - AFBC_FORMAT_MOD_SPARSE; + if (panfrost_should_afbc(dev, pres, fmt)) { + uint64_t afbc = AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | AFBC_FORMAT_MOD_SPARSE; - if (panfrost_afbc_can_ytr(pres->base.format)) - afbc |= AFBC_FORMAT_MOD_YTR; + if (panfrost_afbc_can_ytr(pres->base.format)) + afbc |= AFBC_FORMAT_MOD_YTR; - if (panfrost_should_tile_afbc(dev, pres)) - afbc |= AFBC_FORMAT_MOD_TILED | AFBC_FORMAT_MOD_SC; + if (panfrost_should_tile_afbc(dev, pres)) + afbc |= AFBC_FORMAT_MOD_TILED | AFBC_FORMAT_MOD_SC; - return DRM_FORMAT_MOD_ARM_AFBC(afbc); - } else if (panfrost_should_tile(dev, pres, fmt)) - return DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED; - else - return DRM_FORMAT_MOD_LINEAR; + return DRM_FORMAT_MOD_ARM_AFBC(afbc); + } else if (panfrost_should_tile(dev, pres, fmt)) + return DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED; + else + return DRM_FORMAT_MOD_LINEAR; } static bool -panfrost_should_checksum(const struct panfrost_device *dev, const struct panfrost_resource *pres) +panfrost_should_checksum(const struct panfrost_device *dev, + const struct panfrost_resource *pres) { - /* When checksumming is enabled, the tile data must fit in the - * size of the writeback buffer, so don't checksum formats - * that use too much space. */ + /* When checksumming is enabled, the tile data must fit in the + * size of the writeback buffer, so don't checksum formats + * that use too much space. */ - unsigned bytes_per_pixel_max = (dev->arch == 6) ? 6 : 4; + unsigned bytes_per_pixel_max = (dev->arch == 6) ? 6 : 4; - unsigned bytes_per_pixel = MAX2(pres->base.nr_samples, 1) * - util_format_get_blocksize(pres->base.format); + unsigned bytes_per_pixel = MAX2(pres->base.nr_samples, 1) * + util_format_get_blocksize(pres->base.format); - return pres->base.bind & PIPE_BIND_RENDER_TARGET && - panfrost_is_2d(pres) && - bytes_per_pixel <= bytes_per_pixel_max && - pres->base.last_level == 0 && - !(dev->debug & PAN_DBG_NO_CRC); + return pres->base.bind & PIPE_BIND_RENDER_TARGET && panfrost_is_2d(pres) && + bytes_per_pixel <= bytes_per_pixel_max && + pres->base.last_level == 0 && !(dev->debug & PAN_DBG_NO_CRC); } static void panfrost_resource_setup(struct panfrost_device *dev, - struct panfrost_resource *pres, - uint64_t modifier, enum pipe_format fmt) + struct panfrost_resource *pres, uint64_t modifier, + enum pipe_format fmt) { - uint64_t chosen_mod = modifier != DRM_FORMAT_MOD_INVALID ? - modifier : panfrost_best_modifier(dev, pres, fmt); - enum mali_texture_dimension dim = - panfrost_translate_texture_dimension(pres->base.target); + uint64_t chosen_mod = modifier != DRM_FORMAT_MOD_INVALID + ? 
modifier + : panfrost_best_modifier(dev, pres, fmt); + enum mali_texture_dimension dim = + panfrost_translate_texture_dimension(pres->base.target); - /* We can only switch tiled->linear if the resource isn't already - * linear and if we control the modifier */ - pres->modifier_constant = - !(chosen_mod != DRM_FORMAT_MOD_LINEAR && - modifier == DRM_FORMAT_MOD_INVALID); + /* We can only switch tiled->linear if the resource isn't already + * linear and if we control the modifier */ + pres->modifier_constant = !(chosen_mod != DRM_FORMAT_MOD_LINEAR && + modifier == DRM_FORMAT_MOD_INVALID); - /* Z32_S8X24 variants are actually stored in 2 planes (one per - * component), we have to adjust the format on the first plane. - */ - if (fmt == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) - fmt = PIPE_FORMAT_Z32_FLOAT; + /* Z32_S8X24 variants are actually stored in 2 planes (one per + * component), we have to adjust the format on the first plane. + */ + if (fmt == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) + fmt = PIPE_FORMAT_Z32_FLOAT; - pres->image.layout = (struct pan_image_layout) { - .modifier = chosen_mod, - .format = fmt, - .dim = dim, - .width = pres->base.width0, - .height = pres->base.height0, - .depth = pres->base.depth0, - .array_size = pres->base.array_size, - .nr_samples = MAX2(pres->base.nr_samples, 1), - .nr_slices = pres->base.last_level + 1, - .crc = panfrost_should_checksum(dev, pres), - }; + pres->image.layout = (struct pan_image_layout){ + .modifier = chosen_mod, + .format = fmt, + .dim = dim, + .width = pres->base.width0, + .height = pres->base.height0, + .depth = pres->base.depth0, + .array_size = pres->base.array_size, + .nr_samples = MAX2(pres->base.nr_samples, 1), + .nr_slices = pres->base.last_level + 1, + .crc = panfrost_should_checksum(dev, pres), + }; - ASSERTED bool valid = pan_image_layout_init(&pres->image.layout, NULL); - assert(valid); + ASSERTED bool valid = pan_image_layout_init(&pres->image.layout, NULL); + assert(valid); } static void panfrost_resource_init_afbc_headers(struct panfrost_resource *pres) { - panfrost_bo_mmap(pres->image.data.bo); + panfrost_bo_mmap(pres->image.data.bo); - unsigned nr_samples = MAX2(pres->base.nr_samples, 1); + unsigned nr_samples = MAX2(pres->base.nr_samples, 1); - for (unsigned i = 0; i < pres->base.array_size; ++i) { - for (unsigned l = 0; l <= pres->base.last_level; ++l) { - struct pan_image_slice_layout *slice = &pres->image.layout.slices[l]; + for (unsigned i = 0; i < pres->base.array_size; ++i) { + for (unsigned l = 0; l <= pres->base.last_level; ++l) { + struct pan_image_slice_layout *slice = &pres->image.layout.slices[l]; - for (unsigned s = 0; s < nr_samples; ++s) { - void *ptr = pres->image.data.bo->ptr.cpu + - (i * pres->image.layout.array_stride) + - slice->offset + - (s * slice->afbc.surface_stride); + for (unsigned s = 0; s < nr_samples; ++s) { + void *ptr = pres->image.data.bo->ptr.cpu + + (i * pres->image.layout.array_stride) + slice->offset + + (s * slice->afbc.surface_stride); - /* Zero-ed AFBC headers seem to encode a plain - * black. Let's use this pattern to keep the - * initialization simple. - */ - memset(ptr, 0, slice->afbc.header_size); - } - } - } + /* Zero-ed AFBC headers seem to encode a plain + * black. Let's use this pattern to keep the + * initialization simple. 
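/* Condensed sketch of the modifier selection performed by
 * panfrost_best_modifier() above, using the AFBC flag bits from
 * drm_fourcc.h: start from a 16x16 sparse superblock layout and OR in the
 * optional features.  The boolean parameters are assumptions standing in for
 * the panfrost_should_afbc()/panfrost_afbc_can_ytr()/panfrost_should_tile()
 * checks shown above. */
#include <stdbool.h>
#include <stdint.h>
#include <drm_fourcc.h> /* assumed install location of the modifier macros */

static uint64_t
pick_modifier_sketch(bool use_afbc, bool can_ytr, bool tile_afbc,
                     bool use_u_interleaved)
{
   if (use_afbc) {
      uint64_t afbc = AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | AFBC_FORMAT_MOD_SPARSE;

      if (can_ytr) /* lossless color transform for RGB formats */
         afbc |= AFBC_FORMAT_MOD_YTR;

      if (tile_afbc) /* tiled headers plus solid-color block optimization */
         afbc |= AFBC_FORMAT_MOD_TILED | AFBC_FORMAT_MOD_SC;

      return DRM_FORMAT_MOD_ARM_AFBC(afbc);
   }

   return use_u_interleaved ? DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED
                            : DRM_FORMAT_MOD_LINEAR;
}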
+ */ + memset(ptr, 0, slice->afbc.header_size); + } + } + } } void @@ -544,224 +522,219 @@ panfrost_resource_set_damage_region(struct pipe_screen *screen, unsigned int nrects, const struct pipe_box *rects) { - struct panfrost_device *dev = pan_device(screen); - struct panfrost_resource *pres = pan_resource(res); - struct pipe_scissor_state *damage_extent = &pres->damage.extent; - unsigned int i; + struct panfrost_device *dev = pan_device(screen); + struct panfrost_resource *pres = pan_resource(res); + struct pipe_scissor_state *damage_extent = &pres->damage.extent; + unsigned int i; - /* Partial updates are implemented with a tile enable map only on v5. - * Later architectures have a more efficient method of implementing - * partial updates (frame shaders), while earlier architectures lack - * tile enable maps altogether. - */ - if (dev->arch == 5 && nrects > 1) { - if (!pres->damage.tile_map.data) { - pres->damage.tile_map.stride = - ALIGN_POT(DIV_ROUND_UP(res->width0, 32 * 8), 64); - pres->damage.tile_map.size = - pres->damage.tile_map.stride * - DIV_ROUND_UP(res->height0, 32); - pres->damage.tile_map.data = - malloc(pres->damage.tile_map.size); - } + /* Partial updates are implemented with a tile enable map only on v5. + * Later architectures have a more efficient method of implementing + * partial updates (frame shaders), while earlier architectures lack + * tile enable maps altogether. + */ + if (dev->arch == 5 && nrects > 1) { + if (!pres->damage.tile_map.data) { + pres->damage.tile_map.stride = + ALIGN_POT(DIV_ROUND_UP(res->width0, 32 * 8), 64); + pres->damage.tile_map.size = + pres->damage.tile_map.stride * DIV_ROUND_UP(res->height0, 32); + pres->damage.tile_map.data = malloc(pres->damage.tile_map.size); + } - memset(pres->damage.tile_map.data, 0, pres->damage.tile_map.size); - pres->damage.tile_map.enable = true; - } else { - pres->damage.tile_map.enable = false; - } + memset(pres->damage.tile_map.data, 0, pres->damage.tile_map.size); + pres->damage.tile_map.enable = true; + } else { + pres->damage.tile_map.enable = false; + } - /* Track the damage extent: the quad including all damage regions. Will - * be used restrict the rendering area */ + /* Track the damage extent: the quad including all damage regions. 
Will + * be used restrict the rendering area */ - damage_extent->minx = 0xffff; - damage_extent->miny = 0xffff; + damage_extent->minx = 0xffff; + damage_extent->miny = 0xffff; - unsigned enable_count = 0; + unsigned enable_count = 0; - for (i = 0; i < nrects; i++) { - int x = rects[i].x, w = rects[i].width, h = rects[i].height; - int y = res->height0 - (rects[i].y + h); + for (i = 0; i < nrects; i++) { + int x = rects[i].x, w = rects[i].width, h = rects[i].height; + int y = res->height0 - (rects[i].y + h); - damage_extent->minx = MIN2(damage_extent->minx, x); - damage_extent->miny = MIN2(damage_extent->miny, y); - damage_extent->maxx = MAX2(damage_extent->maxx, - MIN2(x + w, res->width0)); - damage_extent->maxy = MAX2(damage_extent->maxy, - MIN2(y + h, res->height0)); + damage_extent->minx = MIN2(damage_extent->minx, x); + damage_extent->miny = MIN2(damage_extent->miny, y); + damage_extent->maxx = MAX2(damage_extent->maxx, MIN2(x + w, res->width0)); + damage_extent->maxy = + MAX2(damage_extent->maxy, MIN2(y + h, res->height0)); - if (!pres->damage.tile_map.enable) - continue; + if (!pres->damage.tile_map.enable) + continue; - unsigned t_x_start = x / 32; - unsigned t_x_end = (x + w - 1) / 32; - unsigned t_y_start = y / 32; - unsigned t_y_end = (y + h - 1) / 32; + unsigned t_x_start = x / 32; + unsigned t_x_end = (x + w - 1) / 32; + unsigned t_y_start = y / 32; + unsigned t_y_end = (y + h - 1) / 32; - for (unsigned t_y = t_y_start; t_y <= t_y_end; t_y++) { - for (unsigned t_x = t_x_start; t_x <= t_x_end; t_x++) { - unsigned b = (t_y * pres->damage.tile_map.stride * 8) + t_x; + for (unsigned t_y = t_y_start; t_y <= t_y_end; t_y++) { + for (unsigned t_x = t_x_start; t_x <= t_x_end; t_x++) { + unsigned b = (t_y * pres->damage.tile_map.stride * 8) + t_x; - if (BITSET_TEST(pres->damage.tile_map.data, b)) - continue; + if (BITSET_TEST(pres->damage.tile_map.data, b)) + continue; - BITSET_SET(pres->damage.tile_map.data, b); - enable_count++; - } - } - } + BITSET_SET(pres->damage.tile_map.data, b); + enable_count++; + } + } + } - if (nrects == 0) { - damage_extent->minx = 0; - damage_extent->miny = 0; - damage_extent->maxx = res->width0; - damage_extent->maxy = res->height0; - } + if (nrects == 0) { + damage_extent->minx = 0; + damage_extent->miny = 0; + damage_extent->maxx = res->width0; + damage_extent->maxy = res->height0; + } - if (pres->damage.tile_map.enable) { - unsigned t_x_start = damage_extent->minx / 32; - unsigned t_x_end = damage_extent->maxx / 32; - unsigned t_y_start = damage_extent->miny / 32; - unsigned t_y_end = damage_extent->maxy / 32; - unsigned tile_count = (t_x_end - t_x_start + 1) * - (t_y_end - t_y_start + 1); - - /* Don't bother passing a tile-enable-map if the amount of - * tiles to reload is to close to the total number of tiles. - */ - if (tile_count - enable_count < 10) - pres->damage.tile_map.enable = false; - } + if (pres->damage.tile_map.enable) { + unsigned t_x_start = damage_extent->minx / 32; + unsigned t_x_end = damage_extent->maxx / 32; + unsigned t_y_start = damage_extent->miny / 32; + unsigned t_y_end = damage_extent->maxy / 32; + unsigned tile_count = + (t_x_end - t_x_start + 1) * (t_y_end - t_y_start + 1); + /* Don't bother passing a tile-enable-map if the amount of + * tiles to reload is to close to the total number of tiles. 
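/* Worked sketch of the tile-enable-map indexing built above: one bit per
 * 32x32-pixel tile, rows padded so the byte stride is a multiple of 64, and
 * bit index b = tile_y * stride * 8 + tile_x.  The byte-wise bit set below
 * is an assumption for the sketch (the driver uses its BITSET helpers), and
 * the rect is expected to be non-empty, as in the caller above. */
#include <stdint.h>

static unsigned
tile_map_stride_bytes(unsigned width_px)
{
   /* DIV_ROUND_UP(width, 32 * 8) bytes per row of tiles, aligned to 64. */
   unsigned bytes = (width_px + 32 * 8 - 1) / (32 * 8);
   return (bytes + 63) & ~63u;
}

static void
mark_damage_rect(uint8_t *map, unsigned stride_bytes, unsigned x, unsigned y,
                 unsigned w, unsigned h)
{
   for (unsigned ty = y / 32; ty <= (y + h - 1) / 32; ++ty) {
      for (unsigned tx = x / 32; tx <= (x + w - 1) / 32; ++tx) {
         unsigned b = ty * stride_bytes * 8 + tx;

         map[b / 8] |= 1u << (b % 8);
      }
   }
}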
+ */ + if (tile_count - enable_count < 10) + pres->damage.tile_map.enable = false; + } } static struct pipe_resource * panfrost_resource_create_with_modifier(struct pipe_screen *screen, - const struct pipe_resource *template, - uint64_t modifier) + const struct pipe_resource *template, + uint64_t modifier) { - struct panfrost_device *dev = pan_device(screen); + struct panfrost_device *dev = pan_device(screen); - struct panfrost_resource *so = CALLOC_STRUCT(panfrost_resource); - so->base = *template; - so->base.screen = screen; + struct panfrost_resource *so = CALLOC_STRUCT(panfrost_resource); + so->base = *template; + so->base.screen = screen; - pipe_reference_init(&so->base.reference, 1); + pipe_reference_init(&so->base.reference, 1); - util_range_init(&so->valid_buffer_range); + util_range_init(&so->valid_buffer_range); - if (template->bind & PAN_BIND_SHARED_MASK) { - /* For compatibility with older consumers that may not be - * modifiers aware, treat INVALID as LINEAR for shared - * resources. - */ - if (modifier == DRM_FORMAT_MOD_INVALID) - modifier = DRM_FORMAT_MOD_LINEAR; + if (template->bind & PAN_BIND_SHARED_MASK) { + /* For compatibility with older consumers that may not be + * modifiers aware, treat INVALID as LINEAR for shared + * resources. + */ + if (modifier == DRM_FORMAT_MOD_INVALID) + modifier = DRM_FORMAT_MOD_LINEAR; - /* At any rate, we can't change the modifier later for shared - * resources, since we have no way to propagate the modifier - * change. - */ - so->modifier_constant = true; - } + /* At any rate, we can't change the modifier later for shared + * resources, since we have no way to propagate the modifier + * change. + */ + so->modifier_constant = true; + } - panfrost_resource_setup(dev, so, modifier, template->format); + panfrost_resource_setup(dev, so, modifier, template->format); - /* Guess a label based on the bind */ - unsigned bind = template->bind; - const char *label = - (bind & PIPE_BIND_INDEX_BUFFER) ? "Index buffer" : - (bind & PIPE_BIND_SCANOUT) ? "Scanout" : - (bind & PIPE_BIND_DISPLAY_TARGET) ? "Display target" : - (bind & PIPE_BIND_SHARED) ? "Shared resource" : - (bind & PIPE_BIND_RENDER_TARGET) ? "Render target" : - (bind & PIPE_BIND_DEPTH_STENCIL) ? "Depth/stencil buffer" : - (bind & PIPE_BIND_SAMPLER_VIEW) ? "Texture" : - (bind & PIPE_BIND_VERTEX_BUFFER) ? "Vertex buffer" : - (bind & PIPE_BIND_CONSTANT_BUFFER) ? "Constant buffer" : - (bind & PIPE_BIND_GLOBAL) ? "Global memory" : - (bind & PIPE_BIND_SHADER_BUFFER) ? "Shader buffer" : - (bind & PIPE_BIND_SHADER_IMAGE) ? "Shader image" : - "Other resource"; + /* Guess a label based on the bind */ + unsigned bind = template->bind; + const char *label = (bind & PIPE_BIND_INDEX_BUFFER) ? "Index buffer" + : (bind & PIPE_BIND_SCANOUT) ? "Scanout" + : (bind & PIPE_BIND_DISPLAY_TARGET) ? "Display target" + : (bind & PIPE_BIND_SHARED) ? "Shared resource" + : (bind & PIPE_BIND_RENDER_TARGET) ? "Render target" + : (bind & PIPE_BIND_DEPTH_STENCIL) + ? "Depth/stencil buffer" + : (bind & PIPE_BIND_SAMPLER_VIEW) ? "Texture" + : (bind & PIPE_BIND_VERTEX_BUFFER) ? "Vertex buffer" + : (bind & PIPE_BIND_CONSTANT_BUFFER) ? "Constant buffer" + : (bind & PIPE_BIND_GLOBAL) ? "Global memory" + : (bind & PIPE_BIND_SHADER_BUFFER) ? "Shader buffer" + : (bind & PIPE_BIND_SHADER_IMAGE) ? 
"Shader image" + : "Other resource"; - if (dev->ro && (template->bind & PIPE_BIND_SCANOUT)) { - struct winsys_handle handle; - struct pan_block_size blocksize = panfrost_block_size(modifier, template->format); + if (dev->ro && (template->bind & PIPE_BIND_SCANOUT)) { + struct winsys_handle handle; + struct pan_block_size blocksize = + panfrost_block_size(modifier, template->format); - /* Block-based texture formats are only used for texture - * compression (not framebuffer compression!), which doesn't - * make sense to share across processes. - */ - assert(util_format_get_blockwidth(template->format) == 1); - assert(util_format_get_blockheight(template->format) == 1); + /* Block-based texture formats are only used for texture + * compression (not framebuffer compression!), which doesn't + * make sense to share across processes. + */ + assert(util_format_get_blockwidth(template->format) == 1); + assert(util_format_get_blockheight(template->format) == 1); - /* Present a resource with similar dimensions that, if allocated - * as a linear image, is big enough to fit the resource in the - * actual layout. For linear images, this is a no-op. For 16x16 - * tiling, this aligns the dimensions to 16x16. - * - * For AFBC, this aligns the width to the superblock width (as - * expected) and adds extra rows to account for the header. This - * is a bit of a lie, but it's the best we can do with dumb - * buffers, which are extremely not meant for AFBC. And yet this - * has to work anyway... - * - * Moral of the story: if you're reading this comment, that - * means you're working on WSI and so it's already too late for - * you. I'm sorry. - */ - unsigned width = ALIGN_POT(template->width0, blocksize.width); - unsigned stride = ALIGN_POT(template->width0, blocksize.width) * - util_format_get_blocksize(template->format); - unsigned size = so->image.layout.data_size; - unsigned effective_rows = DIV_ROUND_UP(size, stride); + /* Present a resource with similar dimensions that, if allocated + * as a linear image, is big enough to fit the resource in the + * actual layout. For linear images, this is a no-op. For 16x16 + * tiling, this aligns the dimensions to 16x16. + * + * For AFBC, this aligns the width to the superblock width (as + * expected) and adds extra rows to account for the header. This + * is a bit of a lie, but it's the best we can do with dumb + * buffers, which are extremely not meant for AFBC. And yet this + * has to work anyway... + * + * Moral of the story: if you're reading this comment, that + * means you're working on WSI and so it's already too late for + * you. I'm sorry. 
+ */ + unsigned width = ALIGN_POT(template->width0, blocksize.width); + unsigned stride = ALIGN_POT(template->width0, blocksize.width) * + util_format_get_blocksize(template->format); + unsigned size = so->image.layout.data_size; + unsigned effective_rows = DIV_ROUND_UP(size, stride); - struct pipe_resource scanout_tmpl = { - .target = so->base.target, - .format = template->format, - .width0 = width, - .height0 = effective_rows, - .depth0 = 1, - .array_size = 1, - }; + struct pipe_resource scanout_tmpl = { + .target = so->base.target, + .format = template->format, + .width0 = width, + .height0 = effective_rows, + .depth0 = 1, + .array_size = 1, + }; - so->scanout = - renderonly_scanout_for_resource(&scanout_tmpl, - dev->ro, - &handle); + so->scanout = + renderonly_scanout_for_resource(&scanout_tmpl, dev->ro, &handle); - if (!so->scanout) { - fprintf(stderr, "Failed to create scanout resource\n"); - free(so); - return NULL; - } - assert(handle.type == WINSYS_HANDLE_TYPE_FD); - so->image.data.bo = panfrost_bo_import(dev, handle.handle); - close(handle.handle); + if (!so->scanout) { + fprintf(stderr, "Failed to create scanout resource\n"); + free(so); + return NULL; + } + assert(handle.type == WINSYS_HANDLE_TYPE_FD); + so->image.data.bo = panfrost_bo_import(dev, handle.handle); + close(handle.handle); - if (!so->image.data.bo) { - free(so); - return NULL; - } - } else { - /* We create a BO immediately but don't bother mapping, since we don't - * care to map e.g. FBOs which the CPU probably won't touch */ + if (!so->image.data.bo) { + free(so); + return NULL; + } + } else { + /* We create a BO immediately but don't bother mapping, since we don't + * care to map e.g. FBOs which the CPU probably won't touch */ - so->image.data.bo = - panfrost_bo_create(dev, so->image.layout.data_size, PAN_BO_DELAY_MMAP, label); + so->image.data.bo = panfrost_bo_create(dev, so->image.layout.data_size, + PAN_BO_DELAY_MMAP, label); - so->constant_stencil = true; - } + so->constant_stencil = true; + } - if (drm_is_afbc(so->image.layout.modifier)) - panfrost_resource_init_afbc_headers(so); + if (drm_is_afbc(so->image.layout.modifier)) + panfrost_resource_init_afbc_headers(so); - panfrost_resource_set_damage_region(screen, &so->base, 0, NULL); + panfrost_resource_set_damage_region(screen, &so->base, 0, NULL); - if (template->bind & PIPE_BIND_INDEX_BUFFER) - so->index_cache = CALLOC_STRUCT(panfrost_minmax_cache); + if (template->bind & PIPE_BIND_INDEX_BUFFER) + so->index_cache = CALLOC_STRUCT(panfrost_minmax_cache); - return (struct pipe_resource *)so; + return (struct pipe_resource *)so; } /* Default is to create a resource as don't care */ @@ -770,8 +743,8 @@ static struct pipe_resource * panfrost_resource_create(struct pipe_screen *screen, const struct pipe_resource *template) { - return panfrost_resource_create_with_modifier(screen, template, - DRM_FORMAT_MOD_INVALID); + return panfrost_resource_create_with_modifier(screen, template, + DRM_FORMAT_MOD_INVALID); } /* If no modifier is specified, we'll choose. 
Otherwise, the order of @@ -779,39 +752,38 @@ panfrost_resource_create(struct pipe_screen *screen, static struct pipe_resource * panfrost_resource_create_with_modifiers(struct pipe_screen *screen, - const struct pipe_resource *template, - const uint64_t *modifiers, int count) + const struct pipe_resource *template, + const uint64_t *modifiers, int count) { - for (unsigned i = 0; i < PAN_MODIFIER_COUNT; ++i) { - if (drm_find_modifier(pan_best_modifiers[i], modifiers, count)) { - return panfrost_resource_create_with_modifier(screen, template, - pan_best_modifiers[i]); - } - } + for (unsigned i = 0; i < PAN_MODIFIER_COUNT; ++i) { + if (drm_find_modifier(pan_best_modifiers[i], modifiers, count)) { + return panfrost_resource_create_with_modifier(screen, template, + pan_best_modifiers[i]); + } + } - /* If we didn't find one, app specified invalid */ - assert(count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID); - return panfrost_resource_create(screen, template); + /* If we didn't find one, app specified invalid */ + assert(count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID); + return panfrost_resource_create(screen, template); } static void -panfrost_resource_destroy(struct pipe_screen *screen, - struct pipe_resource *pt) +panfrost_resource_destroy(struct pipe_screen *screen, struct pipe_resource *pt) { - struct panfrost_device *dev = pan_device(screen); - struct panfrost_resource *rsrc = (struct panfrost_resource *) pt; + struct panfrost_device *dev = pan_device(screen); + struct panfrost_resource *rsrc = (struct panfrost_resource *)pt; - if (rsrc->scanout) - renderonly_scanout_destroy(rsrc->scanout, dev->ro); + if (rsrc->scanout) + renderonly_scanout_destroy(rsrc->scanout, dev->ro); - if (rsrc->image.data.bo) - panfrost_bo_unreference(rsrc->image.data.bo); + if (rsrc->image.data.bo) + panfrost_bo_unreference(rsrc->image.data.bo); - free(rsrc->index_cache); - free(rsrc->damage.tile_map.data); + free(rsrc->index_cache); + free(rsrc->damage.tile_map.data); - util_range_destroy(&rsrc->valid_buffer_range); - free(rsrc); + util_range_destroy(&rsrc->valid_buffer_range); + free(rsrc); } /* Most of the time we can do CPU-side transfers, but sometimes we need to use @@ -820,417 +792,411 @@ panfrost_resource_destroy(struct pipe_screen *screen, static struct panfrost_resource * pan_alloc_staging(struct panfrost_context *ctx, struct panfrost_resource *rsc, - unsigned level, const struct pipe_box *box) + unsigned level, const struct pipe_box *box) { - struct pipe_context *pctx = &ctx->base; - struct pipe_resource tmpl = rsc->base; + struct pipe_context *pctx = &ctx->base; + struct pipe_resource tmpl = rsc->base; - tmpl.width0 = box->width; - tmpl.height0 = box->height; - /* for array textures, box->depth is the array_size, otherwise - * for 3d textures, it is the depth: - */ - if (tmpl.array_size > 1) { - if (tmpl.target == PIPE_TEXTURE_CUBE) - tmpl.target = PIPE_TEXTURE_2D_ARRAY; - tmpl.array_size = box->depth; - tmpl.depth0 = 1; - } else { - tmpl.array_size = 1; - tmpl.depth0 = box->depth; - } - tmpl.last_level = 0; - tmpl.bind |= PIPE_BIND_LINEAR; - tmpl.bind &= ~PAN_BIND_SHARED_MASK; + tmpl.width0 = box->width; + tmpl.height0 = box->height; + /* for array textures, box->depth is the array_size, otherwise + * for 3d textures, it is the depth: + */ + if (tmpl.array_size > 1) { + if (tmpl.target == PIPE_TEXTURE_CUBE) + tmpl.target = PIPE_TEXTURE_2D_ARRAY; + tmpl.array_size = box->depth; + tmpl.depth0 = 1; + } else { + tmpl.array_size = 1; + tmpl.depth0 = box->depth; + } + tmpl.last_level = 0; + 
tmpl.bind |= PIPE_BIND_LINEAR; + tmpl.bind &= ~PAN_BIND_SHARED_MASK; - struct pipe_resource *pstaging = - pctx->screen->resource_create(pctx->screen, &tmpl); - if (!pstaging) - return NULL; + struct pipe_resource *pstaging = + pctx->screen->resource_create(pctx->screen, &tmpl); + if (!pstaging) + return NULL; - return pan_resource(pstaging); + return pan_resource(pstaging); } static void -pan_blit_from_staging(struct pipe_context *pctx, struct panfrost_transfer *trans) +pan_blit_from_staging(struct pipe_context *pctx, + struct panfrost_transfer *trans) { - struct pipe_resource *dst = trans->base.resource; - struct pipe_blit_info blit = {0}; + struct pipe_resource *dst = trans->base.resource; + struct pipe_blit_info blit = {0}; - blit.dst.resource = dst; - blit.dst.format = dst->format; - blit.dst.level = trans->base.level; - blit.dst.box = trans->base.box; - blit.src.resource = trans->staging.rsrc; - blit.src.format = trans->staging.rsrc->format; - blit.src.level = 0; - blit.src.box = trans->staging.box; - blit.mask = util_format_get_mask(blit.src.format); - blit.filter = PIPE_TEX_FILTER_NEAREST; + blit.dst.resource = dst; + blit.dst.format = dst->format; + blit.dst.level = trans->base.level; + blit.dst.box = trans->base.box; + blit.src.resource = trans->staging.rsrc; + blit.src.format = trans->staging.rsrc->format; + blit.src.level = 0; + blit.src.box = trans->staging.box; + blit.mask = util_format_get_mask(blit.src.format); + blit.filter = PIPE_TEX_FILTER_NEAREST; - panfrost_blit(pctx, &blit); + panfrost_blit(pctx, &blit); } static void pan_blit_to_staging(struct pipe_context *pctx, struct panfrost_transfer *trans) { - struct pipe_resource *src = trans->base.resource; - struct pipe_blit_info blit = {0}; + struct pipe_resource *src = trans->base.resource; + struct pipe_blit_info blit = {0}; - blit.src.resource = src; - blit.src.format = src->format; - blit.src.level = trans->base.level; - blit.src.box = trans->base.box; - blit.dst.resource = trans->staging.rsrc; - blit.dst.format = trans->staging.rsrc->format; - blit.dst.level = 0; - blit.dst.box = trans->staging.box; - blit.mask = util_format_get_mask(blit.dst.format); - blit.filter = PIPE_TEX_FILTER_NEAREST; + blit.src.resource = src; + blit.src.format = src->format; + blit.src.level = trans->base.level; + blit.src.box = trans->base.box; + blit.dst.resource = trans->staging.rsrc; + blit.dst.format = trans->staging.rsrc->format; + blit.dst.level = 0; + blit.dst.box = trans->staging.box; + blit.mask = util_format_get_mask(blit.dst.format); + blit.filter = PIPE_TEX_FILTER_NEAREST; - panfrost_blit(pctx, &blit); + panfrost_blit(pctx, &blit); } static void panfrost_load_tiled_images(struct panfrost_transfer *transfer, struct panfrost_resource *rsrc) { - struct pipe_transfer *ptrans = &transfer->base; - unsigned level = ptrans->level; + struct pipe_transfer *ptrans = &transfer->base; + unsigned level = ptrans->level; - /* If the requested level of the image is uninitialized, it's not - * necessary to copy it. Leave the result uninitialized too. - */ - if (!BITSET_TEST(rsrc->valid.data, level)) - return; + /* If the requested level of the image is uninitialized, it's not + * necessary to copy it. Leave the result uninitialized too. 
+ */ + if (!BITSET_TEST(rsrc->valid.data, level)) + return; - struct panfrost_bo *bo = rsrc->image.data.bo; - unsigned stride = panfrost_get_layer_stride(&rsrc->image.layout, level); + struct panfrost_bo *bo = rsrc->image.data.bo; + unsigned stride = panfrost_get_layer_stride(&rsrc->image.layout, level); - /* Otherwise, load each layer separately, required to load from 3D and - * array textures. - */ - for (unsigned z = 0; z < ptrans->box.depth; ++z) { - void *dst = transfer->map + (ptrans->layer_stride * z); - uint8_t *map = bo->ptr.cpu + - rsrc->image.layout.slices[level].offset + - (z + ptrans->box.z) * stride; + /* Otherwise, load each layer separately, required to load from 3D and + * array textures. + */ + for (unsigned z = 0; z < ptrans->box.depth; ++z) { + void *dst = transfer->map + (ptrans->layer_stride * z); + uint8_t *map = bo->ptr.cpu + rsrc->image.layout.slices[level].offset + + (z + ptrans->box.z) * stride; - panfrost_load_tiled_image(dst, map, ptrans->box.x, - ptrans->box.y, ptrans->box.width, - ptrans->box.height, ptrans->stride, - rsrc->image.layout.slices[level].row_stride, - rsrc->image.layout.format); - } + panfrost_load_tiled_image(dst, map, ptrans->box.x, ptrans->box.y, + ptrans->box.width, ptrans->box.height, + ptrans->stride, + rsrc->image.layout.slices[level].row_stride, + rsrc->image.layout.format); + } } static void panfrost_store_tiled_images(struct panfrost_transfer *transfer, struct panfrost_resource *rsrc) { - struct panfrost_bo *bo = rsrc->image.data.bo; - struct pipe_transfer *ptrans = &transfer->base; - unsigned level = ptrans->level; - unsigned stride = panfrost_get_layer_stride(&rsrc->image.layout, level); + struct panfrost_bo *bo = rsrc->image.data.bo; + struct pipe_transfer *ptrans = &transfer->base; + unsigned level = ptrans->level; + unsigned stride = panfrost_get_layer_stride(&rsrc->image.layout, level); - /* Otherwise, store each layer separately, required to store to 3D and - * array textures. - */ - for (unsigned z = 0; z < ptrans->box.depth; ++z) { - void *src = transfer->map + (ptrans->layer_stride * z); - uint8_t *map = bo->ptr.cpu + - rsrc->image.layout.slices[level].offset + - (z + ptrans->box.z) * stride; + /* Otherwise, store each layer separately, required to store to 3D and + * array textures. 
+ */ + for (unsigned z = 0; z < ptrans->box.depth; ++z) { + void *src = transfer->map + (ptrans->layer_stride * z); + uint8_t *map = bo->ptr.cpu + rsrc->image.layout.slices[level].offset + + (z + ptrans->box.z) * stride; - panfrost_store_tiled_image(map, src, - ptrans->box.x, ptrans->box.y, - ptrans->box.width, ptrans->box.height, - rsrc->image.layout.slices[level].row_stride, - ptrans->stride, rsrc->image.layout.format); - } + panfrost_store_tiled_image(map, src, ptrans->box.x, ptrans->box.y, + ptrans->box.width, ptrans->box.height, + rsrc->image.layout.slices[level].row_stride, + ptrans->stride, rsrc->image.layout.format); + } } static bool panfrost_box_covers_resource(const struct pipe_resource *resource, const struct pipe_box *box) { - return resource->last_level == 0 && - util_texrange_covers_whole_level(resource, 0, box->x, box->y, - box->z, box->width, box->height, - box->depth); + return resource->last_level == 0 && + util_texrange_covers_whole_level(resource, 0, box->x, box->y, box->z, + box->width, box->height, box->depth); } static void * -panfrost_ptr_map(struct pipe_context *pctx, - struct pipe_resource *resource, - unsigned level, - unsigned usage, /* a combination of PIPE_MAP_x */ - const struct pipe_box *box, - struct pipe_transfer **out_transfer) +panfrost_ptr_map(struct pipe_context *pctx, struct pipe_resource *resource, + unsigned level, + unsigned usage, /* a combination of PIPE_MAP_x */ + const struct pipe_box *box, + struct pipe_transfer **out_transfer) { - struct panfrost_context *ctx = pan_context(pctx); - struct panfrost_device *dev = pan_device(pctx->screen); - struct panfrost_resource *rsrc = pan_resource(resource); - enum pipe_format format = rsrc->image.layout.format; - int bytes_per_block = util_format_get_blocksize(format); - struct panfrost_bo *bo = rsrc->image.data.bo; + struct panfrost_context *ctx = pan_context(pctx); + struct panfrost_device *dev = pan_device(pctx->screen); + struct panfrost_resource *rsrc = pan_resource(resource); + enum pipe_format format = rsrc->image.layout.format; + int bytes_per_block = util_format_get_blocksize(format); + struct panfrost_bo *bo = rsrc->image.data.bo; - /* Can't map tiled/compressed directly */ - if ((usage & PIPE_MAP_DIRECTLY) && rsrc->image.layout.modifier != DRM_FORMAT_MOD_LINEAR) - return NULL; + /* Can't map tiled/compressed directly */ + if ((usage & PIPE_MAP_DIRECTLY) && + rsrc->image.layout.modifier != DRM_FORMAT_MOD_LINEAR) + return NULL; - struct panfrost_transfer *transfer = rzalloc(pctx, struct panfrost_transfer); - transfer->base.level = level; - transfer->base.usage = usage; - transfer->base.box = *box; + struct panfrost_transfer *transfer = rzalloc(pctx, struct panfrost_transfer); + transfer->base.level = level; + transfer->base.usage = usage; + transfer->base.box = *box; - pipe_resource_reference(&transfer->base.resource, resource); - *out_transfer = &transfer->base; + pipe_resource_reference(&transfer->base.resource, resource); + *out_transfer = &transfer->base; - if (usage & PIPE_MAP_WRITE) - rsrc->constant_stencil = false; + if (usage & PIPE_MAP_WRITE) + rsrc->constant_stencil = false; - /* We don't have s/w routines for AFBC, so use a staging texture */ - if (drm_is_afbc(rsrc->image.layout.modifier)) { - struct panfrost_resource *staging = pan_alloc_staging(ctx, rsrc, level, box); - assert(staging); + /* We don't have s/w routines for AFBC, so use a staging texture */ + if (drm_is_afbc(rsrc->image.layout.modifier)) { + struct panfrost_resource *staging = + pan_alloc_staging(ctx, rsrc, level, 
box); + assert(staging); - /* Staging resources have one LOD: level 0. Query the strides - * on this LOD. - */ - transfer->base.stride = staging->image.layout.slices[0].row_stride; - transfer->base.layer_stride = - panfrost_get_layer_stride(&staging->image.layout, 0); + /* Staging resources have one LOD: level 0. Query the strides + * on this LOD. + */ + transfer->base.stride = staging->image.layout.slices[0].row_stride; + transfer->base.layer_stride = + panfrost_get_layer_stride(&staging->image.layout, 0); - transfer->staging.rsrc = &staging->base; + transfer->staging.rsrc = &staging->base; - transfer->staging.box = *box; - transfer->staging.box.x = 0; - transfer->staging.box.y = 0; - transfer->staging.box.z = 0; + transfer->staging.box = *box; + transfer->staging.box.x = 0; + transfer->staging.box.y = 0; + transfer->staging.box.z = 0; - assert(transfer->staging.rsrc != NULL); + assert(transfer->staging.rsrc != NULL); - bool valid = BITSET_TEST(rsrc->valid.data, level); + bool valid = BITSET_TEST(rsrc->valid.data, level); - if ((usage & PIPE_MAP_READ) && - (valid || panfrost_any_batch_writes_rsrc(ctx, rsrc))) { - pan_blit_to_staging(pctx, transfer); - panfrost_flush_writer(ctx, staging, "AFBC read staging blit"); - panfrost_bo_wait(staging->image.data.bo, INT64_MAX, false); - } + if ((usage & PIPE_MAP_READ) && + (valid || panfrost_any_batch_writes_rsrc(ctx, rsrc))) { + pan_blit_to_staging(pctx, transfer); + panfrost_flush_writer(ctx, staging, "AFBC read staging blit"); + panfrost_bo_wait(staging->image.data.bo, INT64_MAX, false); + } - panfrost_bo_mmap(staging->image.data.bo); - return staging->image.data.bo->ptr.cpu; - } + panfrost_bo_mmap(staging->image.data.bo); + return staging->image.data.bo->ptr.cpu; + } - /* If we haven't already mmaped, now's the time */ - panfrost_bo_mmap(bo); + /* If we haven't already mmaped, now's the time */ + panfrost_bo_mmap(bo); - if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) - pandecode_inject_mmap(bo->ptr.gpu, bo->ptr.cpu, bo->size, NULL); + if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) + pandecode_inject_mmap(bo->ptr.gpu, bo->ptr.cpu, bo->size, NULL); - /* Upgrade writes to uninitialized ranges to UNSYNCHRONIZED */ - if ((usage & PIPE_MAP_WRITE) && - resource->target == PIPE_BUFFER && - !util_ranges_intersect(&rsrc->valid_buffer_range, box->x, box->x + box->width)) { + /* Upgrade writes to uninitialized ranges to UNSYNCHRONIZED */ + if ((usage & PIPE_MAP_WRITE) && resource->target == PIPE_BUFFER && + !util_ranges_intersect(&rsrc->valid_buffer_range, box->x, + box->x + box->width)) { - usage |= PIPE_MAP_UNSYNCHRONIZED; - } + usage |= PIPE_MAP_UNSYNCHRONIZED; + } - /* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is - * being mapped. - */ - if ((usage & PIPE_MAP_DISCARD_RANGE) && - !(usage & PIPE_MAP_UNSYNCHRONIZED) && - !(resource->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) && - panfrost_box_covers_resource(resource, box) && - !(rsrc->image.data.bo->flags & PAN_BO_SHARED)) { + /* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is + * being mapped. 
+ */ + if ((usage & PIPE_MAP_DISCARD_RANGE) && !(usage & PIPE_MAP_UNSYNCHRONIZED) && + !(resource->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) && + panfrost_box_covers_resource(resource, box) && + !(rsrc->image.data.bo->flags & PAN_BO_SHARED)) { - usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE; - } + usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE; + } - bool create_new_bo = usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE; - bool copy_resource = false; + bool create_new_bo = usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE; + bool copy_resource = false; - if (!create_new_bo && - !(usage & PIPE_MAP_UNSYNCHRONIZED) && - !(resource->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) && - (usage & PIPE_MAP_WRITE) && - panfrost_any_batch_reads_rsrc(ctx, rsrc)) { - /* When a resource to be modified is already being used by a - * pending batch, it is often faster to copy the whole BO than - * to flush and split the frame in two. - */ + if (!create_new_bo && !(usage & PIPE_MAP_UNSYNCHRONIZED) && + !(resource->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) && + (usage & PIPE_MAP_WRITE) && panfrost_any_batch_reads_rsrc(ctx, rsrc)) { + /* When a resource to be modified is already being used by a + * pending batch, it is often faster to copy the whole BO than + * to flush and split the frame in two. + */ - panfrost_flush_writer(ctx, rsrc, "Shadow resource creation"); - panfrost_bo_wait(bo, INT64_MAX, false); + panfrost_flush_writer(ctx, rsrc, "Shadow resource creation"); + panfrost_bo_wait(bo, INT64_MAX, false); - create_new_bo = true; - copy_resource = !(usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE); - } + create_new_bo = true; + copy_resource = !(usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE); + } - /* Shadowing with separate stencil may require additional accounting. - * Bail in these exotic cases. - */ - if (rsrc->separate_stencil) { - create_new_bo = false; - copy_resource = false; - } + /* Shadowing with separate stencil may require additional accounting. + * Bail in these exotic cases. + */ + if (rsrc->separate_stencil) { + create_new_bo = false; + copy_resource = false; + } - if (create_new_bo) { - /* Make sure we re-emit any descriptors using this resource */ - panfrost_dirty_state_all(ctx); + if (create_new_bo) { + /* Make sure we re-emit any descriptors using this resource */ + panfrost_dirty_state_all(ctx); - /* If the BO is used by one of the pending batches or if it's - * not ready yet (still accessed by one of the already flushed - * batches), we try to allocate a new one to avoid waiting. - */ - if (panfrost_any_batch_reads_rsrc(ctx, rsrc) || - !panfrost_bo_wait(bo, 0, true)) { - /* We want the BO to be MMAPed. */ - uint32_t flags = bo->flags & ~PAN_BO_DELAY_MMAP; - struct panfrost_bo *newbo = NULL; + /* If the BO is used by one of the pending batches or if it's + * not ready yet (still accessed by one of the already flushed + * batches), we try to allocate a new one to avoid waiting. + */ + if (panfrost_any_batch_reads_rsrc(ctx, rsrc) || + !panfrost_bo_wait(bo, 0, true)) { + /* We want the BO to be MMAPed. */ + uint32_t flags = bo->flags & ~PAN_BO_DELAY_MMAP; + struct panfrost_bo *newbo = NULL; - /* When the BO has been imported/exported, we can't - * replace it by another one, otherwise the - * importer/exporter wouldn't see the change we're - * doing to it. - */ - if (!(bo->flags & PAN_BO_SHARED)) - newbo = panfrost_bo_create(dev, bo->size, - flags, bo->label); + /* When the BO has been imported/exported, we can't + * replace it by another one, otherwise the + * importer/exporter wouldn't see the change we're + * doing to it. 
+ */ + if (!(bo->flags & PAN_BO_SHARED)) + newbo = panfrost_bo_create(dev, bo->size, flags, bo->label); - if (newbo) { - if (copy_resource) - memcpy(newbo->ptr.cpu, rsrc->image.data.bo->ptr.cpu, bo->size); + if (newbo) { + if (copy_resource) + memcpy(newbo->ptr.cpu, rsrc->image.data.bo->ptr.cpu, bo->size); - /* Swap the pointers, dropping a reference to - * the old BO which is no longer referenced from - * the resource. - */ - panfrost_bo_unreference(rsrc->image.data.bo); - rsrc->image.data.bo = newbo; + /* Swap the pointers, dropping a reference to + * the old BO which is no longer referenced from + * the resource. + */ + panfrost_bo_unreference(rsrc->image.data.bo); + rsrc->image.data.bo = newbo; - if (!copy_resource && - drm_is_afbc(rsrc->image.layout.modifier)) - panfrost_resource_init_afbc_headers(rsrc); + if (!copy_resource && drm_is_afbc(rsrc->image.layout.modifier)) + panfrost_resource_init_afbc_headers(rsrc); - bo = newbo; - } else { - /* Allocation failed or was impossible, let's - * fall back on a flush+wait. - */ - panfrost_flush_batches_accessing_rsrc(ctx, rsrc, - "Resource access with high memory pressure"); - panfrost_bo_wait(bo, INT64_MAX, true); - } - } - } else if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) { - if (usage & PIPE_MAP_WRITE) { - panfrost_flush_batches_accessing_rsrc(ctx, rsrc, "Synchronized write"); - panfrost_bo_wait(bo, INT64_MAX, true); - } else if (usage & PIPE_MAP_READ) { - panfrost_flush_writer(ctx, rsrc, "Synchronized read"); - panfrost_bo_wait(bo, INT64_MAX, false); - } - } + bo = newbo; + } else { + /* Allocation failed or was impossible, let's + * fall back on a flush+wait. + */ + panfrost_flush_batches_accessing_rsrc( + ctx, rsrc, "Resource access with high memory pressure"); + panfrost_bo_wait(bo, INT64_MAX, true); + } + } + } else if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) { + if (usage & PIPE_MAP_WRITE) { + panfrost_flush_batches_accessing_rsrc(ctx, rsrc, "Synchronized write"); + panfrost_bo_wait(bo, INT64_MAX, true); + } else if (usage & PIPE_MAP_READ) { + panfrost_flush_writer(ctx, rsrc, "Synchronized read"); + panfrost_bo_wait(bo, INT64_MAX, false); + } + } - /* For access to compressed textures, we want the (x, y, w, h) - * region-of-interest in blocks, not pixels. Then we compute the stride - * between rows of blocks as the width in blocks times the width per - * block, etc. - */ - struct pipe_box box_blocks; - u_box_pixels_to_blocks(&box_blocks, box, format); + /* For access to compressed textures, we want the (x, y, w, h) + * region-of-interest in blocks, not pixels. Then we compute the stride + * between rows of blocks as the width in blocks times the width per + * block, etc. 
+ */ + struct pipe_box box_blocks; + u_box_pixels_to_blocks(&box_blocks, box, format); - if (rsrc->image.layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED) { - transfer->base.stride = box_blocks.width * bytes_per_block; - transfer->base.layer_stride = transfer->base.stride * box_blocks.height; - transfer->map = ralloc_size(transfer, transfer->base.layer_stride * box->depth); + if (rsrc->image.layout.modifier == + DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED) { + transfer->base.stride = box_blocks.width * bytes_per_block; + transfer->base.layer_stride = transfer->base.stride * box_blocks.height; + transfer->map = + ralloc_size(transfer, transfer->base.layer_stride * box->depth); - if (usage & PIPE_MAP_READ) - panfrost_load_tiled_images(transfer, rsrc); + if (usage & PIPE_MAP_READ) + panfrost_load_tiled_images(transfer, rsrc); - return transfer->map; - } else { - assert (rsrc->image.layout.modifier == DRM_FORMAT_MOD_LINEAR); + return transfer->map; + } else { + assert(rsrc->image.layout.modifier == DRM_FORMAT_MOD_LINEAR); - /* Direct, persistent writes create holes in time for - * caching... I don't know if this is actually possible but we - * should still get it right */ + /* Direct, persistent writes create holes in time for + * caching... I don't know if this is actually possible but we + * should still get it right */ - unsigned dpw = PIPE_MAP_DIRECTLY | PIPE_MAP_WRITE | PIPE_MAP_PERSISTENT; + unsigned dpw = PIPE_MAP_DIRECTLY | PIPE_MAP_WRITE | PIPE_MAP_PERSISTENT; - if ((usage & dpw) == dpw && rsrc->index_cache) - return NULL; + if ((usage & dpw) == dpw && rsrc->index_cache) + return NULL; - transfer->base.stride = rsrc->image.layout.slices[level].row_stride; - transfer->base.layer_stride = - panfrost_get_layer_stride(&rsrc->image.layout, level); + transfer->base.stride = rsrc->image.layout.slices[level].row_stride; + transfer->base.layer_stride = + panfrost_get_layer_stride(&rsrc->image.layout, level); - /* By mapping direct-write, we're implicitly already - * initialized (maybe), so be conservative */ + /* By mapping direct-write, we're implicitly already + * initialized (maybe), so be conservative */ - if (usage & PIPE_MAP_WRITE) { - BITSET_SET(rsrc->valid.data, level); - panfrost_minmax_cache_invalidate(rsrc->index_cache, &transfer->base); - } + if (usage & PIPE_MAP_WRITE) { + BITSET_SET(rsrc->valid.data, level); + panfrost_minmax_cache_invalidate(rsrc->index_cache, &transfer->base); + } - return bo->ptr.cpu - + rsrc->image.layout.slices[level].offset - + box->z * transfer->base.layer_stride - + box_blocks.y * rsrc->image.layout.slices[level].row_stride - + box_blocks.x * bytes_per_block; - } + return bo->ptr.cpu + rsrc->image.layout.slices[level].offset + + box->z * transfer->base.layer_stride + + box_blocks.y * rsrc->image.layout.slices[level].row_stride + + box_blocks.x * bytes_per_block; + } } void pan_resource_modifier_convert(struct panfrost_context *ctx, - struct panfrost_resource *rsrc, - uint64_t modifier, const char *reason) + struct panfrost_resource *rsrc, uint64_t modifier, + const char *reason) { - assert(!rsrc->modifier_constant); + assert(!rsrc->modifier_constant); - perf_debug_ctx(ctx, "Disabling AFBC with a blit. Reason: %s", reason); + perf_debug_ctx(ctx, "Disabling AFBC with a blit. 
Reason: %s", reason); - struct pipe_resource *tmp_prsrc = - panfrost_resource_create_with_modifier( - ctx->base.screen, &rsrc->base, modifier); - struct panfrost_resource *tmp_rsrc = pan_resource(tmp_prsrc); + struct pipe_resource *tmp_prsrc = panfrost_resource_create_with_modifier( + ctx->base.screen, &rsrc->base, modifier); + struct panfrost_resource *tmp_rsrc = pan_resource(tmp_prsrc); - unsigned depth = rsrc->base.target == PIPE_TEXTURE_3D ? - rsrc->base.depth0 : rsrc->base.array_size; + unsigned depth = rsrc->base.target == PIPE_TEXTURE_3D + ? rsrc->base.depth0 + : rsrc->base.array_size; - struct pipe_box box = - { 0, 0, 0, rsrc->base.width0, rsrc->base.height0, depth }; + struct pipe_box box = {0, 0, 0, rsrc->base.width0, rsrc->base.height0, + depth}; - struct pipe_blit_info blit = { - .dst.resource = &tmp_rsrc->base, - .dst.format = tmp_rsrc->base.format, - .dst.box = box, - .src.resource = &rsrc->base, - .src.format = rsrc->base.format, - .src.box = box, - .mask = util_format_get_mask(tmp_rsrc->base.format), - .filter = PIPE_TEX_FILTER_NEAREST, - }; + struct pipe_blit_info blit = { + .dst.resource = &tmp_rsrc->base, + .dst.format = tmp_rsrc->base.format, + .dst.box = box, + .src.resource = &rsrc->base, + .src.format = rsrc->base.format, + .src.box = box, + .mask = util_format_get_mask(tmp_rsrc->base.format), + .filter = PIPE_TEX_FILTER_NEAREST, + }; - for (int i = 0; i <= rsrc->base.last_level; i++) { - if (BITSET_TEST(rsrc->valid.data, i)) { - blit.dst.level = blit.src.level = i; - panfrost_blit(&ctx->base, &blit); - } - } + for (int i = 0; i <= rsrc->base.last_level; i++) { + if (BITSET_TEST(rsrc->valid.data, i)) { + blit.dst.level = blit.src.level = i; + panfrost_blit(&ctx->base, &blit); + } + } - panfrost_bo_unreference(rsrc->image.data.bo); + panfrost_bo_unreference(rsrc->image.data.bo); - rsrc->image.data.bo = tmp_rsrc->image.data.bo; - panfrost_bo_reference(rsrc->image.data.bo); + rsrc->image.data.bo = tmp_rsrc->image.data.bo; + panfrost_bo_reference(rsrc->image.data.bo); - panfrost_resource_setup(pan_device(ctx->base.screen), rsrc, modifier, - blit.dst.format); - pipe_resource_reference(&tmp_prsrc, NULL); + panfrost_resource_setup(pan_device(ctx->base.screen), rsrc, modifier, + blit.dst.format); + pipe_resource_reference(&tmp_prsrc, NULL); } /* Validate that an AFBC resource may be used as a particular format. 
If it may @@ -1242,18 +1208,18 @@ pan_legalize_afbc_format(struct panfrost_context *ctx, struct panfrost_resource *rsrc, enum pipe_format format) { - struct panfrost_device *dev = pan_device(ctx->base.screen); + struct panfrost_device *dev = pan_device(ctx->base.screen); - if (!drm_is_afbc(rsrc->image.layout.modifier)) - return; + if (!drm_is_afbc(rsrc->image.layout.modifier)) + return; - if (panfrost_afbc_format(dev->arch, rsrc->base.format) == - panfrost_afbc_format(dev->arch, format)) - return; + if (panfrost_afbc_format(dev->arch, rsrc->base.format) == + panfrost_afbc_format(dev->arch, format)) + return; - pan_resource_modifier_convert(ctx, rsrc, - DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED, - "Reinterpreting AFBC surface as incompatible format"); + pan_resource_modifier_convert( + ctx, rsrc, DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED, + "Reinterpreting AFBC surface as incompatible format"); } static bool @@ -1261,271 +1227,259 @@ panfrost_should_linear_convert(struct panfrost_device *dev, struct panfrost_resource *prsrc, struct pipe_transfer *transfer) { - if (prsrc->modifier_constant) - return false; + if (prsrc->modifier_constant) + return false; - /* Overwriting the entire resource indicates streaming, for which - * linear layout is most efficient due to the lack of expensive - * conversion. - * - * For now we just switch to linear after a number of complete - * overwrites to keep things simple, but we could do better. - * - * This mechanism is only implemented for 2D resources. This suffices - * for video players, its intended use case. - */ + /* Overwriting the entire resource indicates streaming, for which + * linear layout is most efficient due to the lack of expensive + * conversion. + * + * For now we just switch to linear after a number of complete + * overwrites to keep things simple, but we could do better. + * + * This mechanism is only implemented for 2D resources. This suffices + * for video players, its intended use case. 
+ */ - bool entire_overwrite = - panfrost_is_2d(prsrc) && - prsrc->base.last_level == 0 && - transfer->box.width == prsrc->base.width0 && - transfer->box.height == prsrc->base.height0 && - transfer->box.x == 0 && - transfer->box.y == 0; + bool entire_overwrite = panfrost_is_2d(prsrc) && + prsrc->base.last_level == 0 && + transfer->box.width == prsrc->base.width0 && + transfer->box.height == prsrc->base.height0 && + transfer->box.x == 0 && transfer->box.y == 0; - if (entire_overwrite) - ++prsrc->modifier_updates; + if (entire_overwrite) + ++prsrc->modifier_updates; - if (prsrc->modifier_updates >= LAYOUT_CONVERT_THRESHOLD) { - perf_debug(dev, "Transitioning to linear due to streaming usage"); - return true; - } else { - return false; - } + if (prsrc->modifier_updates >= LAYOUT_CONVERT_THRESHOLD) { + perf_debug(dev, "Transitioning to linear due to streaming usage"); + return true; + } else { + return false; + } } static void -panfrost_ptr_unmap(struct pipe_context *pctx, - struct pipe_transfer *transfer) +panfrost_ptr_unmap(struct pipe_context *pctx, struct pipe_transfer *transfer) { - /* Gallium expects writeback here, so we tile */ + /* Gallium expects writeback here, so we tile */ - struct panfrost_transfer *trans = pan_transfer(transfer); - struct panfrost_resource *prsrc = (struct panfrost_resource *) transfer->resource; - struct panfrost_device *dev = pan_device(pctx->screen); + struct panfrost_transfer *trans = pan_transfer(transfer); + struct panfrost_resource *prsrc = + (struct panfrost_resource *)transfer->resource; + struct panfrost_device *dev = pan_device(pctx->screen); - if (transfer->usage & PIPE_MAP_WRITE) - prsrc->valid.crc = false; + if (transfer->usage & PIPE_MAP_WRITE) + prsrc->valid.crc = false; - /* AFBC will use a staging resource. `initialized` will be set when the - * fragment job is created; this is deferred to prevent useless surface - * reloads that can cascade into DATA_INVALID_FAULTs due to reading - * malformed AFBC data if uninitialized */ + /* AFBC will use a staging resource. 
`initialized` will be set when the + * fragment job is created; this is deferred to prevent useless surface + * reloads that can cascade into DATA_INVALID_FAULTs due to reading + * malformed AFBC data if uninitialized */ - if (trans->staging.rsrc) { - if (transfer->usage & PIPE_MAP_WRITE) { - if (panfrost_should_linear_convert(dev, prsrc, transfer)) { + if (trans->staging.rsrc) { + if (transfer->usage & PIPE_MAP_WRITE) { + if (panfrost_should_linear_convert(dev, prsrc, transfer)) { - panfrost_bo_unreference(prsrc->image.data.bo); + panfrost_bo_unreference(prsrc->image.data.bo); - panfrost_resource_setup(dev, prsrc, DRM_FORMAT_MOD_LINEAR, - prsrc->image.layout.format); + panfrost_resource_setup(dev, prsrc, DRM_FORMAT_MOD_LINEAR, + prsrc->image.layout.format); - prsrc->image.data.bo = pan_resource(trans->staging.rsrc)->image.data.bo; - panfrost_bo_reference(prsrc->image.data.bo); - } else { - pan_blit_from_staging(pctx, trans); - panfrost_flush_batches_accessing_rsrc(pan_context(pctx), - pan_resource(trans->staging.rsrc), - "AFBC write staging blit"); - } - } + prsrc->image.data.bo = + pan_resource(trans->staging.rsrc)->image.data.bo; + panfrost_bo_reference(prsrc->image.data.bo); + } else { + pan_blit_from_staging(pctx, trans); + panfrost_flush_batches_accessing_rsrc( + pan_context(pctx), pan_resource(trans->staging.rsrc), + "AFBC write staging blit"); + } + } - pipe_resource_reference(&trans->staging.rsrc, NULL); - } + pipe_resource_reference(&trans->staging.rsrc, NULL); + } - /* Tiling will occur in software from a staging cpu buffer */ - if (trans->map) { - struct panfrost_bo *bo = prsrc->image.data.bo; + /* Tiling will occur in software from a staging cpu buffer */ + if (trans->map) { + struct panfrost_bo *bo = prsrc->image.data.bo; - if (transfer->usage & PIPE_MAP_WRITE) { - BITSET_SET(prsrc->valid.data, transfer->level); + if (transfer->usage & PIPE_MAP_WRITE) { + BITSET_SET(prsrc->valid.data, transfer->level); - if (prsrc->image.layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED) { - if (panfrost_should_linear_convert(dev, prsrc, transfer)) { - panfrost_resource_setup(dev, prsrc, DRM_FORMAT_MOD_LINEAR, - prsrc->image.layout.format); - if (prsrc->image.layout.data_size > bo->size) { - const char *label = bo->label; - panfrost_bo_unreference(bo); - bo = prsrc->image.data.bo = - panfrost_bo_create(dev, prsrc->image.layout.data_size, 0, label); - assert(bo); - } + if (prsrc->image.layout.modifier == + DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED) { + if (panfrost_should_linear_convert(dev, prsrc, transfer)) { + panfrost_resource_setup(dev, prsrc, DRM_FORMAT_MOD_LINEAR, + prsrc->image.layout.format); + if (prsrc->image.layout.data_size > bo->size) { + const char *label = bo->label; + panfrost_bo_unreference(bo); + bo = prsrc->image.data.bo = panfrost_bo_create( + dev, prsrc->image.layout.data_size, 0, label); + assert(bo); + } - util_copy_rect( - bo->ptr.cpu + prsrc->image.layout.slices[0].offset, - prsrc->base.format, - prsrc->image.layout.slices[0].row_stride, - 0, 0, - transfer->box.width, - transfer->box.height, - trans->map, - transfer->stride, - 0, 0); - } else { - panfrost_store_tiled_images(trans, prsrc); - } - } - } - } + util_copy_rect( + bo->ptr.cpu + prsrc->image.layout.slices[0].offset, + prsrc->base.format, prsrc->image.layout.slices[0].row_stride, + 0, 0, transfer->box.width, transfer->box.height, trans->map, + transfer->stride, 0, 0); + } else { + panfrost_store_tiled_images(trans, prsrc); + } + } + } + } + util_range_add(&prsrc->base, 
&prsrc->valid_buffer_range, transfer->box.x, + transfer->box.x + transfer->box.width); - util_range_add(&prsrc->base, &prsrc->valid_buffer_range, - transfer->box.x, - transfer->box.x + transfer->box.width); + panfrost_minmax_cache_invalidate(prsrc->index_cache, transfer); - panfrost_minmax_cache_invalidate(prsrc->index_cache, transfer); + /* Derefence the resource */ + pipe_resource_reference(&transfer->resource, NULL); - /* Derefence the resource */ - pipe_resource_reference(&transfer->resource, NULL); - - /* Transfer itself is RALLOCed at the moment */ - ralloc_free(transfer); + /* Transfer itself is RALLOCed at the moment */ + ralloc_free(transfer); } static void panfrost_ptr_flush_region(struct pipe_context *pctx, - struct pipe_transfer *transfer, - const struct pipe_box *box) + struct pipe_transfer *transfer, + const struct pipe_box *box) { - struct panfrost_resource *rsc = pan_resource(transfer->resource); + struct panfrost_resource *rsc = pan_resource(transfer->resource); - if (transfer->resource->target == PIPE_BUFFER) { - util_range_add(&rsc->base, &rsc->valid_buffer_range, - transfer->box.x + box->x, - transfer->box.x + box->x + box->width); - } else { - BITSET_SET(rsc->valid.data, transfer->level); - } + if (transfer->resource->target == PIPE_BUFFER) { + util_range_add(&rsc->base, &rsc->valid_buffer_range, + transfer->box.x + box->x, + transfer->box.x + box->x + box->width); + } else { + BITSET_SET(rsc->valid.data, transfer->level); + } } static void -panfrost_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsrc) +panfrost_invalidate_resource(struct pipe_context *pctx, + struct pipe_resource *prsrc) { - struct panfrost_context *ctx = pan_context(pctx); - struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); - struct panfrost_resource *rsrc = pan_resource(prsrc); + struct panfrost_context *ctx = pan_context(pctx); + struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); + struct panfrost_resource *rsrc = pan_resource(prsrc); - rsrc->constant_stencil = true; + rsrc->constant_stencil = true; - /* Handle the glInvalidateFramebuffer case */ - if (batch->key.zsbuf && batch->key.zsbuf->texture == prsrc) - batch->resolve &= ~PIPE_CLEAR_DEPTHSTENCIL; + /* Handle the glInvalidateFramebuffer case */ + if (batch->key.zsbuf && batch->key.zsbuf->texture == prsrc) + batch->resolve &= ~PIPE_CLEAR_DEPTHSTENCIL; - for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) { - struct pipe_surface *surf = batch->key.cbufs[i]; + for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) { + struct pipe_surface *surf = batch->key.cbufs[i]; - if (surf && surf->texture == prsrc) - batch->resolve &= ~(PIPE_CLEAR_COLOR0 << i); - } + if (surf && surf->texture == prsrc) + batch->resolve &= ~(PIPE_CLEAR_COLOR0 << i); + } } static enum pipe_format panfrost_resource_get_internal_format(struct pipe_resource *rsrc) { - struct panfrost_resource *prsrc = (struct panfrost_resource *) rsrc; - return prsrc->image.layout.format; + struct panfrost_resource *prsrc = (struct panfrost_resource *)rsrc; + return prsrc->image.layout.format; } static bool -panfrost_generate_mipmap( - struct pipe_context *pctx, - struct pipe_resource *prsrc, - enum pipe_format format, - unsigned base_level, - unsigned last_level, - unsigned first_layer, - unsigned last_layer) +panfrost_generate_mipmap(struct pipe_context *pctx, struct pipe_resource *prsrc, + enum pipe_format format, unsigned base_level, + unsigned last_level, unsigned first_layer, + unsigned last_layer) { - struct panfrost_resource *rsrc = 
pan_resource(prsrc); + struct panfrost_resource *rsrc = pan_resource(prsrc); - perf_debug_ctx(pan_context(pctx), "Unoptimized mipmap generation"); + perf_debug_ctx(pan_context(pctx), "Unoptimized mipmap generation"); - /* Generating a mipmap invalidates the written levels, so make that - * explicit so we don't try to wallpaper them back and end up with - * u_blitter recursion */ + /* Generating a mipmap invalidates the written levels, so make that + * explicit so we don't try to wallpaper them back and end up with + * u_blitter recursion */ - assert(rsrc->image.data.bo); - for (unsigned l = base_level + 1; l <= last_level; ++l) - BITSET_CLEAR(rsrc->valid.data, l); + assert(rsrc->image.data.bo); + for (unsigned l = base_level + 1; l <= last_level; ++l) + BITSET_CLEAR(rsrc->valid.data, l); - /* Beyond that, we just delegate the hard stuff. */ + /* Beyond that, we just delegate the hard stuff. */ - bool blit_res = util_gen_mipmap( - pctx, prsrc, format, - base_level, last_level, - first_layer, last_layer, - PIPE_TEX_FILTER_LINEAR); + bool blit_res = + util_gen_mipmap(pctx, prsrc, format, base_level, last_level, first_layer, + last_layer, PIPE_TEX_FILTER_LINEAR); - return blit_res; + return blit_res; } static void panfrost_resource_set_stencil(struct pipe_resource *prsrc, struct pipe_resource *stencil) { - pan_resource(prsrc)->separate_stencil = pan_resource(stencil); + pan_resource(prsrc)->separate_stencil = pan_resource(stencil); } static struct pipe_resource * panfrost_resource_get_stencil(struct pipe_resource *prsrc) { - if (!pan_resource(prsrc)->separate_stencil) - return NULL; + if (!pan_resource(prsrc)->separate_stencil) + return NULL; - return &pan_resource(prsrc)->separate_stencil->base; + return &pan_resource(prsrc)->separate_stencil->base; } static const struct u_transfer_vtbl transfer_vtbl = { - .resource_create = panfrost_resource_create, - .resource_destroy = panfrost_resource_destroy, - .transfer_map = panfrost_ptr_map, - .transfer_unmap = panfrost_ptr_unmap, - .transfer_flush_region = panfrost_ptr_flush_region, - .get_internal_format = panfrost_resource_get_internal_format, - .set_stencil = panfrost_resource_set_stencil, - .get_stencil = panfrost_resource_get_stencil, + .resource_create = panfrost_resource_create, + .resource_destroy = panfrost_resource_destroy, + .transfer_map = panfrost_ptr_map, + .transfer_unmap = panfrost_ptr_unmap, + .transfer_flush_region = panfrost_ptr_flush_region, + .get_internal_format = panfrost_resource_get_internal_format, + .set_stencil = panfrost_resource_set_stencil, + .get_stencil = panfrost_resource_get_stencil, }; void panfrost_resource_screen_init(struct pipe_screen *pscreen) { - pscreen->resource_create_with_modifiers = - panfrost_resource_create_with_modifiers; - pscreen->resource_create = u_transfer_helper_resource_create; - pscreen->resource_destroy = u_transfer_helper_resource_destroy; - pscreen->resource_from_handle = panfrost_resource_from_handle; - pscreen->resource_get_handle = panfrost_resource_get_handle; - pscreen->resource_get_param = panfrost_resource_get_param; - pscreen->transfer_helper = u_transfer_helper_create(&transfer_vtbl, - U_TRANSFER_HELPER_SEPARATE_Z32S8 | - U_TRANSFER_HELPER_MSAA_MAP); + pscreen->resource_create_with_modifiers = + panfrost_resource_create_with_modifiers; + pscreen->resource_create = u_transfer_helper_resource_create; + pscreen->resource_destroy = u_transfer_helper_resource_destroy; + pscreen->resource_from_handle = panfrost_resource_from_handle; + pscreen->resource_get_handle = 
panfrost_resource_get_handle; + pscreen->resource_get_param = panfrost_resource_get_param; + pscreen->transfer_helper = u_transfer_helper_create( + &transfer_vtbl, + U_TRANSFER_HELPER_SEPARATE_Z32S8 | U_TRANSFER_HELPER_MSAA_MAP); } void panfrost_resource_screen_destroy(struct pipe_screen *pscreen) { - u_transfer_helper_destroy(pscreen->transfer_helper); + u_transfer_helper_destroy(pscreen->transfer_helper); } void panfrost_resource_context_init(struct pipe_context *pctx) { - pctx->buffer_map = u_transfer_helper_transfer_map; - pctx->buffer_unmap = u_transfer_helper_transfer_unmap; - pctx->texture_map = u_transfer_helper_transfer_map; - pctx->texture_unmap = u_transfer_helper_transfer_unmap; - pctx->create_surface = panfrost_create_surface; - pctx->surface_destroy = panfrost_surface_destroy; - pctx->resource_copy_region = util_resource_copy_region; - pctx->blit = panfrost_blit; - pctx->generate_mipmap = panfrost_generate_mipmap; - pctx->flush_resource = panfrost_flush_resource; - pctx->invalidate_resource = panfrost_invalidate_resource; - pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region; - pctx->buffer_subdata = u_default_buffer_subdata; - pctx->texture_subdata = u_default_texture_subdata; - pctx->clear_buffer = u_default_clear_buffer; - pctx->clear_render_target = panfrost_clear_render_target; - pctx->clear_depth_stencil = panfrost_clear_depth_stencil; + pctx->buffer_map = u_transfer_helper_transfer_map; + pctx->buffer_unmap = u_transfer_helper_transfer_unmap; + pctx->texture_map = u_transfer_helper_transfer_map; + pctx->texture_unmap = u_transfer_helper_transfer_unmap; + pctx->create_surface = panfrost_create_surface; + pctx->surface_destroy = panfrost_surface_destroy; + pctx->resource_copy_region = util_resource_copy_region; + pctx->blit = panfrost_blit; + pctx->generate_mipmap = panfrost_generate_mipmap; + pctx->flush_resource = panfrost_flush_resource; + pctx->invalidate_resource = panfrost_invalidate_resource; + pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region; + pctx->buffer_subdata = u_default_buffer_subdata; + pctx->texture_subdata = u_default_texture_subdata; + pctx->clear_buffer = u_default_clear_buffer; + pctx->clear_render_target = panfrost_clear_render_target; + pctx->clear_depth_stencil = panfrost_clear_depth_stencil; } diff --git a/src/gallium/drivers/panfrost/pan_resource.h b/src/gallium/drivers/panfrost/pan_resource.h index eb39726c46e..c3d76d75bf3 100644 --- a/src/gallium/drivers/panfrost/pan_resource.h +++ b/src/gallium/drivers/panfrost/pan_resource.h @@ -22,87 +22,86 @@ * */ - #ifndef PAN_RESOURCE_H #define PAN_RESOURCE_H -#include "pan_screen.h" -#include "pan_minmax_cache.h" -#include "pan_texture.h" #include "drm-uapi/drm.h" #include "util/u_range.h" +#include "pan_minmax_cache.h" +#include "pan_screen.h" +#include "pan_texture.h" #define LAYOUT_CONVERT_THRESHOLD 8 -#define PAN_MAX_BATCHES 32 +#define PAN_MAX_BATCHES 32 -#define PAN_BIND_SHARED_MASK (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | \ - PIPE_BIND_SHARED) +#define PAN_BIND_SHARED_MASK \ + (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | PIPE_BIND_SHARED) struct panfrost_resource { - struct pipe_resource base; - struct { - struct pipe_scissor_state extent; - struct { - bool enable; - unsigned stride; - unsigned size; - BITSET_WORD *data; - } tile_map; - } damage; + struct pipe_resource base; + struct { + struct pipe_scissor_state extent; + struct { + bool enable; + unsigned stride; + unsigned size; + BITSET_WORD *data; + } tile_map; + } damage; - struct 
renderonly_scanout *scanout; + struct renderonly_scanout *scanout; - struct panfrost_resource *separate_stencil; + struct panfrost_resource *separate_stencil; - struct util_range valid_buffer_range; + struct util_range valid_buffer_range; - /* Description of the resource layout */ - struct pan_image image; + /* Description of the resource layout */ + struct pan_image image; - struct { - /* Is the checksum for this image valid? Implicitly refers to - * the first slice; we only checksum non-mipmapped 2D images */ - bool crc; + struct { + /* Is the checksum for this image valid? Implicitly refers to + * the first slice; we only checksum non-mipmapped 2D images */ + bool crc; - /* Has anything been written to this slice? */ - BITSET_DECLARE(data, MAX_MIP_LEVELS); - } valid; + /* Has anything been written to this slice? */ + BITSET_DECLARE(data, MAX_MIP_LEVELS); + } valid; - /* Whether the modifier can be changed */ - bool modifier_constant; + /* Whether the modifier can be changed */ + bool modifier_constant; - /* Used to decide when to convert to another modifier */ - uint16_t modifier_updates; + /* Used to decide when to convert to another modifier */ + uint16_t modifier_updates; - /* Do all pixels have the same stencil value? */ - bool constant_stencil; + /* Do all pixels have the same stencil value? */ + bool constant_stencil; - /* The stencil value if constant_stencil is set */ - uint8_t stencil_value; + /* The stencil value if constant_stencil is set */ + uint8_t stencil_value; - /* Cached min/max values for index buffers */ - struct panfrost_minmax_cache *index_cache; + /* Cached min/max values for index buffers */ + struct panfrost_minmax_cache *index_cache; }; static inline struct panfrost_resource * pan_resource(struct pipe_resource *p) { - return (struct panfrost_resource *)p; + return (struct panfrost_resource *)p; } struct panfrost_transfer { - struct pipe_transfer base; - void *map; - struct { - struct pipe_resource *rsrc; - struct pipe_box box; - } staging; + struct pipe_transfer base; + void *map; + struct { + struct pipe_resource *rsrc; + struct pipe_box box; + } staging; }; static inline struct panfrost_transfer * pan_transfer(struct pipe_transfer *p) { - return (struct panfrost_transfer *)p; + return (struct panfrost_transfer *)p; } void panfrost_resource_screen_init(struct pipe_screen *screen); @@ -113,53 +112,48 @@ void panfrost_resource_context_init(struct pipe_context *pctx); /* Blitting */ -void -panfrost_blitter_save(struct panfrost_context *ctx, bool render_cond); +void panfrost_blitter_save(struct panfrost_context *ctx, bool render_cond); -void -panfrost_blit(struct pipe_context *pipe, - const struct pipe_blit_info *info); +void panfrost_blit(struct pipe_context *pipe, + const struct pipe_blit_info *info); -void -panfrost_resource_set_damage_region(struct pipe_screen *screen, - struct pipe_resource *res, - unsigned int nrects, - const struct pipe_box *rects); +void panfrost_resource_set_damage_region(struct pipe_screen *screen, + struct pipe_resource *res, + unsigned int nrects, + const struct pipe_box *rects); static inline enum mali_texture_dimension -panfrost_translate_texture_dimension(enum pipe_texture_target t) { - switch (t) - { - case PIPE_BUFFER: - case PIPE_TEXTURE_1D: - case PIPE_TEXTURE_1D_ARRAY: - return MALI_TEXTURE_DIMENSION_1D; +panfrost_translate_texture_dimension(enum pipe_texture_target t) +{ + switch (t) { + case PIPE_BUFFER: + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: + return MALI_TEXTURE_DIMENSION_1D; - case PIPE_TEXTURE_2D: - case 
PIPE_TEXTURE_2D_ARRAY: - case PIPE_TEXTURE_RECT: - return MALI_TEXTURE_DIMENSION_2D; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_RECT: + return MALI_TEXTURE_DIMENSION_2D; - case PIPE_TEXTURE_3D: - return MALI_TEXTURE_DIMENSION_3D; + case PIPE_TEXTURE_3D: + return MALI_TEXTURE_DIMENSION_3D; - case PIPE_TEXTURE_CUBE: - case PIPE_TEXTURE_CUBE_ARRAY: - return MALI_TEXTURE_DIMENSION_CUBE; + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + return MALI_TEXTURE_DIMENSION_CUBE; - default: - unreachable("Unknown target"); - } + default: + unreachable("Unknown target"); + } } -void -pan_resource_modifier_convert(struct panfrost_context *ctx, - struct panfrost_resource *rsrc, - uint64_t modifier, const char *reason); +void pan_resource_modifier_convert(struct panfrost_context *ctx, + struct panfrost_resource *rsrc, + uint64_t modifier, const char *reason); -void -pan_legalize_afbc_format(struct panfrost_context *ctx, - struct panfrost_resource *rsrc, - enum pipe_format format); +void pan_legalize_afbc_format(struct panfrost_context *ctx, + struct panfrost_resource *rsrc, + enum pipe_format format); #endif /* PAN_RESOURCE_H */ diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c index cd83576c9e7..00ad57e03cf 100644 --- a/src/gallium/drivers/panfrost/pan_screen.c +++ b/src/gallium/drivers/panfrost/pan_screen.c @@ -26,31 +26,31 @@ * */ -#include "util/u_debug.h" -#include "util/u_memory.h" -#include "util/format/u_format.h" -#include "util/format/u_format_s3tc.h" -#include "util/u_video.h" -#include "util/u_screen.h" -#include "util/os_time.h" -#include "util/u_process.h" +#include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" -#include "draw/draw_context.h" +#include "util/format/u_format.h" +#include "util/format/u_format_s3tc.h" +#include "util/os_time.h" +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "util/u_process.h" +#include "util/u_screen.h" +#include "util/u_video.h" #include #include "drm-uapi/drm_fourcc.h" #include "drm-uapi/panfrost_drm.h" +#include "decode.h" #include "pan_bo.h" #include "pan_fence.h" -#include "pan_shader.h" -#include "pan_screen.h" -#include "pan_resource.h" #include "pan_public.h" +#include "pan_resource.h" +#include "pan_screen.h" +#include "pan_shader.h" #include "pan_util.h" -#include "decode.h" #include "pan_context.h" @@ -80,294 +80,294 @@ static const struct debug_named_value panfrost_debug_options[] = { static const char * panfrost_get_name(struct pipe_screen *screen) { - return pan_device(screen)->model->name; + return pan_device(screen)->model->name; } static const char * panfrost_get_vendor(struct pipe_screen *screen) { - return "Mesa"; + return "Mesa"; } static const char * panfrost_get_device_vendor(struct pipe_screen *screen) { - return "Arm"; + return "Arm"; } static int panfrost_get_param(struct pipe_screen *screen, enum pipe_cap param) { - struct panfrost_device *dev = pan_device(screen); + struct panfrost_device *dev = pan_device(screen); - /* Our GL 3.x implementation is WIP */ - bool is_gl3 = dev->debug & (PAN_DBG_GL3 | PAN_DBG_DEQP); + /* Our GL 3.x implementation is WIP */ + bool is_gl3 = dev->debug & (PAN_DBG_GL3 | PAN_DBG_DEQP); - /* Native MRT is introduced with v5 */ - bool has_mrt = (dev->arch >= 5); + /* Native MRT is introduced with v5 */ + bool has_mrt = (dev->arch >= 5); - /* Only kernel drivers >= 1.1 can allocate HEAP BOs */ - bool has_heap = dev->kernel_version->version_major > 1 || - 
dev->kernel_version->version_minor >= 1; + /* Only kernel drivers >= 1.1 can allocate HEAP BOs */ + bool has_heap = dev->kernel_version->version_major > 1 || + dev->kernel_version->version_minor >= 1; - switch (param) { - case PIPE_CAP_NPOT_TEXTURES: - case PIPE_CAP_MIXED_COLOR_DEPTH_BITS: - case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD: - case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: - case PIPE_CAP_DEPTH_CLIP_DISABLE: - case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE: - case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: - case PIPE_CAP_FRONTEND_NOOP: - case PIPE_CAP_SAMPLE_SHADING: - case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES: - case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: - case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: - case PIPE_CAP_SHADER_PACK_HALF_FLOAT: - return 1; + switch (param) { + case PIPE_CAP_NPOT_TEXTURES: + case PIPE_CAP_MIXED_COLOR_DEPTH_BITS: + case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD: + case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: + case PIPE_CAP_DEPTH_CLIP_DISABLE: + case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE: + case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: + case PIPE_CAP_FRONTEND_NOOP: + case PIPE_CAP_SAMPLE_SHADING: + case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: + case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: + case PIPE_CAP_SHADER_PACK_HALF_FLOAT: + return 1; - case PIPE_CAP_MAX_RENDER_TARGETS: - case PIPE_CAP_FBFETCH: - case PIPE_CAP_FBFETCH_COHERENT: - return has_mrt ? 8 : 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + case PIPE_CAP_FBFETCH: + case PIPE_CAP_FBFETCH_COHERENT: + return has_mrt ? 8 : 1; - case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: - return 1; + case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: + return 1; - case PIPE_CAP_OCCLUSION_QUERY: - case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX: - return true; + case PIPE_CAP_OCCLUSION_QUERY: + case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX: + return true; - case PIPE_CAP_ANISOTROPIC_FILTER: - return dev->revision >= dev->model->min_rev_anisotropic; + case PIPE_CAP_ANISOTROPIC_FILTER: + return dev->revision >= dev->model->min_rev_anisotropic; - /* Compile side is done for Bifrost, Midgard TODO. Needs some kernel - * work to turn on, since CYCLE_COUNT_START needs to be issued. In - * kbase, userspace requests this via BASE_JD_REQ_PERMON. There is not - * yet way to request this with mainline TODO */ - case PIPE_CAP_SHADER_CLOCK: - return 0; + /* Compile side is done for Bifrost, Midgard TODO. Needs some kernel + * work to turn on, since CYCLE_COUNT_START needs to be issued. In + * kbase, userspace requests this via BASE_JD_REQ_PERMON. 
There is not + * yet way to request this with mainline TODO */ + case PIPE_CAP_SHADER_CLOCK: + return 0; - case PIPE_CAP_VS_INSTANCEID: - case PIPE_CAP_TEXTURE_MULTISAMPLE: - case PIPE_CAP_SURFACE_SAMPLE_COUNT: - return true; + case PIPE_CAP_VS_INSTANCEID: + case PIPE_CAP_TEXTURE_MULTISAMPLE: + case PIPE_CAP_SURFACE_SAMPLE_COUNT: + return true; - case PIPE_CAP_SAMPLER_VIEW_TARGET: - case PIPE_CAP_CLIP_HALFZ: - case PIPE_CAP_TEXTURE_SWIZZLE: - case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE: - case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: - case PIPE_CAP_BLEND_EQUATION_SEPARATE: - case PIPE_CAP_INDEP_BLEND_ENABLE: - case PIPE_CAP_INDEP_BLEND_FUNC: - case PIPE_CAP_GENERATE_MIPMAP: - case PIPE_CAP_ACCELERATED: - case PIPE_CAP_UMA: - case PIPE_CAP_TEXTURE_FLOAT_LINEAR: - case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: - case PIPE_CAP_SHADER_ARRAY_COMPONENTS: - case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: - case PIPE_CAP_TEXTURE_BUFFER_SAMPLER: - case PIPE_CAP_PACKED_UNIFORMS: - case PIPE_CAP_IMAGE_LOAD_FORMATTED: - case PIPE_CAP_CUBE_MAP_ARRAY: - case PIPE_CAP_COMPUTE: - case PIPE_CAP_INT64: - return 1; + case PIPE_CAP_SAMPLER_VIEW_TARGET: + case PIPE_CAP_CLIP_HALFZ: + case PIPE_CAP_TEXTURE_SWIZZLE: + case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + case PIPE_CAP_INDEP_BLEND_ENABLE: + case PIPE_CAP_INDEP_BLEND_FUNC: + case PIPE_CAP_GENERATE_MIPMAP: + case PIPE_CAP_ACCELERATED: + case PIPE_CAP_UMA: + case PIPE_CAP_TEXTURE_FLOAT_LINEAR: + case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: + case PIPE_CAP_SHADER_ARRAY_COMPONENTS: + case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: + case PIPE_CAP_TEXTURE_BUFFER_SAMPLER: + case PIPE_CAP_PACKED_UNIFORMS: + case PIPE_CAP_IMAGE_LOAD_FORMATTED: + case PIPE_CAP_CUBE_MAP_ARRAY: + case PIPE_CAP_COMPUTE: + case PIPE_CAP_INT64: + return 1; - /* We need this for OES_copy_image, but currently there are some awful - * interactions with AFBC that need to be worked out. */ - case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: - return 0; + /* We need this for OES_copy_image, but currently there are some awful + * interactions with AFBC that need to be worked out. */ + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: + return 0; - case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: - return PIPE_MAX_SO_BUFFERS; + case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: + return PIPE_MAX_SO_BUFFERS; - case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: - case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: - return PIPE_MAX_SO_OUTPUTS; + case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: + case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: + return PIPE_MAX_SO_OUTPUTS; - case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: - case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS: - return 1; + case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: + case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS: + return 1; - case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: - return 2048; + case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: + return 2048; - case PIPE_CAP_GLSL_FEATURE_LEVEL: - case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY: - return is_gl3 ? 330 : 140; - case PIPE_CAP_ESSL_FEATURE_LEVEL: - return dev->arch >= 6 ? 320 : 310; + case PIPE_CAP_GLSL_FEATURE_LEVEL: + case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY: + return is_gl3 ? 330 : 140; + case PIPE_CAP_ESSL_FEATURE_LEVEL: + return dev->arch >= 6 ? 
320 : 310; - case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: - return 16; + case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: + return 16; - /* v7 (only) restricts component orders with AFBC. To workaround, we - * compose format swizzles with texture swizzles. pan_texture.c motsly - * handles this but we need to fix up the border colour. - */ - case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: - if (dev->arch == 7) - return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_FREEDRENO; - else - return 0; + /* v7 (only) restricts component orders with AFBC. To workaround, we + * compose format swizzles with texture swizzles. pan_texture.c motsly + * handles this but we need to fix up the border colour. + */ + case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: + if (dev->arch == 7) + return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_FREEDRENO; + else + return 0; - case PIPE_CAP_MAX_TEXEL_BUFFER_ELEMENTS_UINT: - return 65536; + case PIPE_CAP_MAX_TEXEL_BUFFER_ELEMENTS_UINT: + return 65536; - /* Must be at least 64 for correct behaviour */ - case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: - return 64; + /* Must be at least 64 for correct behaviour */ + case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: + return 64; - case PIPE_CAP_QUERY_TIMESTAMP: - return is_gl3; + case PIPE_CAP_QUERY_TIMESTAMP: + return is_gl3; - /* The hardware requires element alignment for data conversion to work - * as expected. If data conversion is not required, this restriction is - * lifted on Midgard at a performance penalty. We conservatively - * require element alignment for vertex buffers, using u_vbuf to - * translate to match the hardware requirement. - * - * This is less heavy-handed than the 4BYTE_ALIGNED_ONLY caps, which - * would needlessly require alignment even for 8-bit formats. - */ - case PIPE_CAP_VERTEX_ATTRIB_ELEMENT_ALIGNED_ONLY: - return 1; + /* The hardware requires element alignment for data conversion to work + * as expected. If data conversion is not required, this restriction is + * lifted on Midgard at a performance penalty. We conservatively + * require element alignment for vertex buffers, using u_vbuf to + * translate to match the hardware requirement. + * + * This is less heavy-handed than the 4BYTE_ALIGNED_ONLY caps, which + * would needlessly require alignment even for 8-bit formats. + */ + case PIPE_CAP_VERTEX_ATTRIB_ELEMENT_ALIGNED_ONLY: + return 1; - case PIPE_CAP_MAX_TEXTURE_2D_SIZE: - return 1 << (MAX_MIP_LEVELS - 1); + case PIPE_CAP_MAX_TEXTURE_2D_SIZE: + return 1 << (MAX_MIP_LEVELS - 1); - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return MAX_MIP_LEVELS; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return MAX_MIP_LEVELS; - case PIPE_CAP_FS_COORD_ORIGIN_LOWER_LEFT: - case PIPE_CAP_FS_COORD_PIXEL_CENTER_INTEGER: - /* Hardware is upper left. Pixel center at (0.5, 0.5) */ - return 0; + case PIPE_CAP_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_FS_COORD_PIXEL_CENTER_INTEGER: + /* Hardware is upper left. 
Pixel center at (0.5, 0.5) */ + return 0; - case PIPE_CAP_FS_COORD_ORIGIN_UPPER_LEFT: - case PIPE_CAP_FS_COORD_PIXEL_CENTER_HALF_INTEGER: - case PIPE_CAP_TGSI_TEXCOORD: - return 1; + case PIPE_CAP_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + case PIPE_CAP_TGSI_TEXCOORD: + return 1; - /* We would prefer varyings on Midgard, but proper sysvals on Bifrost */ - case PIPE_CAP_FS_FACE_IS_INTEGER_SYSVAL: - case PIPE_CAP_FS_POSITION_IS_SYSVAL: - case PIPE_CAP_FS_POINT_IS_SYSVAL: - return dev->arch >= 6; + /* We would prefer varyings on Midgard, but proper sysvals on Bifrost */ + case PIPE_CAP_FS_FACE_IS_INTEGER_SYSVAL: + case PIPE_CAP_FS_POSITION_IS_SYSVAL: + case PIPE_CAP_FS_POINT_IS_SYSVAL: + return dev->arch >= 6; - case PIPE_CAP_SEAMLESS_CUBE_MAP: - case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: - return true; + case PIPE_CAP_SEAMLESS_CUBE_MAP: + case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + return true; - case PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET: - return 0xffff; + case PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET: + return 0xffff; - case PIPE_CAP_TEXTURE_TRANSFER_MODES: - return 0; + case PIPE_CAP_TEXTURE_TRANSFER_MODES: + return 0; - case PIPE_CAP_ENDIANNESS: - return PIPE_ENDIAN_NATIVE; + case PIPE_CAP_ENDIANNESS: + return PIPE_ENDIAN_NATIVE; - case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: - return 4; + case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: + return 4; - case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET: - return -8; + case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET: + return -8; - case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET: - return 7; + case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET: + return 7; - case PIPE_CAP_VIDEO_MEMORY: { - uint64_t system_memory; + case PIPE_CAP_VIDEO_MEMORY: { + uint64_t system_memory; - if (!os_get_total_physical_memory(&system_memory)) - return 0; + if (!os_get_total_physical_memory(&system_memory)) + return 0; - return (int)(system_memory >> 20); - } + return (int)(system_memory >> 20); + } - case PIPE_CAP_SHADER_STENCIL_EXPORT: - case PIPE_CAP_CONDITIONAL_RENDER: - case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: - return true; + case PIPE_CAP_SHADER_STENCIL_EXPORT: + case PIPE_CAP_CONDITIONAL_RENDER: + case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: + return true; - case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: - return 4; + case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: + return 4; - case PIPE_CAP_MAX_VARYINGS: - /* Return the GLSL maximum. The internal maximum - * PAN_MAX_VARYINGS accommodates internal varyings. */ - return MAX_VARYING; + case PIPE_CAP_MAX_VARYINGS: + /* Return the GLSL maximum. The internal maximum + * PAN_MAX_VARYINGS accommodates internal varyings. */ + return MAX_VARYING; - /* Removed in v6 (Bifrost) */ - case PIPE_CAP_GL_CLAMP: - case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - case PIPE_CAP_ALPHA_TEST: - return dev->arch <= 5; + /* Removed in v6 (Bifrost) */ + case PIPE_CAP_GL_CLAMP: + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_ALPHA_TEST: + return dev->arch <= 5; - /* Removed in v9 (Valhall). PRIMTIIVE_RESTART_FIXED_INDEX is of course - * still supported as it is core GLES3.0 functionality - */ - case PIPE_CAP_PRIMITIVE_RESTART: - return dev->arch <= 7; + /* Removed in v9 (Valhall). 
PRIMTIIVE_RESTART_FIXED_INDEX is of course + * still supported as it is core GLES3.0 functionality + */ + case PIPE_CAP_PRIMITIVE_RESTART: + return dev->arch <= 7; - case PIPE_CAP_FLATSHADE: - case PIPE_CAP_TWO_SIDED_COLOR: - case PIPE_CAP_CLIP_PLANES: - return 0; + case PIPE_CAP_FLATSHADE: + case PIPE_CAP_TWO_SIDED_COLOR: + case PIPE_CAP_CLIP_PLANES: + return 0; - case PIPE_CAP_PACKED_STREAM_OUTPUT: - return 0; + case PIPE_CAP_PACKED_STREAM_OUTPUT: + return 0; - case PIPE_CAP_VIEWPORT_TRANSFORM_LOWERED: - case PIPE_CAP_PSIZ_CLAMPED: - return 1; + case PIPE_CAP_VIEWPORT_TRANSFORM_LOWERED: + case PIPE_CAP_PSIZ_CLAMPED: + return 1; - case PIPE_CAP_NIR_IMAGES_AS_DEREF: - return 0; + case PIPE_CAP_NIR_IMAGES_AS_DEREF: + return 0; - case PIPE_CAP_DRAW_INDIRECT: - return has_heap; + case PIPE_CAP_DRAW_INDIRECT: + return has_heap; - case PIPE_CAP_START_INSTANCE: - case PIPE_CAP_DRAW_PARAMETERS: - return pan_is_bifrost(dev); + case PIPE_CAP_START_INSTANCE: + case PIPE_CAP_DRAW_PARAMETERS: + return pan_is_bifrost(dev); - case PIPE_CAP_SUPPORTED_PRIM_MODES: - case PIPE_CAP_SUPPORTED_PRIM_MODES_WITH_RESTART: { - /* Mali supports GLES and QUADS. Midgard and v6 Bifrost - * support more */ - uint32_t modes = BITFIELD_MASK(PIPE_PRIM_QUADS + 1); + case PIPE_CAP_SUPPORTED_PRIM_MODES: + case PIPE_CAP_SUPPORTED_PRIM_MODES_WITH_RESTART: { + /* Mali supports GLES and QUADS. Midgard and v6 Bifrost + * support more */ + uint32_t modes = BITFIELD_MASK(PIPE_PRIM_QUADS + 1); - if (dev->arch <= 6) { - modes |= BITFIELD_BIT(PIPE_PRIM_QUAD_STRIP); - modes |= BITFIELD_BIT(PIPE_PRIM_POLYGON); - } + if (dev->arch <= 6) { + modes |= BITFIELD_BIT(PIPE_PRIM_QUAD_STRIP); + modes |= BITFIELD_BIT(PIPE_PRIM_POLYGON); + } - if (dev->arch >= 9) { - /* Although Valhall is supposed to support quads, they - * don't seem to work correctly. Disable to fix - * arb-provoking-vertex-render. - */ - modes &= ~BITFIELD_BIT(PIPE_PRIM_QUADS); - } + if (dev->arch >= 9) { + /* Although Valhall is supposed to support quads, they + * don't seem to work correctly. Disable to fix + * arb-provoking-vertex-render. + */ + modes &= ~BITFIELD_BIT(PIPE_PRIM_QUADS); + } - return modes; - } + return modes; + } - case PIPE_CAP_IMAGE_STORE_FORMATTED: - return 1; + case PIPE_CAP_IMAGE_STORE_FORMATTED: + return 1; - case PIPE_CAP_NATIVE_FENCE_FD: - return 1; + case PIPE_CAP_NATIVE_FENCE_FD: + return 1; - default: - return u_pipe_screen_get_param_defaults(screen, param); - } + default: + return u_pipe_screen_get_param_defaults(screen, param); + } } static int @@ -375,163 +375,163 @@ panfrost_get_shader_param(struct pipe_screen *screen, enum pipe_shader_type shader, enum pipe_shader_cap param) { - struct panfrost_device *dev = pan_device(screen); - bool is_nofp16 = dev->debug & PAN_DBG_NOFP16; - bool is_deqp = dev->debug & PAN_DBG_DEQP; + struct panfrost_device *dev = pan_device(screen); + bool is_nofp16 = dev->debug & PAN_DBG_NOFP16; + bool is_deqp = dev->debug & PAN_DBG_DEQP; - switch (shader) { - case PIPE_SHADER_VERTEX: - case PIPE_SHADER_FRAGMENT: - case PIPE_SHADER_COMPUTE: - break; - default: - return 0; - } + switch (shader) { + case PIPE_SHADER_VERTEX: + case PIPE_SHADER_FRAGMENT: + case PIPE_SHADER_COMPUTE: + break; + default: + return 0; + } - /* We only allow observable side effects (memory writes) in compute and - * fragment shaders. Side effects in the geometry pipeline cause - * trouble with IDVS and conflict with our transform feedback lowering. 
- */ - bool allow_side_effects = (shader != PIPE_SHADER_VERTEX); + /* We only allow observable side effects (memory writes) in compute and + * fragment shaders. Side effects in the geometry pipeline cause + * trouble with IDVS and conflict with our transform feedback lowering. + */ + bool allow_side_effects = (shader != PIPE_SHADER_VERTEX); - switch (param) { - case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: - return 16384; /* arbitrary */ + switch (param) { + case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: + return 16384; /* arbitrary */ - case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: - return 1024; /* arbitrary */ + case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: + return 1024; /* arbitrary */ - case PIPE_SHADER_CAP_MAX_INPUTS: - /* Used as ABI on Midgard */ - return 16; + case PIPE_SHADER_CAP_MAX_INPUTS: + /* Used as ABI on Midgard */ + return 16; - case PIPE_SHADER_CAP_MAX_OUTPUTS: - return shader == PIPE_SHADER_FRAGMENT ? 8 : PIPE_MAX_ATTRIBS; + case PIPE_SHADER_CAP_MAX_OUTPUTS: + return shader == PIPE_SHADER_FRAGMENT ? 8 : PIPE_MAX_ATTRIBS; - case PIPE_SHADER_CAP_MAX_TEMPS: - return 256; /* arbitrary */ + case PIPE_SHADER_CAP_MAX_TEMPS: + return 256; /* arbitrary */ - case PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE: - return 16 * 1024 * sizeof(float); + case PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE: + return 16 * 1024 * sizeof(float); - case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: - STATIC_ASSERT(PAN_MAX_CONST_BUFFERS < 0x100); - return PAN_MAX_CONST_BUFFERS; + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + STATIC_ASSERT(PAN_MAX_CONST_BUFFERS < 0x100); + return PAN_MAX_CONST_BUFFERS; - case PIPE_SHADER_CAP_CONT_SUPPORTED: - return 0; + case PIPE_SHADER_CAP_CONT_SUPPORTED: + return 0; - case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: - return 1; - case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: - return 0; + case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: + return 1; + case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: + return 0; - case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: - return dev->arch >= 6; + case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: + return dev->arch >= 6; - case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: - return 1; + case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: + return 1; - case PIPE_SHADER_CAP_SUBROUTINES: - return 0; + case PIPE_SHADER_CAP_SUBROUTINES: + return 0; - case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: - return 0; + case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: + return 0; - case PIPE_SHADER_CAP_INTEGERS: - return 1; + case PIPE_SHADER_CAP_INTEGERS: + return 1; - /* The Bifrost compiler supports full 16-bit. Midgard could but int16 - * support is untested, so restrict INT16 to Bifrost. Midgard - * architecturally cannot support fp16 derivatives. */ + /* The Bifrost compiler supports full 16-bit. Midgard could but int16 + * support is untested, so restrict INT16 to Bifrost. Midgard + * architecturally cannot support fp16 derivatives. */ - case PIPE_SHADER_CAP_FP16: - case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS: - return !is_nofp16; - case PIPE_SHADER_CAP_FP16_DERIVATIVES: - case PIPE_SHADER_CAP_FP16_CONST_BUFFERS: - return dev->arch >= 6 && !is_nofp16; - case PIPE_SHADER_CAP_INT16: - /* XXX: Advertise this CAP when a proper fix to lower_precision - * lands. 
GLSL IR validation failure in glmark2 -bterrain */ - return dev->arch >= 6 && !is_nofp16 && is_deqp; + case PIPE_SHADER_CAP_FP16: + case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS: + return !is_nofp16; + case PIPE_SHADER_CAP_FP16_DERIVATIVES: + case PIPE_SHADER_CAP_FP16_CONST_BUFFERS: + return dev->arch >= 6 && !is_nofp16; + case PIPE_SHADER_CAP_INT16: + /* XXX: Advertise this CAP when a proper fix to lower_precision + * lands. GLSL IR validation failure in glmark2 -bterrain */ + return dev->arch >= 6 && !is_nofp16 && is_deqp; - case PIPE_SHADER_CAP_INT64_ATOMICS: - case PIPE_SHADER_CAP_DROUND_SUPPORTED: - case PIPE_SHADER_CAP_DFRACEXP_DLDEXP_SUPPORTED: - case PIPE_SHADER_CAP_LDEXP_SUPPORTED: - case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: - return 0; + case PIPE_SHADER_CAP_INT64_ATOMICS: + case PIPE_SHADER_CAP_DROUND_SUPPORTED: + case PIPE_SHADER_CAP_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_LDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: + return 0; - case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: - STATIC_ASSERT(PIPE_MAX_SAMPLERS < 0x10000); - return PIPE_MAX_SAMPLERS; + case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: + STATIC_ASSERT(PIPE_MAX_SAMPLERS < 0x10000); + return PIPE_MAX_SAMPLERS; - case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: - STATIC_ASSERT(PIPE_MAX_SHADER_SAMPLER_VIEWS < 0x10000); - return PIPE_MAX_SHADER_SAMPLER_VIEWS; + case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: + STATIC_ASSERT(PIPE_MAX_SHADER_SAMPLER_VIEWS < 0x10000); + return PIPE_MAX_SHADER_SAMPLER_VIEWS; - case PIPE_SHADER_CAP_PREFERRED_IR: - return PIPE_SHADER_IR_NIR; + case PIPE_SHADER_CAP_PREFERRED_IR: + return PIPE_SHADER_IR_NIR; - case PIPE_SHADER_CAP_SUPPORTED_IRS: - return (1 << PIPE_SHADER_IR_NIR); + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return (1 << PIPE_SHADER_IR_NIR); - case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: - return allow_side_effects ? 16 : 0; + case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + return allow_side_effects ? 16 : 0; - case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: - return allow_side_effects ? PIPE_MAX_SHADER_IMAGES : 0; + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: + return allow_side_effects ? 
PIPE_MAX_SHADER_IMAGES : 0; - case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS: - case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS: - return 0; + case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS: + case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS: + return 0; - default: - return 0; - } + default: + return 0; + } - return 0; + return 0; } static float panfrost_get_paramf(struct pipe_screen *screen, enum pipe_capf param) { - switch (param) { - case PIPE_CAPF_MIN_LINE_WIDTH: - case PIPE_CAPF_MIN_LINE_WIDTH_AA: - case PIPE_CAPF_MIN_POINT_SIZE: - case PIPE_CAPF_MIN_POINT_SIZE_AA: - return 1; + switch (param) { + case PIPE_CAPF_MIN_LINE_WIDTH: + case PIPE_CAPF_MIN_LINE_WIDTH_AA: + case PIPE_CAPF_MIN_POINT_SIZE: + case PIPE_CAPF_MIN_POINT_SIZE_AA: + return 1; - case PIPE_CAPF_POINT_SIZE_GRANULARITY: - case PIPE_CAPF_LINE_WIDTH_GRANULARITY: - return 0.0625; + case PIPE_CAPF_POINT_SIZE_GRANULARITY: + case PIPE_CAPF_LINE_WIDTH_GRANULARITY: + return 0.0625; - case PIPE_CAPF_MAX_LINE_WIDTH: - case PIPE_CAPF_MAX_LINE_WIDTH_AA: - case PIPE_CAPF_MAX_POINT_SIZE: - case PIPE_CAPF_MAX_POINT_SIZE_AA: - return 4095.9375; + case PIPE_CAPF_MAX_LINE_WIDTH: + case PIPE_CAPF_MAX_LINE_WIDTH_AA: + case PIPE_CAPF_MAX_POINT_SIZE: + case PIPE_CAPF_MAX_POINT_SIZE_AA: + return 4095.9375; - case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: - return 16.0; + case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: + return 16.0; - case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: - return 16.0; /* arbitrary */ + case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: + return 16.0; /* arbitrary */ - case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE: - case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE: - case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY: - return 0.0f; + case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE: + case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE: + case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY: + return 0.0f; - default: - debug_printf("Unexpected PIPE_CAPF %d query\n", param); - return 0.0; - } + default: + debug_printf("Unexpected PIPE_CAPF %d query\n", param); + return 0.0; + } } /** @@ -540,69 +540,64 @@ panfrost_get_paramf(struct pipe_screen *screen, enum pipe_capf param) * \param type one of PIPE_TEXTURE, PIPE_SURFACE */ static bool -panfrost_is_format_supported( struct pipe_screen *screen, - enum pipe_format format, - enum pipe_texture_target target, - unsigned sample_count, - unsigned storage_sample_count, - unsigned bind) +panfrost_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned storage_sample_count, unsigned bind) { - struct panfrost_device *dev = pan_device(screen); + struct panfrost_device *dev = pan_device(screen); - assert(target == PIPE_BUFFER || - target == PIPE_TEXTURE_1D || - target == PIPE_TEXTURE_1D_ARRAY || - target == PIPE_TEXTURE_2D || - target == PIPE_TEXTURE_2D_ARRAY || - target == PIPE_TEXTURE_RECT || - target == PIPE_TEXTURE_3D || - target == PIPE_TEXTURE_CUBE || - target == PIPE_TEXTURE_CUBE_ARRAY); + assert(target == PIPE_BUFFER || target == PIPE_TEXTURE_1D || + target == PIPE_TEXTURE_1D_ARRAY || target == PIPE_TEXTURE_2D || + target == PIPE_TEXTURE_2D_ARRAY || target == PIPE_TEXTURE_RECT || + target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE || + target == PIPE_TEXTURE_CUBE_ARRAY); - /* MSAA 2x gets rounded up to 4x. MSAA 8x/16x only supported on v5+. - * TODO: debug MSAA 8x/16x */ + /* MSAA 2x gets rounded up to 4x. MSAA 8x/16x only supported on v5+. 
+ * TODO: debug MSAA 8x/16x */ - switch (sample_count) { - case 0: - case 1: - case 4: - break; - case 8: - case 16: - if (dev->debug & PAN_DBG_MSAA16) - break; - else - return false; - default: - return false; - } + switch (sample_count) { + case 0: + case 1: + case 4: + break; + case 8: + case 16: + if (dev->debug & PAN_DBG_MSAA16) + break; + else + return false; + default: + return false; + } - if (MAX2(sample_count, 1) != MAX2(storage_sample_count, 1)) - return false; + if (MAX2(sample_count, 1) != MAX2(storage_sample_count, 1)) + return false; - /* Z16 causes dEQP failures on t720 */ - if (format == PIPE_FORMAT_Z16_UNORM && dev->arch <= 4) - return false; + /* Z16 causes dEQP failures on t720 */ + if (format == PIPE_FORMAT_Z16_UNORM && dev->arch <= 4) + return false; - /* Check we support the format with the given bind */ + /* Check we support the format with the given bind */ - unsigned relevant_bind = bind & - ( PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_RENDER_TARGET - | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_SAMPLER_VIEW); + unsigned relevant_bind = + bind & (PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_RENDER_TARGET | + PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_SAMPLER_VIEW); - struct panfrost_format fmt = dev->formats[format]; + struct panfrost_format fmt = dev->formats[format]; - /* Also check that compressed texture formats are supported on this - * particular chip. They may not be depending on system integration - * differences. */ + /* Also check that compressed texture formats are supported on this + * particular chip. They may not be depending on system integration + * differences. */ - bool supported = panfrost_supports_compressed_format(dev, - MALI_EXTRACT_INDEX(fmt.hw)); + bool supported = + panfrost_supports_compressed_format(dev, MALI_EXTRACT_INDEX(fmt.hw)); - if (!supported) - return false; + if (!supported) + return false; - return MALI_EXTRACT_INDEX(fmt.hw) && ((relevant_bind & ~fmt.bind) == 0); + return MALI_EXTRACT_INDEX(fmt.hw) && ((relevant_bind & ~fmt.bind) == 0); } /* We always support linear and tiled operations, both external and internal. 
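The sample-count policy in panfrost_is_format_supported above is: 1x and 4x are always accepted (2x is rounded up to 4x), the storage sample count must match the sample count, and 8x/16x are only allowed when the PAN_DBG_MSAA16 debug flag is set. A minimal standalone sketch of just that gating, using a hypothetical helper name that is not part of the driver, might look like the following; it only restates the switch in the hunk above and is not a drop-in replacement for the real check (which also validates the format, bind flags, and compressed-format support).

#include <stdbool.h>

/* Illustrative sketch only: mirrors the sample-count gating above.
 * 0/1/4 samples are always allowed, 8/16 only behind the MSAA16 debug
 * flag, anything else is rejected. */
static bool
sample_count_allowed(unsigned sample_count, bool msaa16_debug)
{
   switch (sample_count) {
   case 0: /* no multisampling requested */
   case 1:
   case 4:
      return true;
   case 8:
   case 16:
      return msaa16_debug;
   default:
      return false;
   }
}
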
@@ -611,175 +606,180 @@ panfrost_is_format_supported( struct pipe_screen *screen, static void panfrost_walk_dmabuf_modifiers(struct pipe_screen *screen, - enum pipe_format format, int max, uint64_t *modifiers, unsigned - int *external_only, int *out_count, uint64_t test_modifier) + enum pipe_format format, int max, + uint64_t *modifiers, unsigned int *external_only, + int *out_count, uint64_t test_modifier) { - /* Query AFBC status */ - struct panfrost_device *dev = pan_device(screen); - bool afbc = dev->has_afbc && panfrost_format_supports_afbc(dev, format); - bool ytr = panfrost_afbc_can_ytr(format); - bool tiled_afbc = panfrost_afbc_can_tile(dev); + /* Query AFBC status */ + struct panfrost_device *dev = pan_device(screen); + bool afbc = dev->has_afbc && panfrost_format_supports_afbc(dev, format); + bool ytr = panfrost_afbc_can_ytr(format); + bool tiled_afbc = panfrost_afbc_can_tile(dev); - unsigned count = 0; + unsigned count = 0; - for (unsigned i = 0; i < PAN_MODIFIER_COUNT; ++i) { - if (drm_is_afbc(pan_best_modifiers[i]) && !afbc) - continue; + for (unsigned i = 0; i < PAN_MODIFIER_COUNT; ++i) { + if (drm_is_afbc(pan_best_modifiers[i]) && !afbc) + continue; - if ((pan_best_modifiers[i] & AFBC_FORMAT_MOD_YTR) && !ytr) - continue; + if ((pan_best_modifiers[i] & AFBC_FORMAT_MOD_YTR) && !ytr) + continue; - if ((pan_best_modifiers[i] & AFBC_FORMAT_MOD_TILED) && !tiled_afbc) - continue; + if ((pan_best_modifiers[i] & AFBC_FORMAT_MOD_TILED) && !tiled_afbc) + continue; - if (test_modifier != DRM_FORMAT_MOD_INVALID && - test_modifier != pan_best_modifiers[i]) - continue; + if (test_modifier != DRM_FORMAT_MOD_INVALID && + test_modifier != pan_best_modifiers[i]) + continue; - count++; + count++; - if (max > (int) count) { - modifiers[count] = pan_best_modifiers[i]; + if (max > (int)count) { + modifiers[count] = pan_best_modifiers[i]; - if (external_only) - external_only[count] = false; - } - } + if (external_only) + external_only[count] = false; + } + } - *out_count = count; + *out_count = count; } static void panfrost_query_dmabuf_modifiers(struct pipe_screen *screen, - enum pipe_format format, int max, uint64_t *modifiers, unsigned - int *external_only, int *out_count) + enum pipe_format format, int max, + uint64_t *modifiers, + unsigned int *external_only, int *out_count) { - panfrost_walk_dmabuf_modifiers(screen, format, max, modifiers, - external_only, out_count, DRM_FORMAT_MOD_INVALID); + panfrost_walk_dmabuf_modifiers(screen, format, max, modifiers, external_only, + out_count, DRM_FORMAT_MOD_INVALID); } static bool panfrost_is_dmabuf_modifier_supported(struct pipe_screen *screen, - uint64_t modifier, enum pipe_format format, - bool *external_only) + uint64_t modifier, + enum pipe_format format, + bool *external_only) { - uint64_t unused; - unsigned int uint_extern_only = 0; - int count; + uint64_t unused; + unsigned int uint_extern_only = 0; + int count; - panfrost_walk_dmabuf_modifiers(screen, format, 1, &unused, - &uint_extern_only, &count, modifier); + panfrost_walk_dmabuf_modifiers(screen, format, 1, &unused, &uint_extern_only, + &count, modifier); - if (external_only) - *external_only = uint_extern_only ? true : false; + if (external_only) + *external_only = uint_extern_only ? 
true : false; - return count > 0; + return count > 0; } static int -panfrost_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type, - enum pipe_compute_cap param, void *ret) +panfrost_get_compute_param(struct pipe_screen *pscreen, + enum pipe_shader_ir ir_type, + enum pipe_compute_cap param, void *ret) { - struct panfrost_device *dev = pan_device(pscreen); - const char * const ir = "panfrost"; + struct panfrost_device *dev = pan_device(pscreen); + const char *const ir = "panfrost"; -#define RET(x) do { \ - if (ret) \ - memcpy(ret, x, sizeof(x)); \ - return sizeof(x); \ -} while (0) +#define RET(x) \ + do { \ + if (ret) \ + memcpy(ret, x, sizeof(x)); \ + return sizeof(x); \ + } while (0) - switch (param) { - case PIPE_COMPUTE_CAP_ADDRESS_BITS: - RET((uint32_t []){ 64 }); + switch (param) { + case PIPE_COMPUTE_CAP_ADDRESS_BITS: + RET((uint32_t[]){64}); - case PIPE_COMPUTE_CAP_IR_TARGET: - if (ret) - sprintf(ret, "%s", ir); - return strlen(ir) * sizeof(char); + case PIPE_COMPUTE_CAP_IR_TARGET: + if (ret) + sprintf(ret, "%s", ir); + return strlen(ir) * sizeof(char); - case PIPE_COMPUTE_CAP_GRID_DIMENSION: - RET((uint64_t []) { 3 }); + case PIPE_COMPUTE_CAP_GRID_DIMENSION: + RET((uint64_t[]){3}); - case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: - RET(((uint64_t []) { 65535, 65535, 65535 })); + case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: + RET(((uint64_t[]){65535, 65535, 65535})); - case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: - /* Unpredictable behaviour at larger sizes. Mali-G52 advertises - * 384x384x384. - * - * On Midgard, we don't allow more than 128 threads in each - * direction to match PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK. - * That still exceeds the minimum-maximum. - */ - if (dev->arch >= 6) - RET(((uint64_t []) { 256, 256, 256 })); - else - RET(((uint64_t []) { 128, 128, 128 })); + case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: + /* Unpredictable behaviour at larger sizes. Mali-G52 advertises + * 384x384x384. + * + * On Midgard, we don't allow more than 128 threads in each + * direction to match PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK. + * That still exceeds the minimum-maximum. + */ + if (dev->arch >= 6) + RET(((uint64_t[]){256, 256, 256})); + else + RET(((uint64_t[]){128, 128, 128})); - case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: - /* On Bifrost and newer, all GPUs can support at least 256 threads - * regardless of register usage, so we report 256. - * - * On Midgard, with maximum register usage, the maximum - * thread count is only 64. We would like to report 64 here, but - * the GLES3.1 spec minimum is 128, so we report 128 and limit - * the register allocation of affected compute kernels. - */ - RET((uint64_t []) { dev->arch >= 6 ? 256 : 128 }); + case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: + /* On Bifrost and newer, all GPUs can support at least 256 threads + * regardless of register usage, so we report 256. + * + * On Midgard, with maximum register usage, the maximum + * thread count is only 64. We would like to report 64 here, but + * the GLES3.1 spec minimum is 128, so we report 128 and limit + * the register allocation of affected compute kernels. + */ + RET((uint64_t[]){dev->arch >= 6 ? 
256 : 128}); - case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: - RET((uint64_t []) { 1024*1024*512 /* Maybe get memory */ }); + case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: + RET((uint64_t[]){1024 * 1024 * 512 /* Maybe get memory */}); - case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: - RET((uint64_t []) { 32768 }); + case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: + RET((uint64_t[]){32768}); - case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: - case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: - RET((uint64_t []) { 4096 }); + case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: + case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: + RET((uint64_t[]){4096}); - case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: - RET((uint64_t []) { 1024*1024*512 /* Maybe get memory */ }); + case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: + RET((uint64_t[]){1024 * 1024 * 512 /* Maybe get memory */}); - case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: - RET((uint32_t []) { 800 /* MHz -- TODO */ }); + case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: + RET((uint32_t[]){800 /* MHz -- TODO */}); - case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: - RET((uint32_t []) { dev->core_count }); + case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: + RET((uint32_t[]){dev->core_count}); - case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: - RET((uint32_t []) { 1 }); + case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: + RET((uint32_t[]){1}); - case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: - RET((uint32_t []) { pan_subgroup_size(dev->arch) }); + case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: + RET((uint32_t[]){pan_subgroup_size(dev->arch)}); - case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK: - RET((uint64_t []) { 1024 }); // TODO - } + case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK: + RET((uint64_t[]){1024}); // TODO + } - return 0; + return 0; } static void panfrost_destroy_screen(struct pipe_screen *pscreen) { - struct panfrost_device *dev = pan_device(pscreen); - struct panfrost_screen *screen = pan_screen(pscreen); + struct panfrost_device *dev = pan_device(pscreen); + struct panfrost_screen *screen = pan_screen(pscreen); - panfrost_resource_screen_destroy(pscreen); - panfrost_pool_cleanup(&screen->blitter.bin_pool); - panfrost_pool_cleanup(&screen->blitter.desc_pool); - pan_blend_shaders_cleanup(dev); + panfrost_resource_screen_destroy(pscreen); + panfrost_pool_cleanup(&screen->blitter.bin_pool); + panfrost_pool_cleanup(&screen->blitter.desc_pool); + pan_blend_shaders_cleanup(dev); - if (screen->vtbl.screen_destroy) - screen->vtbl.screen_destroy(pscreen); + if (screen->vtbl.screen_destroy) + screen->vtbl.screen_destroy(pscreen); - if (dev->ro) - dev->ro->destroy(dev->ro); - panfrost_close_device(dev); + if (dev->ro) + dev->ro->destroy(dev->ro); + panfrost_close_device(dev); - disk_cache_destroy(screen->disk_cache); - ralloc_free(pscreen); + disk_cache_destroy(screen->disk_cache); + ralloc_free(pscreen); } static const void * @@ -787,104 +787,104 @@ panfrost_screen_get_compiler_options(struct pipe_screen *pscreen, enum pipe_shader_ir ir, enum pipe_shader_type shader) { - return pan_screen(pscreen)->vtbl.get_compiler_options(); + return pan_screen(pscreen)->vtbl.get_compiler_options(); } static struct disk_cache * panfrost_get_disk_shader_cache(struct pipe_screen *pscreen) { - return pan_screen(pscreen)->disk_cache; + return pan_screen(pscreen)->disk_cache; } int panfrost_get_driver_query_info(struct pipe_screen *pscreen, unsigned index, struct pipe_driver_query_info *info) { - int num_queries = ARRAY_SIZE(panfrost_driver_query_list); + int num_queries = ARRAY_SIZE(panfrost_driver_query_list); - if (!info) - return num_queries; + if (!info) + return num_queries; - if (index >= 
num_queries) - return 0; + if (index >= num_queries) + return 0; - *info = panfrost_driver_query_list[index]; + *info = panfrost_driver_query_list[index]; - return 1; + return 1; } - struct pipe_screen * panfrost_create_screen(int fd, struct renderonly *ro) { - /* Create the screen */ - struct panfrost_screen *screen = rzalloc(NULL, struct panfrost_screen); + /* Create the screen */ + struct panfrost_screen *screen = rzalloc(NULL, struct panfrost_screen); - if (!screen) - return NULL; + if (!screen) + return NULL; - struct panfrost_device *dev = pan_device(&screen->base); + struct panfrost_device *dev = pan_device(&screen->base); - /* Debug must be set first for pandecode to work correctly */ - dev->debug = debug_get_flags_option("PAN_MESA_DEBUG", panfrost_debug_options, 0); - panfrost_open_device(screen, fd, dev); + /* Debug must be set first for pandecode to work correctly */ + dev->debug = + debug_get_flags_option("PAN_MESA_DEBUG", panfrost_debug_options, 0); + panfrost_open_device(screen, fd, dev); - if (dev->debug & PAN_DBG_NO_AFBC) - dev->has_afbc = false; + if (dev->debug & PAN_DBG_NO_AFBC) + dev->has_afbc = false; - /* Bail early on unsupported hardware */ - if (dev->model == NULL) { - debug_printf("panfrost: Unsupported model %X", dev->gpu_id); - panfrost_destroy_screen(&(screen->base)); - return NULL; - } + /* Bail early on unsupported hardware */ + if (dev->model == NULL) { + debug_printf("panfrost: Unsupported model %X", dev->gpu_id); + panfrost_destroy_screen(&(screen->base)); + return NULL; + } - dev->ro = ro; + dev->ro = ro; - screen->base.destroy = panfrost_destroy_screen; + screen->base.destroy = panfrost_destroy_screen; - screen->base.get_name = panfrost_get_name; - screen->base.get_vendor = panfrost_get_vendor; - screen->base.get_device_vendor = panfrost_get_device_vendor; - screen->base.get_driver_query_info = panfrost_get_driver_query_info; - screen->base.get_param = panfrost_get_param; - screen->base.get_shader_param = panfrost_get_shader_param; - screen->base.get_compute_param = panfrost_get_compute_param; - screen->base.get_paramf = panfrost_get_paramf; - screen->base.get_timestamp = u_default_get_timestamp; - screen->base.is_format_supported = panfrost_is_format_supported; - screen->base.query_dmabuf_modifiers = panfrost_query_dmabuf_modifiers; - screen->base.is_dmabuf_modifier_supported = - panfrost_is_dmabuf_modifier_supported; - screen->base.context_create = panfrost_create_context; - screen->base.get_compiler_options = panfrost_screen_get_compiler_options; - screen->base.get_disk_shader_cache = panfrost_get_disk_shader_cache; - screen->base.fence_reference = panfrost_fence_reference; - screen->base.fence_finish = panfrost_fence_finish; - screen->base.fence_get_fd = panfrost_fence_get_fd; - screen->base.set_damage_region = panfrost_resource_set_damage_region; + screen->base.get_name = panfrost_get_name; + screen->base.get_vendor = panfrost_get_vendor; + screen->base.get_device_vendor = panfrost_get_device_vendor; + screen->base.get_driver_query_info = panfrost_get_driver_query_info; + screen->base.get_param = panfrost_get_param; + screen->base.get_shader_param = panfrost_get_shader_param; + screen->base.get_compute_param = panfrost_get_compute_param; + screen->base.get_paramf = panfrost_get_paramf; + screen->base.get_timestamp = u_default_get_timestamp; + screen->base.is_format_supported = panfrost_is_format_supported; + screen->base.query_dmabuf_modifiers = panfrost_query_dmabuf_modifiers; + screen->base.is_dmabuf_modifier_supported = + 
panfrost_is_dmabuf_modifier_supported; + screen->base.context_create = panfrost_create_context; + screen->base.get_compiler_options = panfrost_screen_get_compiler_options; + screen->base.get_disk_shader_cache = panfrost_get_disk_shader_cache; + screen->base.fence_reference = panfrost_fence_reference; + screen->base.fence_finish = panfrost_fence_finish; + screen->base.fence_get_fd = panfrost_fence_get_fd; + screen->base.set_damage_region = panfrost_resource_set_damage_region; - panfrost_resource_screen_init(&screen->base); - pan_blend_shaders_init(dev); + panfrost_resource_screen_init(&screen->base); + pan_blend_shaders_init(dev); - panfrost_disk_cache_init(screen); + panfrost_disk_cache_init(screen); - panfrost_pool_init(&screen->blitter.bin_pool, NULL, dev, PAN_BO_EXECUTE, - 4096, "Blitter shaders", false, true); - panfrost_pool_init(&screen->blitter.desc_pool, NULL, dev, 0, 65536, - "Blitter RSDs", false, true); - if (dev->arch == 4) - panfrost_cmdstream_screen_init_v4(screen); - else if (dev->arch == 5) - panfrost_cmdstream_screen_init_v5(screen); - else if (dev->arch == 6) - panfrost_cmdstream_screen_init_v6(screen); - else if (dev->arch == 7) - panfrost_cmdstream_screen_init_v7(screen); - else if (dev->arch == 9) - panfrost_cmdstream_screen_init_v9(screen); - else - unreachable("Unhandled architecture major"); + panfrost_pool_init(&screen->blitter.bin_pool, NULL, dev, PAN_BO_EXECUTE, + 4096, "Blitter shaders", false, true); + panfrost_pool_init(&screen->blitter.desc_pool, NULL, dev, 0, 65536, + "Blitter RSDs", false, true); + if (dev->arch == 4) + panfrost_cmdstream_screen_init_v4(screen); + else if (dev->arch == 5) + panfrost_cmdstream_screen_init_v5(screen); + else if (dev->arch == 6) + panfrost_cmdstream_screen_init_v6(screen); + else if (dev->arch == 7) + panfrost_cmdstream_screen_init_v7(screen); + else if (dev->arch == 9) + panfrost_cmdstream_screen_init_v9(screen); + else + unreachable("Unhandled architecture major"); - return &screen->base; + return &screen->base; } diff --git a/src/gallium/drivers/panfrost/pan_screen.h b/src/gallium/drivers/panfrost/pan_screen.h index f3f7df41892..f813725d7d7 100644 --- a/src/gallium/drivers/panfrost/pan_screen.h +++ b/src/gallium/drivers/panfrost/pan_screen.h @@ -30,14 +30,14 @@ #define PAN_SCREEN_H #include -#include "pipe/p_screen.h" #include "pipe/p_defines.h" +#include "pipe/p_screen.h" #include "renderonly/renderonly.h" -#include "util/u_dynarray.h" #include "util/bitset.h" -#include "util/set.h" -#include "util/log.h" #include "util/disk_cache.h" +#include "util/log.h" +#include "util/set.h" +#include "util/u_dynarray.h" #include "pan_device.h" #include "pan_mempool.h" @@ -45,7 +45,7 @@ #define PAN_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0) static const struct pipe_driver_query_info panfrost_driver_query_list[] = { - {"draw-calls", PAN_QUERY_DRAW_CALLS, { 0 }}, + {"draw-calls", PAN_QUERY_DRAW_CALLS, {0}}, }; struct panfrost_batch; @@ -58,77 +58,74 @@ struct pan_blend_state; /* Virtual table of per-generation (GenXML) functions */ struct panfrost_vtable { - /* Prepares the renderer state descriptor or shader program descriptor - * for a given compiled shader, and if desired uploads it as well */ - void (*prepare_shader)(struct panfrost_compiled_shader *, - struct panfrost_pool *, bool); + /* Prepares the renderer state descriptor or shader program descriptor + * for a given compiled shader, and if desired uploads it as well */ + void (*prepare_shader)(struct panfrost_compiled_shader *, + struct panfrost_pool *, bool); - /* Emits 
a thread local storage descriptor */ - void (*emit_tls)(struct panfrost_batch *); + /* Emits a thread local storage descriptor */ + void (*emit_tls)(struct panfrost_batch *); - /* Emits a framebuffer descriptor */ - void (*emit_fbd)(struct panfrost_batch *, const struct pan_fb_info *); + /* Emits a framebuffer descriptor */ + void (*emit_fbd)(struct panfrost_batch *, const struct pan_fb_info *); - /* Emits a fragment job */ - mali_ptr (*emit_fragment_job)(struct panfrost_batch *, const struct pan_fb_info *); + /* Emits a fragment job */ + mali_ptr (*emit_fragment_job)(struct panfrost_batch *, + const struct pan_fb_info *); - /* General destructor */ - void (*screen_destroy)(struct pipe_screen *); + /* General destructor */ + void (*screen_destroy)(struct pipe_screen *); - /* Preload framebuffer */ - void (*preload)(struct panfrost_batch *, struct pan_fb_info *); + /* Preload framebuffer */ + void (*preload)(struct panfrost_batch *, struct pan_fb_info *); - /* Initialize a Gallium context */ - void (*context_init)(struct pipe_context *pipe); + /* Initialize a Gallium context */ + void (*context_init)(struct pipe_context *pipe); - /* Device-dependent initialization of a panfrost_batch */ - void (*init_batch)(struct panfrost_batch *batch); + /* Device-dependent initialization of a panfrost_batch */ + void (*init_batch)(struct panfrost_batch *batch); - /* Get blend shader */ - struct pan_blend_shader_variant * - (*get_blend_shader)(const struct panfrost_device *, - const struct pan_blend_state *, - nir_alu_type, nir_alu_type, - unsigned rt); + /* Get blend shader */ + struct pan_blend_shader_variant *(*get_blend_shader)( + const struct panfrost_device *, const struct pan_blend_state *, + nir_alu_type, nir_alu_type, unsigned rt); - /* Initialize the polygon list */ - void (*init_polygon_list)(struct panfrost_batch *); + /* Initialize the polygon list */ + void (*init_polygon_list)(struct panfrost_batch *); - /* Shader compilation methods */ - const nir_shader_compiler_options *(*get_compiler_options)(void); - void (*compile_shader)(nir_shader *s, - struct panfrost_compile_inputs *inputs, - struct util_dynarray *binary, - struct pan_shader_info *info); + /* Shader compilation methods */ + const nir_shader_compiler_options *(*get_compiler_options)(void); + void (*compile_shader)(nir_shader *s, struct panfrost_compile_inputs *inputs, + struct util_dynarray *binary, + struct pan_shader_info *info); }; struct panfrost_screen { - struct pipe_screen base; - struct panfrost_device dev; - struct { - struct panfrost_pool bin_pool; - struct panfrost_pool desc_pool; - } blitter; + struct pipe_screen base; + struct panfrost_device dev; + struct { + struct panfrost_pool bin_pool; + struct panfrost_pool desc_pool; + } blitter; - struct panfrost_vtable vtbl; - struct disk_cache *disk_cache; + struct panfrost_vtable vtbl; + struct disk_cache *disk_cache; }; static inline struct panfrost_screen * pan_screen(struct pipe_screen *p) { - return (struct panfrost_screen *)p; + return (struct panfrost_screen *)p; } static inline struct panfrost_device * pan_device(struct pipe_screen *p) { - return &(pan_screen(p)->dev); + return &(pan_screen(p)->dev); } -int -panfrost_get_driver_query_info(struct pipe_screen *pscreen, unsigned index, - struct pipe_driver_query_info *info); +int panfrost_get_driver_query_info(struct pipe_screen *pscreen, unsigned index, + struct pipe_driver_query_info *info); void panfrost_cmdstream_screen_init_v4(struct panfrost_screen *screen); void panfrost_cmdstream_screen_init_v5(struct 
panfrost_screen *screen); @@ -136,13 +133,13 @@ void panfrost_cmdstream_screen_init_v6(struct panfrost_screen *screen); void panfrost_cmdstream_screen_init_v7(struct panfrost_screen *screen); void panfrost_cmdstream_screen_init_v9(struct panfrost_screen *screen); -#define perf_debug(dev, ...) \ - do { \ - if (unlikely((dev)->debug & PAN_DBG_PERF)) \ - mesa_logw(__VA_ARGS__); \ - } while(0) +#define perf_debug(dev, ...) \ + do { \ + if (unlikely((dev)->debug & PAN_DBG_PERF)) \ + mesa_logw(__VA_ARGS__); \ + } while (0) -#define perf_debug_ctx(ctx, ...) \ - perf_debug(pan_device((ctx)->base.screen), __VA_ARGS__); +#define perf_debug_ctx(ctx, ...) \ + perf_debug(pan_device((ctx)->base.screen), __VA_ARGS__); #endif /* PAN_SCREEN_H */ diff --git a/src/gallium/drivers/panfrost/pan_shader.c b/src/gallium/drivers/panfrost/pan_shader.c index e77343a65ac..e3935651219 100644 --- a/src/gallium/drivers/panfrost/pan_shader.c +++ b/src/gallium/drivers/panfrost/pan_shader.c @@ -28,103 +28,96 @@ * */ -#include "pan_context.h" -#include "pan_bo.h" #include "pan_shader.h" -#include "util/u_memory.h" #include "nir/tgsi_to_nir.h" +#include "util/u_memory.h" #include "nir_serialize.h" +#include "pan_bo.h" +#include "pan_context.h" static struct panfrost_uncompiled_shader * panfrost_alloc_shader(const nir_shader *nir) { - struct panfrost_uncompiled_shader *so = - rzalloc(NULL, struct panfrost_uncompiled_shader); + struct panfrost_uncompiled_shader *so = + rzalloc(NULL, struct panfrost_uncompiled_shader); - simple_mtx_init(&so->lock, mtx_plain); - util_dynarray_init(&so->variants, so); + simple_mtx_init(&so->lock, mtx_plain); + util_dynarray_init(&so->variants, so); - so->nir = nir; + so->nir = nir; - /* Serialize the NIR to a binary blob that we can hash for the disk - * cache. Drop unnecessary information (like variable names) so the - * serialized NIR is smaller, and also to let us detect more isomorphic - * shaders when hashing, increasing cache hits. - */ - struct blob blob; - blob_init(&blob); - nir_serialize(&blob, nir, true); - _mesa_sha1_compute(blob.data, blob.size, so->nir_sha1); - blob_finish(&blob); + /* Serialize the NIR to a binary blob that we can hash for the disk + * cache. Drop unnecessary information (like variable names) so the + * serialized NIR is smaller, and also to let us detect more isomorphic + * shaders when hashing, increasing cache hits. 
+ */ + struct blob blob; + blob_init(&blob); + nir_serialize(&blob, nir, true); + _mesa_sha1_compute(blob.data, blob.size, so->nir_sha1); + blob_finish(&blob); - return so; + return so; } static struct panfrost_compiled_shader * panfrost_alloc_variant(struct panfrost_uncompiled_shader *so) { - return util_dynarray_grow(&so->variants, struct panfrost_compiled_shader, 1); + return util_dynarray_grow(&so->variants, struct panfrost_compiled_shader, 1); } static void -panfrost_shader_compile(struct panfrost_screen *screen, - const nir_shader *ir, +panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir, struct util_debug_callback *dbg, - struct panfrost_shader_key *key, - unsigned req_local_mem, + struct panfrost_shader_key *key, unsigned req_local_mem, unsigned fixed_varying_mask, struct panfrost_shader_binary *out) { - struct panfrost_device *dev = pan_device(&screen->base); + struct panfrost_device *dev = pan_device(&screen->base); - nir_shader *s = nir_shader_clone(NULL, ir); + nir_shader *s = nir_shader_clone(NULL, ir); - struct panfrost_compile_inputs inputs = { - .debug = dbg, - .gpu_id = dev->gpu_id, - .fixed_sysval_ubo = -1, - }; + struct panfrost_compile_inputs inputs = { + .debug = dbg, + .gpu_id = dev->gpu_id, + .fixed_sysval_ubo = -1, + }; - /* Lower this early so the backends don't have to worry about it */ - if (s->info.stage == MESA_SHADER_FRAGMENT) { - inputs.fixed_varying_mask = key->fs.fixed_varying_mask; + /* Lower this early so the backends don't have to worry about it */ + if (s->info.stage == MESA_SHADER_FRAGMENT) { + inputs.fixed_varying_mask = key->fs.fixed_varying_mask; - if (s->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) { - NIR_PASS_V(s, nir_lower_fragcolor, - key->fs.nr_cbufs_for_fragcolor); - } + if (s->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) { + NIR_PASS_V(s, nir_lower_fragcolor, key->fs.nr_cbufs_for_fragcolor); + } - if (key->fs.sprite_coord_enable) { - NIR_PASS_V(s, nir_lower_texcoord_replace, - key->fs.sprite_coord_enable, - true /* point coord is sysval */, - false /* Y-invert */); - } + if (key->fs.sprite_coord_enable) { + NIR_PASS_V(s, nir_lower_texcoord_replace, key->fs.sprite_coord_enable, + true /* point coord is sysval */, false /* Y-invert */); + } - if (key->fs.clip_plane_enable) { - NIR_PASS_V(s, nir_lower_clip_fs, - key->fs.clip_plane_enable, - false); - } + if (key->fs.clip_plane_enable) { + NIR_PASS_V(s, nir_lower_clip_fs, key->fs.clip_plane_enable, false); + } - memcpy(inputs.rt_formats, key->fs.rt_formats, sizeof(inputs.rt_formats)); - } else if (s->info.stage == MESA_SHADER_VERTEX) { - inputs.fixed_varying_mask = fixed_varying_mask; + memcpy(inputs.rt_formats, key->fs.rt_formats, sizeof(inputs.rt_formats)); + } else if (s->info.stage == MESA_SHADER_VERTEX) { + inputs.fixed_varying_mask = fixed_varying_mask; - /* No IDVS for internal XFB shaders */ - inputs.no_idvs = s->info.has_transform_feedback_varyings; - } + /* No IDVS for internal XFB shaders */ + inputs.no_idvs = s->info.has_transform_feedback_varyings; + } - util_dynarray_init(&out->binary, NULL); - screen->vtbl.compile_shader(s, &inputs, &out->binary, &out->info); + util_dynarray_init(&out->binary, NULL); + screen->vtbl.compile_shader(s, &inputs, &out->binary, &out->info); - assert(req_local_mem >= out->info.wls_size); - out->info.wls_size = req_local_mem; + assert(req_local_mem >= out->info.wls_size); + out->info.wls_size = req_local_mem; - /* In both clone and tgsi_to_nir paths, the shader is ralloc'd against - * a NULL context - */ - 
ralloc_free(s); + /* In both clone and tgsi_to_nir paths, the shader is ralloc'd against + * a NULL context + */ + ralloc_free(s); } static void @@ -136,287 +129,288 @@ panfrost_shader_get(struct pipe_screen *pscreen, struct panfrost_compiled_shader *state, unsigned req_local_mem) { - struct panfrost_screen *screen = pan_screen(pscreen); - struct panfrost_device *dev = pan_device(pscreen); + struct panfrost_screen *screen = pan_screen(pscreen); + struct panfrost_device *dev = pan_device(pscreen); - struct panfrost_shader_binary res = { 0 }; + struct panfrost_shader_binary res = {0}; - /* Try to retrieve the variant from the disk cache. If that fails, - * compile a new variant and store in the disk cache for later reuse. - */ - if (!panfrost_disk_cache_retrieve(screen->disk_cache, uncompiled, &state->key, &res)) { - panfrost_shader_compile(screen, uncompiled->nir, dbg, &state->key, - req_local_mem, - uncompiled->fixed_varying_mask, &res); + /* Try to retrieve the variant from the disk cache. If that fails, + * compile a new variant and store in the disk cache for later reuse. + */ + if (!panfrost_disk_cache_retrieve(screen->disk_cache, uncompiled, + &state->key, &res)) { + panfrost_shader_compile(screen, uncompiled->nir, dbg, &state->key, + req_local_mem, uncompiled->fixed_varying_mask, + &res); - panfrost_disk_cache_store(screen->disk_cache, uncompiled, &state->key, &res); - } + panfrost_disk_cache_store(screen->disk_cache, uncompiled, &state->key, + &res); + } - state->info = res.info; + state->info = res.info; - if (res.binary.size) { - state->bin = panfrost_pool_take_ref(shader_pool, - pan_pool_upload_aligned(&shader_pool->base, - res.binary.data, res.binary.size, 128)); - } + if (res.binary.size) { + state->bin = panfrost_pool_take_ref( + shader_pool, + pan_pool_upload_aligned(&shader_pool->base, res.binary.data, + res.binary.size, 128)); + } - util_dynarray_fini(&res.binary); + util_dynarray_fini(&res.binary); - /* Don't upload RSD for fragment shaders since they need draw-time - * merging for e.g. depth/stencil/alpha. RSDs are replaced by simpler - * shader program descriptors on Valhall, which can be preuploaded even - * for fragment shaders. */ - bool upload = !(uncompiled->nir->info.stage == MESA_SHADER_FRAGMENT && dev->arch <= 7); - screen->vtbl.prepare_shader(state, desc_pool, upload); + /* Don't upload RSD for fragment shaders since they need draw-time + * merging for e.g. depth/stencil/alpha. RSDs are replaced by simpler + * shader program descriptors on Valhall, which can be preuploaded even + * for fragment shaders. 
*/ + bool upload = + !(uncompiled->nir->info.stage == MESA_SHADER_FRAGMENT && dev->arch <= 7); + screen->vtbl.prepare_shader(state, desc_pool, upload); - panfrost_analyze_sysvals(state); + panfrost_analyze_sysvals(state); } static void panfrost_build_key(struct panfrost_context *ctx, - struct panfrost_shader_key *key, - const nir_shader *nir) + struct panfrost_shader_key *key, const nir_shader *nir) { - /* We don't currently have vertex shader variants */ - if (nir->info.stage != MESA_SHADER_FRAGMENT) - return; + /* We don't currently have vertex shader variants */ + if (nir->info.stage != MESA_SHADER_FRAGMENT) + return; - struct panfrost_device *dev = pan_device(ctx->base.screen); - struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer; - struct pipe_rasterizer_state *rast = (void *) ctx->rasterizer; - struct panfrost_uncompiled_shader *vs = ctx->uncompiled[MESA_SHADER_VERTEX]; + struct panfrost_device *dev = pan_device(ctx->base.screen); + struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer; + struct pipe_rasterizer_state *rast = (void *)ctx->rasterizer; + struct panfrost_uncompiled_shader *vs = ctx->uncompiled[MESA_SHADER_VERTEX]; - /* gl_FragColor lowering needs the number of colour buffers */ - if (nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) { - key->fs.nr_cbufs_for_fragcolor = fb->nr_cbufs; - } + /* gl_FragColor lowering needs the number of colour buffers */ + if (nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) { + key->fs.nr_cbufs_for_fragcolor = fb->nr_cbufs; + } - /* Point sprite lowering needed on Bifrost and newer */ - if (dev->arch >= 6 && rast && ctx->active_prim == PIPE_PRIM_POINTS) { - key->fs.sprite_coord_enable = rast->sprite_coord_enable; - } + /* Point sprite lowering needed on Bifrost and newer */ + if (dev->arch >= 6 && rast && ctx->active_prim == PIPE_PRIM_POINTS) { + key->fs.sprite_coord_enable = rast->sprite_coord_enable; + } - /* User clip plane lowering needed everywhere */ - if (rast) { - key->fs.clip_plane_enable = rast->clip_plane_enable; - } + /* User clip plane lowering needed everywhere */ + if (rast) { + key->fs.clip_plane_enable = rast->clip_plane_enable; + } - if (dev->arch <= 5) { - u_foreach_bit(i, (nir->info.outputs_read >> FRAG_RESULT_DATA0)) { - enum pipe_format fmt = PIPE_FORMAT_R8G8B8A8_UNORM; + if (dev->arch <= 5) { + u_foreach_bit(i, (nir->info.outputs_read >> FRAG_RESULT_DATA0)) { + enum pipe_format fmt = PIPE_FORMAT_R8G8B8A8_UNORM; - if ((fb->nr_cbufs > i) && fb->cbufs[i]) - fmt = fb->cbufs[i]->format; + if ((fb->nr_cbufs > i) && fb->cbufs[i]) + fmt = fb->cbufs[i]->format; - if (panfrost_blendable_formats_v6[fmt].internal) - fmt = PIPE_FORMAT_NONE; + if (panfrost_blendable_formats_v6[fmt].internal) + fmt = PIPE_FORMAT_NONE; - key->fs.rt_formats[i] = fmt; - } - } + key->fs.rt_formats[i] = fmt; + } + } - /* Funny desktop GL varying lowering on Valhall */ - if (dev->arch >= 9) { - assert(vs != NULL && "too early"); - key->fs.fixed_varying_mask = vs->fixed_varying_mask; - } + /* Funny desktop GL varying lowering on Valhall */ + if (dev->arch >= 9) { + assert(vs != NULL && "too early"); + key->fs.fixed_varying_mask = vs->fixed_varying_mask; + } } static struct panfrost_compiled_shader * -panfrost_new_variant_locked( - struct panfrost_context *ctx, - struct panfrost_uncompiled_shader *uncompiled, - struct panfrost_shader_key *key) +panfrost_new_variant_locked(struct panfrost_context *ctx, + struct panfrost_uncompiled_shader *uncompiled, + struct panfrost_shader_key *key) { - struct panfrost_compiled_shader 
*prog = panfrost_alloc_variant(uncompiled); + struct panfrost_compiled_shader *prog = panfrost_alloc_variant(uncompiled); - *prog = (struct panfrost_compiled_shader) { - .key = *key, - .stream_output = uncompiled->stream_output, - }; + *prog = (struct panfrost_compiled_shader){ + .key = *key, + .stream_output = uncompiled->stream_output, + }; - panfrost_shader_get(ctx->base.screen, &ctx->shaders, &ctx->descs, - uncompiled, &ctx->base.debug, prog, 0); + panfrost_shader_get(ctx->base.screen, &ctx->shaders, &ctx->descs, uncompiled, + &ctx->base.debug, prog, 0); - prog->earlyzs = pan_earlyzs_analyze(&prog->info); + prog->earlyzs = pan_earlyzs_analyze(&prog->info); - return prog; + return prog; } static void -panfrost_bind_shader_state( - struct pipe_context *pctx, - void *hwcso, - enum pipe_shader_type type) +panfrost_bind_shader_state(struct pipe_context *pctx, void *hwcso, + enum pipe_shader_type type) { - struct panfrost_context *ctx = pan_context(pctx); - ctx->uncompiled[type] = hwcso; - ctx->prog[type] = NULL; + struct panfrost_context *ctx = pan_context(pctx); + ctx->uncompiled[type] = hwcso; + ctx->prog[type] = NULL; - ctx->dirty |= PAN_DIRTY_TLS_SIZE; - ctx->dirty_shader[type] |= PAN_DIRTY_STAGE_SHADER; + ctx->dirty |= PAN_DIRTY_TLS_SIZE; + ctx->dirty_shader[type] |= PAN_DIRTY_STAGE_SHADER; - if (hwcso) - panfrost_update_shader_variant(ctx, type); + if (hwcso) + panfrost_update_shader_variant(ctx, type); } void panfrost_update_shader_variant(struct panfrost_context *ctx, enum pipe_shader_type type) { - /* No shader variants for compute */ - if (type == PIPE_SHADER_COMPUTE) - return; + /* No shader variants for compute */ + if (type == PIPE_SHADER_COMPUTE) + return; - /* We need linking information, defer this */ - if (type == PIPE_SHADER_FRAGMENT && !ctx->uncompiled[PIPE_SHADER_VERTEX]) - return; + /* We need linking information, defer this */ + if (type == PIPE_SHADER_FRAGMENT && !ctx->uncompiled[PIPE_SHADER_VERTEX]) + return; - /* Also defer, happens with GALLIUM_HUD */ - if (!ctx->uncompiled[type]) - return; + /* Also defer, happens with GALLIUM_HUD */ + if (!ctx->uncompiled[type]) + return; - /* Match the appropriate variant */ - struct panfrost_uncompiled_shader *uncompiled = ctx->uncompiled[type]; - struct panfrost_compiled_shader *compiled = NULL; + /* Match the appropriate variant */ + struct panfrost_uncompiled_shader *uncompiled = ctx->uncompiled[type]; + struct panfrost_compiled_shader *compiled = NULL; - simple_mtx_lock(&uncompiled->lock); + simple_mtx_lock(&uncompiled->lock); - struct panfrost_shader_key key = { 0 }; - panfrost_build_key(ctx, &key, uncompiled->nir); + struct panfrost_shader_key key = {0}; + panfrost_build_key(ctx, &key, uncompiled->nir); - util_dynarray_foreach(&uncompiled->variants, struct panfrost_compiled_shader, so) { - if (memcmp(&key, &so->key, sizeof(key)) == 0) { - compiled = so; - break; - } - } + util_dynarray_foreach(&uncompiled->variants, struct panfrost_compiled_shader, + so) { + if (memcmp(&key, &so->key, sizeof(key)) == 0) { + compiled = so; + break; + } + } - if (compiled == NULL) - compiled = panfrost_new_variant_locked(ctx, uncompiled, &key); + if (compiled == NULL) + compiled = panfrost_new_variant_locked(ctx, uncompiled, &key); - ctx->prog[type] = compiled; + ctx->prog[type] = compiled; - /* TODO: it would be more efficient to release the lock before - * compiling instead of after, but that can race if thread A compiles a - * variant while thread B searches for that same variant */ - simple_mtx_unlock(&uncompiled->lock); + /* TODO: it 
would be more efficient to release the lock before + * compiling instead of after, but that can race if thread A compiles a + * variant while thread B searches for that same variant */ + simple_mtx_unlock(&uncompiled->lock); } static void panfrost_bind_vs_state(struct pipe_context *pctx, void *hwcso) { - panfrost_bind_shader_state(pctx, hwcso, PIPE_SHADER_VERTEX); + panfrost_bind_shader_state(pctx, hwcso, PIPE_SHADER_VERTEX); - /* Fragment shaders are linked with vertex shaders */ - struct panfrost_context *ctx = pan_context(pctx); - panfrost_update_shader_variant(ctx, PIPE_SHADER_FRAGMENT); + /* Fragment shaders are linked with vertex shaders */ + struct panfrost_context *ctx = pan_context(pctx); + panfrost_update_shader_variant(ctx, PIPE_SHADER_FRAGMENT); } static void panfrost_bind_fs_state(struct pipe_context *pctx, void *hwcso) { - panfrost_bind_shader_state(pctx, hwcso, PIPE_SHADER_FRAGMENT); + panfrost_bind_shader_state(pctx, hwcso, PIPE_SHADER_FRAGMENT); } static void * -panfrost_create_shader_state( - struct pipe_context *pctx, - const struct pipe_shader_state *cso) +panfrost_create_shader_state(struct pipe_context *pctx, + const struct pipe_shader_state *cso) { - nir_shader *nir = (cso->type == PIPE_SHADER_IR_TGSI) ? - tgsi_to_nir(cso->tokens, pctx->screen, false) : - cso->ir.nir; + nir_shader *nir = (cso->type == PIPE_SHADER_IR_TGSI) + ? tgsi_to_nir(cso->tokens, pctx->screen, false) + : cso->ir.nir; - struct panfrost_uncompiled_shader *so = panfrost_alloc_shader(nir); + struct panfrost_uncompiled_shader *so = panfrost_alloc_shader(nir); - /* The driver gets ownership of the nir_shader for graphics. The NIR is - * ralloc'd. Free the NIR when we free the uncompiled shader. - */ - ralloc_steal(so, nir); + /* The driver gets ownership of the nir_shader for graphics. The NIR is + * ralloc'd. Free the NIR when we free the uncompiled shader. + */ + ralloc_steal(so, nir); - so->stream_output = cso->stream_output; - so->nir = nir; + so->stream_output = cso->stream_output; + so->nir = nir; - /* Fix linkage early */ - if (so->nir->info.stage == MESA_SHADER_VERTEX) { - so->fixed_varying_mask = - (so->nir->info.outputs_written & BITFIELD_MASK(VARYING_SLOT_VAR0)) & - ~VARYING_BIT_POS & ~VARYING_BIT_PSIZ; - } + /* Fix linkage early */ + if (so->nir->info.stage == MESA_SHADER_VERTEX) { + so->fixed_varying_mask = + (so->nir->info.outputs_written & BITFIELD_MASK(VARYING_SLOT_VAR0)) & + ~VARYING_BIT_POS & ~VARYING_BIT_PSIZ; + } - /* If this shader uses transform feedback, compile the transform - * feedback program. This is a special shader variant. - */ - struct panfrost_context *ctx = pan_context(pctx); + /* If this shader uses transform feedback, compile the transform + * feedback program. This is a special shader variant. 
+ */ + struct panfrost_context *ctx = pan_context(pctx); - if (so->nir->xfb_info) { - nir_shader *xfb = nir_shader_clone(NULL, so->nir); - xfb->info.name = ralloc_asprintf(xfb, "%s@xfb", xfb->info.name); - xfb->info.internal = true; + if (so->nir->xfb_info) { + nir_shader *xfb = nir_shader_clone(NULL, so->nir); + xfb->info.name = ralloc_asprintf(xfb, "%s@xfb", xfb->info.name); + xfb->info.internal = true; - so->xfb = calloc(1, sizeof(struct panfrost_compiled_shader)); - so->xfb->key.vs_is_xfb = true; + so->xfb = calloc(1, sizeof(struct panfrost_compiled_shader)); + so->xfb->key.vs_is_xfb = true; - panfrost_shader_get(ctx->base.screen, &ctx->shaders, &ctx->descs, - so, &ctx->base.debug, so->xfb, 0); + panfrost_shader_get(ctx->base.screen, &ctx->shaders, &ctx->descs, so, + &ctx->base.debug, so->xfb, 0); - /* Since transform feedback is handled via the transform - * feedback program, the original program no longer uses XFB - */ - nir->info.has_transform_feedback_varyings = false; - } + /* Since transform feedback is handled via the transform + * feedback program, the original program no longer uses XFB + */ + nir->info.has_transform_feedback_varyings = false; + } - /* Compile the program. We don't use vertex shader keys, so there will - * be no further vertex shader variants. We do have fragment shader - * keys, but we can still compile with a default key that will work most - * of the time. - */ - struct panfrost_shader_key key = { 0 }; + /* Compile the program. We don't use vertex shader keys, so there will + * be no further vertex shader variants. We do have fragment shader + * keys, but we can still compile with a default key that will work most + * of the time. + */ + struct panfrost_shader_key key = {0}; - /* gl_FragColor lowering needs the number of colour buffers on desktop - * GL, where it acts as an implicit broadcast to all colour buffers. - * - * However, gl_FragColor is a legacy feature, so assume that if - * gl_FragColor is used, there is only a single render target. The - * implicit broadcast is neither especially useful nor required by GLES. - */ - if (so->nir->info.stage == MESA_SHADER_FRAGMENT && - so->nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) { + /* gl_FragColor lowering needs the number of colour buffers on desktop + * GL, where it acts as an implicit broadcast to all colour buffers. + * + * However, gl_FragColor is a legacy feature, so assume that if + * gl_FragColor is used, there is only a single render target. The + * implicit broadcast is neither especially useful nor required by GLES. + */ + if (so->nir->info.stage == MESA_SHADER_FRAGMENT && + so->nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) { - key.fs.nr_cbufs_for_fragcolor = 1; - } + key.fs.nr_cbufs_for_fragcolor = 1; + } - /* Creating a CSO is single-threaded, so it's ok to use the - * locked function without explicitly taking the lock. Creating a - * default variant acts as a precompile. - */ - panfrost_new_variant_locked(ctx, so, &key); + /* Creating a CSO is single-threaded, so it's ok to use the + * locked function without explicitly taking the lock. Creating a + * default variant acts as a precompile. 
+ */ + panfrost_new_variant_locked(ctx, so, &key); - return so; + return so; } static void panfrost_delete_shader_state(struct pipe_context *pctx, void *so) { - struct panfrost_uncompiled_shader *cso = (struct panfrost_uncompiled_shader *) so; + struct panfrost_uncompiled_shader *cso = + (struct panfrost_uncompiled_shader *)so; - util_dynarray_foreach(&cso->variants, struct panfrost_compiled_shader, so) { - panfrost_bo_unreference(so->bin.bo); - panfrost_bo_unreference(so->state.bo); - panfrost_bo_unreference(so->linkage.bo); - } + util_dynarray_foreach(&cso->variants, struct panfrost_compiled_shader, so) { + panfrost_bo_unreference(so->bin.bo); + panfrost_bo_unreference(so->state.bo); + panfrost_bo_unreference(so->linkage.bo); + } - if (cso->xfb) { - panfrost_bo_unreference(cso->xfb->bin.bo); - panfrost_bo_unreference(cso->xfb->state.bo); - panfrost_bo_unreference(cso->xfb->linkage.bo); - free(cso->xfb); - } + if (cso->xfb) { + panfrost_bo_unreference(cso->xfb->bin.bo); + panfrost_bo_unreference(cso->xfb->state.bo); + panfrost_bo_unreference(cso->xfb->linkage.bo); + free(cso->xfb); + } - simple_mtx_destroy(&cso->lock); + simple_mtx_destroy(&cso->lock); - ralloc_free(so); + ralloc_free(so); } /* @@ -424,52 +418,51 @@ panfrost_delete_shader_state(struct pipe_context *pctx, void *so) * precompiled, creating both the uncompiled and compiled shaders now. */ static void * -panfrost_create_compute_state( - struct pipe_context *pctx, - const struct pipe_compute_state *cso) +panfrost_create_compute_state(struct pipe_context *pctx, + const struct pipe_compute_state *cso) { - struct panfrost_context *ctx = pan_context(pctx); - struct panfrost_uncompiled_shader *so = panfrost_alloc_shader(cso->prog); - struct panfrost_compiled_shader *v = panfrost_alloc_variant(so); - memset(v, 0, sizeof *v); + struct panfrost_context *ctx = pan_context(pctx); + struct panfrost_uncompiled_shader *so = panfrost_alloc_shader(cso->prog); + struct panfrost_compiled_shader *v = panfrost_alloc_variant(so); + memset(v, 0, sizeof *v); - assert(cso->ir_type == PIPE_SHADER_IR_NIR && "TGSI kernels unsupported"); + assert(cso->ir_type == PIPE_SHADER_IR_NIR && "TGSI kernels unsupported"); - panfrost_shader_get(pctx->screen, &ctx->shaders, &ctx->descs, - so, &ctx->base.debug, v, cso->static_shared_mem); + panfrost_shader_get(pctx->screen, &ctx->shaders, &ctx->descs, so, + &ctx->base.debug, v, cso->static_shared_mem); - /* The NIR becomes invalid after this. For compute kernels, we never - * need to access it again. Don't keep a dangling pointer around. - */ - so->nir = NULL; + /* The NIR becomes invalid after this. For compute kernels, we never + * need to access it again. Don't keep a dangling pointer around. + */ + so->nir = NULL; - return so; + return so; } static void panfrost_bind_compute_state(struct pipe_context *pipe, void *cso) { - struct panfrost_context *ctx = pan_context(pipe); - struct panfrost_uncompiled_shader *uncompiled = cso; + struct panfrost_context *ctx = pan_context(pipe); + struct panfrost_uncompiled_shader *uncompiled = cso; - ctx->uncompiled[PIPE_SHADER_COMPUTE] = uncompiled; + ctx->uncompiled[PIPE_SHADER_COMPUTE] = uncompiled; - ctx->prog[PIPE_SHADER_COMPUTE] = - uncompiled ? util_dynarray_begin(&uncompiled->variants) : NULL; + ctx->prog[PIPE_SHADER_COMPUTE] = + uncompiled ? 
util_dynarray_begin(&uncompiled->variants) : NULL; } void panfrost_shader_context_init(struct pipe_context *pctx) { - pctx->create_vs_state = panfrost_create_shader_state; - pctx->delete_vs_state = panfrost_delete_shader_state; - pctx->bind_vs_state = panfrost_bind_vs_state; + pctx->create_vs_state = panfrost_create_shader_state; + pctx->delete_vs_state = panfrost_delete_shader_state; + pctx->bind_vs_state = panfrost_bind_vs_state; - pctx->create_fs_state = panfrost_create_shader_state; - pctx->delete_fs_state = panfrost_delete_shader_state; - pctx->bind_fs_state = panfrost_bind_fs_state; + pctx->create_fs_state = panfrost_create_shader_state; + pctx->delete_fs_state = panfrost_delete_shader_state; + pctx->bind_fs_state = panfrost_bind_fs_state; - pctx->create_compute_state = panfrost_create_compute_state; - pctx->bind_compute_state = panfrost_bind_compute_state; - pctx->delete_compute_state = panfrost_delete_shader_state; + pctx->create_compute_state = panfrost_create_compute_state; + pctx->bind_compute_state = panfrost_bind_compute_state; + pctx->delete_compute_state = panfrost_delete_shader_state; } diff --git a/src/panfrost/bifrost/bi_helper_invocations.c b/src/panfrost/bifrost/bi_helper_invocations.c index f5207d9afa8..f266cbf172f 100644 --- a/src/panfrost/bifrost/bi_helper_invocations.c +++ b/src/panfrost/bifrost/bi_helper_invocations.c @@ -64,20 +64,20 @@ static bool bi_has_skip_bit(enum bi_opcode op) { - switch (op) { - case BI_OPCODE_TEX_SINGLE: - case BI_OPCODE_TEXC: - case BI_OPCODE_TEXC_DUAL: - case BI_OPCODE_TEXS_2D_F16: - case BI_OPCODE_TEXS_2D_F32: - case BI_OPCODE_TEXS_CUBE_F16: - case BI_OPCODE_TEXS_CUBE_F32: - case BI_OPCODE_VAR_TEX_F16: - case BI_OPCODE_VAR_TEX_F32: - return true; - default: - return false; - } + switch (op) { + case BI_OPCODE_TEX_SINGLE: + case BI_OPCODE_TEXC: + case BI_OPCODE_TEXC_DUAL: + case BI_OPCODE_TEXS_2D_F16: + case BI_OPCODE_TEXS_2D_F32: + case BI_OPCODE_TEXS_CUBE_F16: + case BI_OPCODE_TEXS_CUBE_F32: + case BI_OPCODE_VAR_TEX_F16: + case BI_OPCODE_VAR_TEX_F32: + return true; + default: + return false; + } } /* Does a given instruction require helper threads to be active (because it @@ -87,52 +87,52 @@ bi_has_skip_bit(enum bi_opcode op) bool bi_instr_uses_helpers(bi_instr *I) { - switch (I->op) { - case BI_OPCODE_TEXC: - case BI_OPCODE_TEXC_DUAL: - case BI_OPCODE_TEXS_2D_F16: - case BI_OPCODE_TEXS_2D_F32: - case BI_OPCODE_TEXS_CUBE_F16: - case BI_OPCODE_TEXS_CUBE_F32: - case BI_OPCODE_VAR_TEX_F16: - case BI_OPCODE_VAR_TEX_F32: - return !I->lod_mode; /* set for zero, clear for computed */ - case BI_OPCODE_TEX_SINGLE: - return (I->va_lod_mode == BI_VA_LOD_MODE_COMPUTED_LOD) || - (I->va_lod_mode == BI_VA_LOD_MODE_COMPUTED_BIAS); - case BI_OPCODE_CLPER_I32: - case BI_OPCODE_CLPER_OLD_I32: - /* Fragment shaders require helpers to implement derivatives. - * Other shader stages don't have helpers at all */ - return true; - default: - return false; - } + switch (I->op) { + case BI_OPCODE_TEXC: + case BI_OPCODE_TEXC_DUAL: + case BI_OPCODE_TEXS_2D_F16: + case BI_OPCODE_TEXS_2D_F32: + case BI_OPCODE_TEXS_CUBE_F16: + case BI_OPCODE_TEXS_CUBE_F32: + case BI_OPCODE_VAR_TEX_F16: + case BI_OPCODE_VAR_TEX_F32: + return !I->lod_mode; /* set for zero, clear for computed */ + case BI_OPCODE_TEX_SINGLE: + return (I->va_lod_mode == BI_VA_LOD_MODE_COMPUTED_LOD) || + (I->va_lod_mode == BI_VA_LOD_MODE_COMPUTED_BIAS); + case BI_OPCODE_CLPER_I32: + case BI_OPCODE_CLPER_OLD_I32: + /* Fragment shaders require helpers to implement derivatives. 
+ * Other shader stages don't have helpers at all */ + return true; + default: + return false; + } } /* Does a block use helpers directly */ static bool bi_block_uses_helpers(bi_block *block) { - bi_foreach_instr_in_block(block, I) { - if (bi_instr_uses_helpers(I)) - return true; - } + bi_foreach_instr_in_block(block, I) { + if (bi_instr_uses_helpers(I)) + return true; + } - return false; + return false; } bool bi_block_terminates_helpers(bi_block *block) { - /* Can't terminate if a successor needs helpers */ - bi_foreach_successor(block, succ) { - if (succ->pass_flags & 1) - return false; - } + /* Can't terminate if a successor needs helpers */ + bi_foreach_successor(block, succ) { + if (succ->pass_flags & 1) + return false; + } - /* Otherwise we terminate */ - return true; + /* Otherwise we terminate */ + return true; } /* @@ -142,128 +142,130 @@ bi_block_terminates_helpers(bi_block *block) static void bi_propagate_pass_flag(bi_block *block) { - block->pass_flags = 1; + block->pass_flags = 1; - bi_foreach_predecessor(block, pred) { - if ((*pred)->pass_flags == 0) - bi_propagate_pass_flag(*pred); - } + bi_foreach_predecessor(block, pred) { + if ((*pred)->pass_flags == 0) + bi_propagate_pass_flag(*pred); + } } void bi_analyze_helper_terminate(bi_context *ctx) { - /* Other shader stages do not have a notion of helper threads, so we - * can skip the analysis. Don't run for blend shaders, either, since - * they run in the context of another shader that we don't see. */ - if (ctx->stage != MESA_SHADER_FRAGMENT || ctx->inputs->is_blend) - return; + /* Other shader stages do not have a notion of helper threads, so we + * can skip the analysis. Don't run for blend shaders, either, since + * they run in the context of another shader that we don't see. */ + if (ctx->stage != MESA_SHADER_FRAGMENT || ctx->inputs->is_blend) + return; - /* Clear flags */ - bi_foreach_block(ctx, block) - block->pass_flags = 0; + /* Clear flags */ + bi_foreach_block(ctx, block) + block->pass_flags = 0; - /* For each block, check if it uses helpers and propagate that fact if - * so. We walk in reverse order to minimize the number of blocks tested: - * if the (unique) last block uses helpers, only that block is tested. - */ - bi_foreach_block_rev(ctx, block) { - if (block->pass_flags == 0 && bi_block_uses_helpers(block)) - bi_propagate_pass_flag(block); - } + /* For each block, check if it uses helpers and propagate that fact if + * so. We walk in reverse order to minimize the number of blocks tested: + * if the (unique) last block uses helpers, only that block is tested. 
+ */ + bi_foreach_block_rev(ctx, block) { + if (block->pass_flags == 0 && bi_block_uses_helpers(block)) + bi_propagate_pass_flag(block); + } } void bi_mark_clauses_td(bi_context *ctx) { - if (ctx->stage != MESA_SHADER_FRAGMENT || ctx->inputs->is_blend) - return; + if (ctx->stage != MESA_SHADER_FRAGMENT || ctx->inputs->is_blend) + return; - /* Finally, mark clauses requiring helpers */ - bi_foreach_block(ctx, block) { - /* At the end, there are helpers iff we don't terminate */ - bool helpers = !bi_block_terminates_helpers(block); + /* Finally, mark clauses requiring helpers */ + bi_foreach_block(ctx, block) { + /* At the end, there are helpers iff we don't terminate */ + bool helpers = !bi_block_terminates_helpers(block); - bi_foreach_clause_in_block_rev(block, clause) { - bi_foreach_instr_in_clause_rev(block, clause, I) { - helpers |= bi_instr_uses_helpers(I); - } + bi_foreach_clause_in_block_rev(block, clause) { + bi_foreach_instr_in_clause_rev(block, clause, I) { + helpers |= bi_instr_uses_helpers(I); + } - clause->td = !helpers; - } - } + clause->td = !helpers; + } + } } static bool bi_helper_block_update(BITSET_WORD *deps, bi_block *block) { - bool progress = false; + bool progress = false; - bi_foreach_instr_in_block_rev(block, I) { - /* If a destination is required by helper invocation... */ - bi_foreach_dest(I, d) { - if (!BITSET_TEST(deps, I->dest[d].value)) - continue; + bi_foreach_instr_in_block_rev(block, I) { + /* If a destination is required by helper invocation... */ + bi_foreach_dest(I, d) { + if (!BITSET_TEST(deps, I->dest[d].value)) + continue; - /* ...so are the sources */ - bi_foreach_ssa_src(I, s) { - progress |= !BITSET_TEST(deps, I->src[s].value); - BITSET_SET(deps, I->src[s].value); - } + /* ...so are the sources */ + bi_foreach_ssa_src(I, s) { + progress |= !BITSET_TEST(deps, I->src[s].value); + BITSET_SET(deps, I->src[s].value); + } - break; - } - } + break; + } + } - return progress; + return progress; } void bi_analyze_helper_requirements(bi_context *ctx) { - BITSET_WORD *deps = calloc(sizeof(BITSET_WORD), ctx->ssa_alloc); + BITSET_WORD *deps = calloc(sizeof(BITSET_WORD), ctx->ssa_alloc); - /* Initialize with the sources of instructions consuming - * derivatives */ + /* Initialize with the sources of instructions consuming + * derivatives */ - bi_foreach_instr_global(ctx, I) { - if (!bi_instr_uses_helpers(I)) continue; + bi_foreach_instr_global(ctx, I) { + if (!bi_instr_uses_helpers(I)) + continue; - bi_foreach_ssa_src(I, s) - BITSET_SET(deps, I->src[s].value); - } + bi_foreach_ssa_src(I, s) + BITSET_SET(deps, I->src[s].value); + } - /* Propagate that up */ - u_worklist worklist; - bi_worklist_init(ctx, &worklist); + /* Propagate that up */ + u_worklist worklist; + bi_worklist_init(ctx, &worklist); - bi_foreach_block(ctx, block) { - bi_worklist_push_tail(&worklist, block); - } + bi_foreach_block(ctx, block) { + bi_worklist_push_tail(&worklist, block); + } - while (!u_worklist_is_empty(&worklist)) { - bi_block *blk = bi_worklist_pop_tail(&worklist); + while (!u_worklist_is_empty(&worklist)) { + bi_block *blk = bi_worklist_pop_tail(&worklist); - if (bi_helper_block_update(deps, blk)) { - bi_foreach_predecessor(blk, pred) - bi_worklist_push_head(&worklist, *pred); - } - } + if (bi_helper_block_update(deps, blk)) { + bi_foreach_predecessor(blk, pred) + bi_worklist_push_head(&worklist, *pred); + } + } - u_worklist_fini(&worklist); + u_worklist_fini(&worklist); - /* Set the execute bits */ + /* Set the execute bits */ - bi_foreach_instr_global(ctx, I) { - if 
(!bi_has_skip_bit(I->op)) continue; + bi_foreach_instr_global(ctx, I) { + if (!bi_has_skip_bit(I->op)) + continue; - bool exec = false; + bool exec = false; - bi_foreach_dest(I, d) - exec |= BITSET_TEST(deps, I->dest[d].value); + bi_foreach_dest(I, d) + exec |= BITSET_TEST(deps, I->dest[d].value); - I->skip = !exec; - } + I->skip = !exec; + } - free(deps); + free(deps); } diff --git a/src/panfrost/bifrost/bi_layout.c b/src/panfrost/bifrost/bi_layout.c index 7c034cb31be..e90a3603d8d 100644 --- a/src/panfrost/bifrost/bi_layout.c +++ b/src/panfrost/bifrost/bi_layout.c @@ -37,10 +37,8 @@ bool bi_ec0_packed(unsigned tuple_count) { - return (tuple_count == 3) || - (tuple_count == 5) || - (tuple_count == 6) || - (tuple_count == 8); + return (tuple_count == 3) || (tuple_count == 5) || (tuple_count == 6) || + (tuple_count == 8); } /* Helper to calculate the number of quadwords in a clause. This is a function @@ -60,7 +58,7 @@ bi_ec0_packed(unsigned tuple_count) * 6 | 5* * 7 | 5 * 8 | 6* - * + * * Y = { X if X <= 3 * { X - 1 if 4 <= X <= 6 * { X - 2 if 7 <= X <= 8 @@ -72,15 +70,15 @@ bi_ec0_packed(unsigned tuple_count) static unsigned bi_clause_quadwords(bi_clause *clause) { - unsigned X = clause->tuple_count; - unsigned Y = X - ((X >= 7) ? 2 : (X >= 4) ? 1 : 0); + unsigned X = clause->tuple_count; + unsigned Y = X - ((X >= 7) ? 2 : (X >= 4) ? 1 : 0); - unsigned constants = clause->constant_count; + unsigned constants = clause->constant_count; - if ((X != 4) && (X != 7) && (X >= 3) && constants) - constants--; + if ((X != 4) && (X != 7) && (X >= 3) && constants) + constants--; - return Y + DIV_ROUND_UP(constants, 2); + return Y + DIV_ROUND_UP(constants, 2); } /* Measures the number of quadwords a branch jumps. Bifrost relative offsets @@ -90,62 +88,62 @@ bi_clause_quadwords(bi_clause *clause) signed bi_block_offset(bi_context *ctx, bi_clause *start, bi_block *target) { - /* Signed since we might jump backwards */ - signed ret = 0; + /* Signed since we might jump backwards */ + signed ret = 0; - /* Determine if the block we're branching to is strictly greater in - * source order */ - bool forwards = target->index > start->block->index; + /* Determine if the block we're branching to is strictly greater in + * source order */ + bool forwards = target->index > start->block->index; - if (forwards) { - /* We have to jump through this block from the start of this - * clause to the end */ - bi_foreach_clause_in_block_from(start->block, clause, start) { - ret += bi_clause_quadwords(clause); - } + if (forwards) { + /* We have to jump through this block from the start of this + * clause to the end */ + bi_foreach_clause_in_block_from(start->block, clause, start) { + ret += bi_clause_quadwords(clause); + } - /* We then need to jump through every clause of every following - * block until the target */ - bi_foreach_block_from(ctx, start->block, blk) { - /* Don't double-count the first block */ - if (blk == start->block) - continue; + /* We then need to jump through every clause of every following + * block until the target */ + bi_foreach_block_from(ctx, start->block, blk) { + /* Don't double-count the first block */ + if (blk == start->block) + continue; - /* End just before the target */ - if (blk == target) - break; + /* End just before the target */ + if (blk == target) + break; - /* Count every clause in the block */ - bi_foreach_clause_in_block(blk, clause) { - ret += bi_clause_quadwords(clause); - } - } - } else { - /* We start at the beginning of the clause but have to jump - * through the clauses before 
us in the block */ - bi_foreach_clause_in_block_from_rev(start->block, clause, start) { - if (clause == start) - continue; + /* Count every clause in the block */ + bi_foreach_clause_in_block(blk, clause) { + ret += bi_clause_quadwords(clause); + } + } + } else { + /* We start at the beginning of the clause but have to jump + * through the clauses before us in the block */ + bi_foreach_clause_in_block_from_rev(start->block, clause, start) { + if (clause == start) + continue; - ret -= bi_clause_quadwords(clause); - } + ret -= bi_clause_quadwords(clause); + } - /* And jump back every clause of preceding blocks up through - * and including the target to get to the beginning of the - * target */ - bi_foreach_block_from_rev(ctx, start->block, blk) { - if (blk == start->block) - continue; + /* And jump back every clause of preceding blocks up through + * and including the target to get to the beginning of the + * target */ + bi_foreach_block_from_rev(ctx, start->block, blk) { + if (blk == start->block) + continue; - bi_foreach_clause_in_block(blk, clause) { - ret -= bi_clause_quadwords(clause); - } + bi_foreach_clause_in_block(blk, clause) { + ret -= bi_clause_quadwords(clause); + } - /* End just after the target */ - if (blk == target) - break; - } - } + /* End just after the target */ + if (blk == target) + break; + } + } - return ret; + return ret; } diff --git a/src/panfrost/bifrost/bi_liveness.c b/src/panfrost/bifrost/bi_liveness.c index 1dc759e5911..52e0450877b 100644 --- a/src/panfrost/bifrost/bi_liveness.c +++ b/src/panfrost/bifrost/bi_liveness.c @@ -23,98 +23,100 @@ * SOFTWARE. */ -#include "compiler.h" #include "util/u_memory.h" +#include "compiler.h" void bi_liveness_ins_update_ssa(BITSET_WORD *live, const bi_instr *I) { - bi_foreach_dest(I, d) - BITSET_CLEAR(live, I->dest[d].value); + bi_foreach_dest(I, d) + BITSET_CLEAR(live, I->dest[d].value); - bi_foreach_ssa_src(I, s) - BITSET_SET(live, I->src[s].value); + bi_foreach_ssa_src(I, s) + BITSET_SET(live, I->src[s].value); } void bi_compute_liveness_ssa(bi_context *ctx) { - u_worklist worklist; - u_worklist_init(&worklist, ctx->num_blocks, NULL); + u_worklist worklist; + u_worklist_init(&worklist, ctx->num_blocks, NULL); - /* Free any previous liveness, and allocate */ - unsigned words = BITSET_WORDS(ctx->ssa_alloc); + /* Free any previous liveness, and allocate */ + unsigned words = BITSET_WORDS(ctx->ssa_alloc); - bi_foreach_block(ctx, block) { - if (block->ssa_live_in) - ralloc_free(block->ssa_live_in); + bi_foreach_block(ctx, block) { + if (block->ssa_live_in) + ralloc_free(block->ssa_live_in); - if (block->ssa_live_out) - ralloc_free(block->ssa_live_out); + if (block->ssa_live_out) + ralloc_free(block->ssa_live_out); - block->ssa_live_in = rzalloc_array(block, BITSET_WORD, words); - block->ssa_live_out = rzalloc_array(block, BITSET_WORD, words); + block->ssa_live_in = rzalloc_array(block, BITSET_WORD, words); + block->ssa_live_out = rzalloc_array(block, BITSET_WORD, words); - bi_worklist_push_head(&worklist, block); - } + bi_worklist_push_head(&worklist, block); + } - /* Iterate the work list */ - while(!u_worklist_is_empty(&worklist)) { - /* Pop in reverse order since liveness is a backwards pass */ - bi_block *blk = bi_worklist_pop_head(&worklist); + /* Iterate the work list */ + while (!u_worklist_is_empty(&worklist)) { + /* Pop in reverse order since liveness is a backwards pass */ + bi_block *blk = bi_worklist_pop_head(&worklist); - /* Update its liveness information */ - memcpy(blk->ssa_live_in, blk->ssa_live_out, words * 
sizeof(BITSET_WORD)); + /* Update its liveness information */ + memcpy(blk->ssa_live_in, blk->ssa_live_out, words * sizeof(BITSET_WORD)); - bi_foreach_instr_in_block_rev(blk, I) { - /* Phi nodes are handled separately, so we skip them. As phi nodes are - * at the beginning and we're iterating backwards, we stop as soon as - * we hit a phi node. - */ - if (I->op == BI_OPCODE_PHI) - break; + bi_foreach_instr_in_block_rev(blk, I) { + /* Phi nodes are handled separately, so we skip them. As phi nodes are + * at the beginning and we're iterating backwards, we stop as soon as + * we hit a phi node. + */ + if (I->op == BI_OPCODE_PHI) + break; - bi_liveness_ins_update_ssa(blk->ssa_live_in, I); - } + bi_liveness_ins_update_ssa(blk->ssa_live_in, I); + } - /* Propagate the live in of the successor (blk) to the live out of - * predecessors. - * - * Phi nodes are logically on the control flow edge and act in parallel. - * To handle when propagating, we kill writes from phis and make live the - * corresponding sources. - */ - bi_foreach_predecessor(blk, pred) { - BITSET_WORD *live = ralloc_array(blk, BITSET_WORD, words); - memcpy(live, blk->ssa_live_in, words * sizeof(BITSET_WORD)); + /* Propagate the live in of the successor (blk) to the live out of + * predecessors. + * + * Phi nodes are logically on the control flow edge and act in parallel. + * To handle when propagating, we kill writes from phis and make live the + * corresponding sources. + */ + bi_foreach_predecessor(blk, pred) { + BITSET_WORD *live = ralloc_array(blk, BITSET_WORD, words); + memcpy(live, blk->ssa_live_in, words * sizeof(BITSET_WORD)); - /* Kill write */ - bi_foreach_instr_in_block(blk, I) { - if (I->op != BI_OPCODE_PHI) break; + /* Kill write */ + bi_foreach_instr_in_block(blk, I) { + if (I->op != BI_OPCODE_PHI) + break; - BITSET_CLEAR(live, I->dest[0].value); - } + BITSET_CLEAR(live, I->dest[0].value); + } - /* Make live the corresponding source */ - bi_foreach_instr_in_block(blk, I) { - if (I->op != BI_OPCODE_PHI) break; + /* Make live the corresponding source */ + bi_foreach_instr_in_block(blk, I) { + if (I->op != BI_OPCODE_PHI) + break; - bi_index operand = I->src[bi_predecessor_index(blk, *pred)]; - if (bi_is_ssa(operand)) - BITSET_SET(live, operand.value); - } + bi_index operand = I->src[bi_predecessor_index(blk, *pred)]; + if (bi_is_ssa(operand)) + BITSET_SET(live, operand.value); + } - BITSET_WORD progress = 0; + BITSET_WORD progress = 0; - for (unsigned i = 0; i < words; ++i) { - progress |= live[i] & ~((*pred)->ssa_live_out[i]); - (*pred)->ssa_live_out[i] |= live[i]; - } + for (unsigned i = 0; i < words; ++i) { + progress |= live[i] & ~((*pred)->ssa_live_out[i]); + (*pred)->ssa_live_out[i] |= live[i]; + } - if (progress != 0) - bi_worklist_push_tail(&worklist, *pred); - } - } + if (progress != 0) + bi_worklist_push_tail(&worklist, *pred); + } + } - u_worklist_fini(&worklist); + u_worklist_fini(&worklist); } diff --git a/src/panfrost/bifrost/bi_lower_divergent_indirects.c b/src/panfrost/bifrost/bi_lower_divergent_indirects.c index 1b52040608d..e8453baaa79 100644 --- a/src/panfrost/bifrost/bi_lower_divergent_indirects.c +++ b/src/panfrost/bifrost/bi_lower_divergent_indirects.c @@ -21,8 +21,8 @@ * SOFTWARE. */ -#include "compiler.h" #include "compiler/nir/nir_builder.h" +#include "compiler.h" /* Divergent attribute access is undefined behaviour. 
To avoid divergence, * lower to an if-chain like: @@ -40,89 +40,88 @@ static bool bi_lower_divergent_indirects_impl(nir_builder *b, nir_instr *instr, void *data) { - if (instr->type != nir_instr_type_intrinsic) - return false; + if (instr->type != nir_instr_type_intrinsic) + return false; - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - gl_shader_stage stage = b->shader->info.stage; - nir_src *offset; + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + gl_shader_stage stage = b->shader->info.stage; + nir_src *offset; - /* Not all indirect access needs this workaround */ - switch (intr->intrinsic) { - case nir_intrinsic_load_input: - case nir_intrinsic_load_interpolated_input: - /* Attributes and varyings */ - offset = nir_get_io_offset_src(intr); - break; + /* Not all indirect access needs this workaround */ + switch (intr->intrinsic) { + case nir_intrinsic_load_input: + case nir_intrinsic_load_interpolated_input: + /* Attributes and varyings */ + offset = nir_get_io_offset_src(intr); + break; - case nir_intrinsic_store_output: - /* Varyings only */ - if (stage == MESA_SHADER_FRAGMENT) - return false; + case nir_intrinsic_store_output: + /* Varyings only */ + if (stage == MESA_SHADER_FRAGMENT) + return false; - offset = nir_get_io_offset_src(intr); - break; + offset = nir_get_io_offset_src(intr); + break; - case nir_intrinsic_image_atomic_add: - case nir_intrinsic_image_atomic_imin: - case nir_intrinsic_image_atomic_umin: - case nir_intrinsic_image_atomic_imax: - case nir_intrinsic_image_atomic_umax: - case nir_intrinsic_image_atomic_and: - case nir_intrinsic_image_atomic_or: - case nir_intrinsic_image_atomic_xor: - case nir_intrinsic_image_load: - case nir_intrinsic_image_store: - /* Any image access */ - offset = &intr->src[0]; - break; - default: - return false; - } + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_image_atomic_imin: + case nir_intrinsic_image_atomic_umin: + case nir_intrinsic_image_atomic_imax: + case nir_intrinsic_image_atomic_umax: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_atomic_xor: + case nir_intrinsic_image_load: + case nir_intrinsic_image_store: + /* Any image access */ + offset = &intr->src[0]; + break; + default: + return false; + } - if (!nir_src_is_divergent(*offset)) - return false; + if (!nir_src_is_divergent(*offset)) + return false; - /* This indirect does need it */ + /* This indirect does need it */ - b->cursor = nir_before_instr(instr); - nir_ssa_def *lane = nir_load_subgroup_invocation(b); - unsigned *lanes = data; + b->cursor = nir_before_instr(instr); + nir_ssa_def *lane = nir_load_subgroup_invocation(b); + unsigned *lanes = data; - /* Write zero in a funny way to bypass lower_load_const_to_scalar */ - bool has_dest = nir_intrinsic_infos[intr->intrinsic].has_dest; - unsigned size = has_dest ? nir_dest_bit_size(intr->dest) : 32; - nir_ssa_def *zero = has_dest ? nir_imm_zero(b, 1, size) : NULL; - nir_ssa_def *zeroes[4] = { zero, zero, zero, zero }; - nir_ssa_def *res = has_dest ? - nir_vec(b, zeroes, nir_dest_num_components(intr->dest)) : NULL; + /* Write zero in a funny way to bypass lower_load_const_to_scalar */ + bool has_dest = nir_intrinsic_infos[intr->intrinsic].has_dest; + unsigned size = has_dest ? nir_dest_bit_size(intr->dest) : 32; + nir_ssa_def *zero = has_dest ? nir_imm_zero(b, 1, size) : NULL; + nir_ssa_def *zeroes[4] = {zero, zero, zero, zero}; + nir_ssa_def *res = + has_dest ? 
nir_vec(b, zeroes, nir_dest_num_components(intr->dest)) : NULL; - for (unsigned i = 0; i < (*lanes); ++i) { - nir_push_if(b, nir_ieq_imm(b, lane, i)); + for (unsigned i = 0; i < (*lanes); ++i) { + nir_push_if(b, nir_ieq_imm(b, lane, i)); - nir_instr *c = nir_instr_clone(b->shader, instr); - nir_intrinsic_instr *c_intr = nir_instr_as_intrinsic(c); - nir_builder_instr_insert(b, c); - nir_pop_if(b, NULL); + nir_instr *c = nir_instr_clone(b->shader, instr); + nir_intrinsic_instr *c_intr = nir_instr_as_intrinsic(c); + nir_builder_instr_insert(b, c); + nir_pop_if(b, NULL); - if (has_dest) { - assert(c_intr->dest.is_ssa); - nir_ssa_def *c_ssa = &c_intr->dest.ssa; - res = nir_if_phi(b, c_ssa, res); - } - } + if (has_dest) { + assert(c_intr->dest.is_ssa); + nir_ssa_def *c_ssa = &c_intr->dest.ssa; + res = nir_if_phi(b, c_ssa, res); + } + } - if (has_dest) - nir_ssa_def_rewrite_uses(&intr->dest.ssa, res); + if (has_dest) + nir_ssa_def_rewrite_uses(&intr->dest.ssa, res); - nir_instr_remove(instr); - return true; + nir_instr_remove(instr); + return true; } bool bi_lower_divergent_indirects(nir_shader *shader, unsigned lanes) { - return nir_shader_instructions_pass(shader, - bi_lower_divergent_indirects_impl, - nir_metadata_none, &lanes); + return nir_shader_instructions_pass( + shader, bi_lower_divergent_indirects_impl, nir_metadata_none, &lanes); } diff --git a/src/panfrost/bifrost/bi_lower_swizzle.c b/src/panfrost/bifrost/bi_lower_swizzle.c index 988618036b3..5cc6ca2a21f 100644 --- a/src/panfrost/bifrost/bi_lower_swizzle.c +++ b/src/panfrost/bifrost/bi_lower_swizzle.c @@ -21,8 +21,8 @@ * SOFTWARE. */ -#include "compiler.h" #include "bi_builder.h" +#include "compiler.h" /* Not all 8-bit and 16-bit instructions support all swizzles on all sources. * These passes, intended to run after NIR->BIR but before scheduling/RA, lower @@ -33,270 +33,269 @@ static bool bi_swizzle_replicates_8(enum bi_swizzle swz) { - switch (swz) { - case BI_SWIZZLE_B0000: - case BI_SWIZZLE_B1111: - case BI_SWIZZLE_B2222: - case BI_SWIZZLE_B3333: - return true; - default: - return false; - } + switch (swz) { + case BI_SWIZZLE_B0000: + case BI_SWIZZLE_B1111: + case BI_SWIZZLE_B2222: + case BI_SWIZZLE_B3333: + return true; + default: + return false; + } } static void lower_swizzle(bi_context *ctx, bi_instr *ins, unsigned src) { - /* TODO: Use the opcode table and be a lot more methodical about this... */ - switch (ins->op) { - /* Some instructions used with 16-bit data never have swizzles */ - case BI_OPCODE_CSEL_V2F16: - case BI_OPCODE_CSEL_V2I16: - case BI_OPCODE_CSEL_V2S16: - case BI_OPCODE_CSEL_V2U16: + /* TODO: Use the opcode table and be a lot more methodical about this... */ + switch (ins->op) { + /* Some instructions used with 16-bit data never have swizzles */ + case BI_OPCODE_CSEL_V2F16: + case BI_OPCODE_CSEL_V2I16: + case BI_OPCODE_CSEL_V2S16: + case BI_OPCODE_CSEL_V2U16: - /* Despite ostensibly being 32-bit instructions, CLPER does not - * inherently interpret the data, so it can be used for v2f16 - * derivatives, which might require swizzle lowering */ - case BI_OPCODE_CLPER_I32: - case BI_OPCODE_CLPER_OLD_I32: + /* Despite ostensibly being 32-bit instructions, CLPER does not + * inherently interpret the data, so it can be used for v2f16 + * derivatives, which might require swizzle lowering */ + case BI_OPCODE_CLPER_I32: + case BI_OPCODE_CLPER_OLD_I32: - /* Similarly, CSEL.i32 consumes a boolean as a 32-bit argument. 
If the - * boolean is implemented as a 16-bit integer, the swizzle is needed - * for correct operation if the instruction producing the 16-bit - * boolean does not replicate to both halves of the containing 32-bit - * register. As such, we may need to lower a swizzle. - * - * This is a silly hack. Ideally, code gen would be smart enough to - * avoid this case (by replicating). In practice, silly hardware design - * decisions force our hand here. - */ - case BI_OPCODE_MUX_I32: - case BI_OPCODE_CSEL_I32: - break; + /* Similarly, CSEL.i32 consumes a boolean as a 32-bit argument. If the + * boolean is implemented as a 16-bit integer, the swizzle is needed + * for correct operation if the instruction producing the 16-bit + * boolean does not replicate to both halves of the containing 32-bit + * register. As such, we may need to lower a swizzle. + * + * This is a silly hack. Ideally, code gen would be smart enough to + * avoid this case (by replicating). In practice, silly hardware design + * decisions force our hand here. + */ + case BI_OPCODE_MUX_I32: + case BI_OPCODE_CSEL_I32: + break; - case BI_OPCODE_IADD_V2S16: - case BI_OPCODE_IADD_V2U16: - case BI_OPCODE_ISUB_V2S16: - case BI_OPCODE_ISUB_V2U16: - if (src == 0 && ins->src[src].swizzle != BI_SWIZZLE_H10) - break; - else - return; - case BI_OPCODE_LSHIFT_AND_V2I16: - case BI_OPCODE_LSHIFT_OR_V2I16: - case BI_OPCODE_LSHIFT_XOR_V2I16: - case BI_OPCODE_RSHIFT_AND_V2I16: - case BI_OPCODE_RSHIFT_OR_V2I16: - case BI_OPCODE_RSHIFT_XOR_V2I16: - if (src == 2) - return; - else - break; + case BI_OPCODE_IADD_V2S16: + case BI_OPCODE_IADD_V2U16: + case BI_OPCODE_ISUB_V2S16: + case BI_OPCODE_ISUB_V2U16: + if (src == 0 && ins->src[src].swizzle != BI_SWIZZLE_H10) + break; + else + return; + case BI_OPCODE_LSHIFT_AND_V2I16: + case BI_OPCODE_LSHIFT_OR_V2I16: + case BI_OPCODE_LSHIFT_XOR_V2I16: + case BI_OPCODE_RSHIFT_AND_V2I16: + case BI_OPCODE_RSHIFT_OR_V2I16: + case BI_OPCODE_RSHIFT_XOR_V2I16: + if (src == 2) + return; + else + break; - /* For some reason MUX.v2i16 allows swaps but not replication */ - case BI_OPCODE_MUX_V2I16: - if (ins->src[src].swizzle == BI_SWIZZLE_H10) - return; - else - break; + /* For some reason MUX.v2i16 allows swaps but not replication */ + case BI_OPCODE_MUX_V2I16: + if (ins->src[src].swizzle == BI_SWIZZLE_H10) + return; + else + break; - /* No swizzles supported */ - case BI_OPCODE_HADD_V4U8: - case BI_OPCODE_HADD_V4S8: - case BI_OPCODE_CLZ_V4U8: - case BI_OPCODE_IDP_V4I8: - case BI_OPCODE_IABS_V4S8: - case BI_OPCODE_ICMP_V4I8: - case BI_OPCODE_ICMP_V4U8: - case BI_OPCODE_MUX_V4I8: - case BI_OPCODE_IADD_IMM_V4I8: - break; + /* No swizzles supported */ + case BI_OPCODE_HADD_V4U8: + case BI_OPCODE_HADD_V4S8: + case BI_OPCODE_CLZ_V4U8: + case BI_OPCODE_IDP_V4I8: + case BI_OPCODE_IABS_V4S8: + case BI_OPCODE_ICMP_V4I8: + case BI_OPCODE_ICMP_V4U8: + case BI_OPCODE_MUX_V4I8: + case BI_OPCODE_IADD_IMM_V4I8: + break; - case BI_OPCODE_LSHIFT_AND_V4I8: - case BI_OPCODE_LSHIFT_OR_V4I8: - case BI_OPCODE_LSHIFT_XOR_V4I8: - case BI_OPCODE_RSHIFT_AND_V4I8: - case BI_OPCODE_RSHIFT_OR_V4I8: - case BI_OPCODE_RSHIFT_XOR_V4I8: - /* Last source allows identity or replication */ - if (src == 2 && bi_swizzle_replicates_8(ins->src[src].swizzle)) - return; + case BI_OPCODE_LSHIFT_AND_V4I8: + case BI_OPCODE_LSHIFT_OR_V4I8: + case BI_OPCODE_LSHIFT_XOR_V4I8: + case BI_OPCODE_RSHIFT_AND_V4I8: + case BI_OPCODE_RSHIFT_OR_V4I8: + case BI_OPCODE_RSHIFT_XOR_V4I8: + /* Last source allows identity or replication */ + if (src == 2 && 
bi_swizzle_replicates_8(ins->src[src].swizzle)) + return; - /* Others do not allow swizzles */ - break; + /* Others do not allow swizzles */ + break; - /* We don't want to deal with reswizzling logic in modifier prop. Move - * the swizzle outside, it's easier for clamp propagation. */ - case BI_OPCODE_FCLAMP_V2F16: - { - bi_builder b = bi_init_builder(ctx, bi_after_instr(ins)); - bi_index dest = ins->dest[0]; - bi_index tmp = bi_temp(ctx); + /* We don't want to deal with reswizzling logic in modifier prop. Move + * the swizzle outside, it's easier for clamp propagation. */ + case BI_OPCODE_FCLAMP_V2F16: { + bi_builder b = bi_init_builder(ctx, bi_after_instr(ins)); + bi_index dest = ins->dest[0]; + bi_index tmp = bi_temp(ctx); - ins->dest[0] = tmp; - bi_swz_v2i16_to(&b, dest, bi_replace_index(ins->src[0], tmp)); - return; - } + ins->dest[0] = tmp; + bi_swz_v2i16_to(&b, dest, bi_replace_index(ins->src[0], tmp)); + return; + } - default: - return; - } + default: + return; + } - /* First, try to apply a given swizzle to a constant to clear the - * runtime swizzle. This is less heavy-handed than ignoring the - * swizzle for scalar destinations, since it maintains - * replication of the destination. - */ - if (ins->src[src].type == BI_INDEX_CONSTANT) { - ins->src[src].value = bi_apply_swizzle(ins->src[src].value, - ins->src[src].swizzle); - ins->src[src].swizzle = BI_SWIZZLE_H01; - return; - } + /* First, try to apply a given swizzle to a constant to clear the + * runtime swizzle. This is less heavy-handed than ignoring the + * swizzle for scalar destinations, since it maintains + * replication of the destination. + */ + if (ins->src[src].type == BI_INDEX_CONSTANT) { + ins->src[src].value = + bi_apply_swizzle(ins->src[src].value, ins->src[src].swizzle); + ins->src[src].swizzle = BI_SWIZZLE_H01; + return; + } - /* Even if the source does not replicate, if the consuming instruction - * produces a 16-bit scalar, we can ignore the other component. - */ - if (ins->dest[0].swizzle == BI_SWIZZLE_H00 && - ins->src[src].swizzle == BI_SWIZZLE_H00) - { - ins->src[src].swizzle = BI_SWIZZLE_H01; - return; - } + /* Even if the source does not replicate, if the consuming instruction + * produces a 16-bit scalar, we can ignore the other component. + */ + if (ins->dest[0].swizzle == BI_SWIZZLE_H00 && + ins->src[src].swizzle == BI_SWIZZLE_H00) { + ins->src[src].swizzle = BI_SWIZZLE_H01; + return; + } - /* Lower it away */ - bi_builder b = bi_init_builder(ctx, bi_before_instr(ins)); + /* Lower it away */ + bi_builder b = bi_init_builder(ctx, bi_before_instr(ins)); - bool is_8 = (bi_opcode_props[ins->op].size == BI_SIZE_8); - bi_index orig = ins->src[src]; - bi_index stripped = bi_replace_index(bi_null(), orig); - stripped.swizzle = ins->src[src].swizzle; + bool is_8 = (bi_opcode_props[ins->op].size == BI_SIZE_8); + bi_index orig = ins->src[src]; + bi_index stripped = bi_replace_index(bi_null(), orig); + stripped.swizzle = ins->src[src].swizzle; - bi_index swz = is_8 ? bi_swz_v4i8(&b, stripped) : bi_swz_v2i16(&b, stripped); + bi_index swz = is_8 ? bi_swz_v4i8(&b, stripped) : bi_swz_v2i16(&b, stripped); - bi_replace_src(ins, src, swz); - ins->src[src].swizzle = BI_SWIZZLE_H01; + bi_replace_src(ins, src, swz); + ins->src[src].swizzle = BI_SWIZZLE_H01; } static bool bi_swizzle_replicates_16(enum bi_swizzle swz) { - switch (swz) { - case BI_SWIZZLE_H00: - case BI_SWIZZLE_H11: - return true; - default: - /* If a swizzle replicates every 8-bits, it also replicates - * every 16-bits, so allow 8-bit replicating swizzles. 
- */ - return bi_swizzle_replicates_8(swz); - } + switch (swz) { + case BI_SWIZZLE_H00: + case BI_SWIZZLE_H11: + return true; + default: + /* If a swizzle replicates every 8-bits, it also replicates + * every 16-bits, so allow 8-bit replicating swizzles. + */ + return bi_swizzle_replicates_8(swz); + } } static bool bi_instr_replicates(bi_instr *I, BITSET_WORD *replicates_16) { - switch (I->op) { + switch (I->op) { - /* Instructions that construct vectors have replicated output if their - * sources are identical. Check this case first. - */ - case BI_OPCODE_MKVEC_V2I16: - case BI_OPCODE_V2F16_TO_V2S16: - case BI_OPCODE_V2F16_TO_V2U16: - case BI_OPCODE_V2F32_TO_V2F16: - case BI_OPCODE_V2S16_TO_V2F16: - case BI_OPCODE_V2S8_TO_V2F16: - case BI_OPCODE_V2S8_TO_V2S16: - case BI_OPCODE_V2U16_TO_V2F16: - case BI_OPCODE_V2U8_TO_V2F16: - case BI_OPCODE_V2U8_TO_V2U16: - return bi_is_value_equiv(I->src[0], I->src[1]); + /* Instructions that construct vectors have replicated output if their + * sources are identical. Check this case first. + */ + case BI_OPCODE_MKVEC_V2I16: + case BI_OPCODE_V2F16_TO_V2S16: + case BI_OPCODE_V2F16_TO_V2U16: + case BI_OPCODE_V2F32_TO_V2F16: + case BI_OPCODE_V2S16_TO_V2F16: + case BI_OPCODE_V2S8_TO_V2F16: + case BI_OPCODE_V2S8_TO_V2S16: + case BI_OPCODE_V2U16_TO_V2F16: + case BI_OPCODE_V2U8_TO_V2F16: + case BI_OPCODE_V2U8_TO_V2U16: + return bi_is_value_equiv(I->src[0], I->src[1]); - /* 16-bit transcendentals are defined to output zero in their - * upper half, so they do not replicate - */ - case BI_OPCODE_FRCP_F16: - case BI_OPCODE_FRSQ_F16: - return false; + /* 16-bit transcendentals are defined to output zero in their + * upper half, so they do not replicate + */ + case BI_OPCODE_FRCP_F16: + case BI_OPCODE_FRSQ_F16: + return false; - /* Not sure, be conservative, we don't use these.. */ - case BI_OPCODE_VN_ASST1_F16: - case BI_OPCODE_FPCLASS_F16: - case BI_OPCODE_FPOW_SC_DET_F16: - return false; + /* Not sure, be conservative, we don't use these.. */ + case BI_OPCODE_VN_ASST1_F16: + case BI_OPCODE_FPCLASS_F16: + case BI_OPCODE_FPOW_SC_DET_F16: + return false; - default: - break; - } + default: + break; + } - /* Replication analysis only makes sense for ALU instructions */ - if (bi_opcode_props[I->op].message != BIFROST_MESSAGE_NONE) - return false; + /* Replication analysis only makes sense for ALU instructions */ + if (bi_opcode_props[I->op].message != BIFROST_MESSAGE_NONE) + return false; - /* We only analyze 16-bit instructions for 16-bit replication. We could - * maybe do better. - */ - if (bi_opcode_props[I->op].size != BI_SIZE_16) - return false; + /* We only analyze 16-bit instructions for 16-bit replication. We could + * maybe do better. 
+ */ + if (bi_opcode_props[I->op].size != BI_SIZE_16) + return false; - bi_foreach_src(I, s) { - if (bi_is_null(I->src[s])) - continue; + bi_foreach_src(I, s) { + if (bi_is_null(I->src[s])) + continue; - /* Replicated swizzles */ - if (bi_swizzle_replicates_16(I->src[s].swizzle)) - continue; + /* Replicated swizzles */ + if (bi_swizzle_replicates_16(I->src[s].swizzle)) + continue; - /* Replicated values */ - if (bi_is_ssa(I->src[s]) && - BITSET_TEST(replicates_16, I->src[s].value)) - continue; + /* Replicated values */ + if (bi_is_ssa(I->src[s]) && BITSET_TEST(replicates_16, I->src[s].value)) + continue; - /* Replicated constants */ - if (I->src[s].type == BI_INDEX_CONSTANT && - (I->src[s].value & 0xFFFF) == (I->src[s].value >> 16)) - continue; + /* Replicated constants */ + if (I->src[s].type == BI_INDEX_CONSTANT && + (I->src[s].value & 0xFFFF) == (I->src[s].value >> 16)) + continue; - return false; - } + return false; + } - return true; + return true; } void bi_lower_swizzle(bi_context *ctx) { - bi_foreach_instr_global_safe(ctx, ins) { - bi_foreach_src(ins, s) { - if (bi_is_null(ins->src[s])) continue; - if (ins->src[s].swizzle == BI_SWIZZLE_H01) continue; + bi_foreach_instr_global_safe(ctx, ins) { + bi_foreach_src(ins, s) { + if (bi_is_null(ins->src[s])) + continue; + if (ins->src[s].swizzle == BI_SWIZZLE_H01) + continue; - lower_swizzle(ctx, ins, s); - } - } + lower_swizzle(ctx, ins, s); + } + } - /* Now that we've lowered swizzles, clean up the mess */ - BITSET_WORD *replicates_16 = calloc(sizeof(bi_index), ctx->ssa_alloc); + /* Now that we've lowered swizzles, clean up the mess */ + BITSET_WORD *replicates_16 = calloc(sizeof(bi_index), ctx->ssa_alloc); - bi_foreach_instr_global(ctx, ins) { - if (ins->nr_dests && bi_instr_replicates(ins, replicates_16)) - BITSET_SET(replicates_16, ins->dest[0].value); + bi_foreach_instr_global(ctx, ins) { + if (ins->nr_dests && bi_instr_replicates(ins, replicates_16)) + BITSET_SET(replicates_16, ins->dest[0].value); - if (ins->op == BI_OPCODE_SWZ_V2I16 && bi_is_ssa(ins->src[0]) && - BITSET_TEST(replicates_16, ins->src[0].value)) { - ins->op = BI_OPCODE_MOV_I32; - ins->src[0].swizzle = BI_SWIZZLE_H01; - } + if (ins->op == BI_OPCODE_SWZ_V2I16 && bi_is_ssa(ins->src[0]) && + BITSET_TEST(replicates_16, ins->src[0].value)) { + ins->op = BI_OPCODE_MOV_I32; + ins->src[0].swizzle = BI_SWIZZLE_H01; + } - /* The above passes rely on replicating destinations. For - * Valhall, we will want to optimize this. For now, default - * to Bifrost compatible behaviour. - */ - if (ins->nr_dests) - ins->dest[0].swizzle = BI_SWIZZLE_H01; - } + /* The above passes rely on replicating destinations. For + * Valhall, we will want to optimize this. For now, default + * to Bifrost compatible behaviour. + */ + if (ins->nr_dests) + ins->dest[0].swizzle = BI_SWIZZLE_H01; + } - free(replicates_16); + free(replicates_16); } diff --git a/src/panfrost/bifrost/bi_opt_constant_fold.c b/src/panfrost/bifrost/bi_opt_constant_fold.c index 03c370e8132..92776fdcbac 100644 --- a/src/panfrost/bifrost/bi_opt_constant_fold.c +++ b/src/panfrost/bifrost/bi_opt_constant_fold.c @@ -21,8 +21,8 @@ * SOFTWARE. */ -#include "compiler.h" #include "bi_builder.h" +#include "compiler.h" /* Dead simple constant folding to cleanup compiler frontend patterns. 
Before * adding a new pattern here, check why you need it and whether we can avoid @@ -31,83 +31,84 @@ static inline uint32_t bi_source_value(const bi_instr *I, unsigned s) { - if (s < I->nr_srcs) - return bi_apply_swizzle(I->src[s].value, I->src[s].swizzle); - else - return 0; + if (s < I->nr_srcs) + return bi_apply_swizzle(I->src[s].value, I->src[s].swizzle); + else + return 0; } uint32_t bi_fold_constant(bi_instr *I, bool *unsupported) { - /* We can only fold instructions where all sources are constant */ - bi_foreach_src(I, s) { - if (I->src[s].type != BI_INDEX_CONSTANT) { - *unsupported = true; - return 0; - } - } + /* We can only fold instructions where all sources are constant */ + bi_foreach_src(I, s) { + if (I->src[s].type != BI_INDEX_CONSTANT) { + *unsupported = true; + return 0; + } + } - /* Grab the sources */ - uint32_t a = bi_source_value(I, 0); - uint32_t b = bi_source_value(I, 1); - uint32_t c = bi_source_value(I, 2); - uint32_t d = bi_source_value(I, 3); + /* Grab the sources */ + uint32_t a = bi_source_value(I, 0); + uint32_t b = bi_source_value(I, 1); + uint32_t c = bi_source_value(I, 2); + uint32_t d = bi_source_value(I, 3); - /* Evaluate the instruction */ - switch (I->op) { - case BI_OPCODE_SWZ_V2I16: - return a; + /* Evaluate the instruction */ + switch (I->op) { + case BI_OPCODE_SWZ_V2I16: + return a; - case BI_OPCODE_MKVEC_V2I16: - return (b << 16) | (a & 0xFFFF); + case BI_OPCODE_MKVEC_V2I16: + return (b << 16) | (a & 0xFFFF); - case BI_OPCODE_MKVEC_V4I8: - return (d << 24) | ((c & 0xFF) << 16) | ((b & 0xFF) << 8) | (a & 0xFF); + case BI_OPCODE_MKVEC_V4I8: + return (d << 24) | ((c & 0xFF) << 16) | ((b & 0xFF) << 8) | (a & 0xFF); - case BI_OPCODE_MKVEC_V2I8: - return (c << 16) | ((b & 0xFF) << 8) | (a & 0xFF); + case BI_OPCODE_MKVEC_V2I8: + return (c << 16) | ((b & 0xFF) << 8) | (a & 0xFF); - case BI_OPCODE_LSHIFT_OR_I32: - if (I->not_result || I->src[0].neg || I->src[1].neg) - break; + case BI_OPCODE_LSHIFT_OR_I32: + if (I->not_result || I->src[0].neg || I->src[1].neg) + break; - return (a << c) | b; + return (a << c) | b; - case BI_OPCODE_F32_TO_U32: - if (I->round == BI_ROUND_NONE) { - /* Explicitly clamp to prevent undefined behaviour and - * match hardware rules */ - float f = uif(a); - return (f >= 0.0) ? (uint32_t) f : 0; - } else - break; + case BI_OPCODE_F32_TO_U32: + if (I->round == BI_ROUND_NONE) { + /* Explicitly clamp to prevent undefined behaviour and + * match hardware rules */ + float f = uif(a); + return (f >= 0.0) ? 
(uint32_t)f : 0; + } else + break; - default: - break; - } + default: + break; + } - *unsupported = true; - return 0; + *unsupported = true; + return 0; } bool bi_opt_constant_fold(bi_context *ctx) { - bool progress = false; + bool progress = false; - bi_foreach_instr_global_safe(ctx, ins) { - bool unsupported = false; - uint32_t replace = bi_fold_constant(ins, &unsupported); - if (unsupported) continue; + bi_foreach_instr_global_safe(ctx, ins) { + bool unsupported = false; + uint32_t replace = bi_fold_constant(ins, &unsupported); + if (unsupported) + continue; - /* Replace with constant move, to be copypropped */ - assert(ins->nr_dests == 1); - bi_builder b = bi_init_builder(ctx, bi_after_instr(ins)); - bi_mov_i32_to(&b, ins->dest[0], bi_imm_u32(replace)); - bi_remove_instruction(ins); - progress = true; - } + /* Replace with constant move, to be copypropped */ + assert(ins->nr_dests == 1); + bi_builder b = bi_init_builder(ctx, bi_after_instr(ins)); + bi_mov_i32_to(&b, ins->dest[0], bi_imm_u32(replace)); + bi_remove_instruction(ins); + progress = true; + } - return progress; + return progress; } diff --git a/src/panfrost/bifrost/bi_opt_copy_prop.c b/src/panfrost/bifrost/bi_opt_copy_prop.c index 13b9b0d2b83..1a3bc5ae042 100644 --- a/src/panfrost/bifrost/bi_opt_copy_prop.c +++ b/src/panfrost/bifrost/bi_opt_copy_prop.c @@ -22,92 +22,95 @@ * SOFTWARE. */ -#include "compiler.h" #include "bi_builder.h" +#include "compiler.h" /* SSA copy propagation */ static bool bi_reads_fau(bi_instr *ins) { - bi_foreach_src(ins, s) { - if (ins->src[s].type == BI_INDEX_FAU) - return true; - } + bi_foreach_src(ins, s) { + if (ins->src[s].type == BI_INDEX_FAU) + return true; + } - return false; + return false; } void bi_opt_copy_prop(bi_context *ctx) { - /* Chase SPLIT of COLLECT. Instruction selection usually avoids this - * pattern (due to the split cache), but it is inevitably generated by - * the UBO pushing pass. - */ - bi_instr **collects = calloc(sizeof(bi_instr *), ctx->ssa_alloc); - bi_foreach_instr_global_safe(ctx, I) { - if (I->op == BI_OPCODE_COLLECT_I32) { - /* Rewrite trivial collects while we're at it */ - if (I->nr_srcs == 1) - I->op = BI_OPCODE_MOV_I32; + /* Chase SPLIT of COLLECT. Instruction selection usually avoids this + * pattern (due to the split cache), but it is inevitably generated by + * the UBO pushing pass. 
+ */ + bi_instr **collects = calloc(sizeof(bi_instr *), ctx->ssa_alloc); + bi_foreach_instr_global_safe(ctx, I) { + if (I->op == BI_OPCODE_COLLECT_I32) { + /* Rewrite trivial collects while we're at it */ + if (I->nr_srcs == 1) + I->op = BI_OPCODE_MOV_I32; - collects[I->dest[0].value] = I; - } else if (I->op == BI_OPCODE_SPLIT_I32) { - /* Rewrite trivial splits while we're at it */ - if (I->nr_dests == 1) - I->op = BI_OPCODE_MOV_I32; + collects[I->dest[0].value] = I; + } else if (I->op == BI_OPCODE_SPLIT_I32) { + /* Rewrite trivial splits while we're at it */ + if (I->nr_dests == 1) + I->op = BI_OPCODE_MOV_I32; - bi_instr *collect = collects[I->src[0].value]; - if (!collect) - continue; + bi_instr *collect = collects[I->src[0].value]; + if (!collect) + continue; - /* Lower the split to moves, copyprop cleans up */ - bi_builder b = bi_init_builder(ctx, bi_before_instr(I)); + /* Lower the split to moves, copyprop cleans up */ + bi_builder b = bi_init_builder(ctx, bi_before_instr(I)); - bi_foreach_dest(I, d) - bi_mov_i32_to(&b, I->dest[d], collect->src[d]); + bi_foreach_dest(I, d) + bi_mov_i32_to(&b, I->dest[d], collect->src[d]); - bi_remove_instruction(I); - } - } + bi_remove_instruction(I); + } + } - free(collects); + free(collects); - bi_index *replacement = calloc(sizeof(bi_index), ctx->ssa_alloc); + bi_index *replacement = calloc(sizeof(bi_index), ctx->ssa_alloc); - bi_foreach_instr_global_safe(ctx, ins) { - if (ins->op == BI_OPCODE_MOV_I32 && ins->src[0].type != BI_INDEX_REGISTER) { - bi_index replace = ins->src[0]; + bi_foreach_instr_global_safe(ctx, ins) { + if (ins->op == BI_OPCODE_MOV_I32 && + ins->src[0].type != BI_INDEX_REGISTER) { + bi_index replace = ins->src[0]; - /* Peek through one layer so copyprop converges in one - * iteration for chained moves */ - if (bi_is_ssa(replace)) { - bi_index chained = replacement[replace.value]; + /* Peek through one layer so copyprop converges in one + * iteration for chained moves */ + if (bi_is_ssa(replace)) { + bi_index chained = replacement[replace.value]; - if (!bi_is_null(chained)) - replace = chained; - } + if (!bi_is_null(chained)) + replace = chained; + } - assert(ins->nr_dests == 1); - replacement[ins->dest[0].value] = replace; - } + assert(ins->nr_dests == 1); + replacement[ins->dest[0].value] = replace; + } - bi_foreach_src(ins, s) { - bi_index use = ins->src[s]; + bi_foreach_src(ins, s) { + bi_index use = ins->src[s]; - if (use.type != BI_INDEX_NORMAL) continue; - if (bi_is_staging_src(ins, s)) continue; + if (use.type != BI_INDEX_NORMAL) + continue; + if (bi_is_staging_src(ins, s)) + continue; - bi_index repl = replacement[use.value]; + bi_index repl = replacement[use.value]; - if (repl.type == BI_INDEX_CONSTANT && bi_reads_fau(ins)) - continue; + if (repl.type == BI_INDEX_CONSTANT && bi_reads_fau(ins)) + continue; - if (!bi_is_null(repl)) - bi_replace_src(ins, s, repl); - } - } + if (!bi_is_null(repl)) + bi_replace_src(ins, s, repl); + } + } - free(replacement); + free(replacement); } diff --git a/src/panfrost/bifrost/bi_opt_cse.c b/src/panfrost/bifrost/bi_opt_cse.c index 4ffc9475e5a..40ef1877ab1 100644 --- a/src/panfrost/bifrost/bi_opt_cse.c +++ b/src/panfrost/bifrost/bi_opt_cse.c @@ -22,8 +22,8 @@ * SOFTWARE. 
*/ -#include "compiler.h" #include "bi_builder.h" +#include "compiler.h" #define XXH_INLINE_ALL #include "util/xxhash.h" @@ -36,85 +36,88 @@ static inline uint32_t HASH(uint32_t hash, unsigned data) { - return XXH32(&data, sizeof(data), hash); + return XXH32(&data, sizeof(data), hash); } static uint32_t hash_index(uint32_t hash, bi_index index) { - hash = HASH(hash, index.value); - hash = HASH(hash, index.abs); - hash = HASH(hash, index.neg); - hash = HASH(hash, index.swizzle); - hash = HASH(hash, index.offset); - hash = HASH(hash, index.type); - return hash; + hash = HASH(hash, index.value); + hash = HASH(hash, index.abs); + hash = HASH(hash, index.neg); + hash = HASH(hash, index.swizzle); + hash = HASH(hash, index.offset); + hash = HASH(hash, index.type); + return hash; } /* Hash an ALU instruction. */ static uint32_t hash_instr(const void *data) { - const bi_instr *I = data; - uint32_t hash = 0; + const bi_instr *I = data; + uint32_t hash = 0; - hash = HASH(hash, I->op); - hash = HASH(hash, I->nr_dests); - hash = HASH(hash, I->nr_srcs); + hash = HASH(hash, I->op); + hash = HASH(hash, I->nr_dests); + hash = HASH(hash, I->nr_srcs); - assert(!I->flow && !I->slot && "CSE must be early"); + assert(!I->flow && !I->slot && "CSE must be early"); - /* Explcitly skip destinations, except for size details */ - bi_foreach_dest(I, d) { - hash = HASH(hash, I->dest[d].swizzle); - } + /* Explcitly skip destinations, except for size details */ + bi_foreach_dest(I, d) { + hash = HASH(hash, I->dest[d].swizzle); + } - bi_foreach_src(I, s) { - hash = hash_index(hash, I->src[s]); - } + bi_foreach_src(I, s) { + hash = hash_index(hash, I->src[s]); + } - /* Explicitly skip branch, regfmt, vecsize, no_spill, tdd, table */ - hash = HASH(hash, I->dest_mod); + /* Explicitly skip branch, regfmt, vecsize, no_spill, tdd, table */ + hash = HASH(hash, I->dest_mod); - /* Explicitly skip other immediates */ - hash = HASH(hash, I->shift); + /* Explicitly skip other immediates */ + hash = HASH(hash, I->shift); - for (unsigned i = 0; i < ARRAY_SIZE(I->flags); ++i) - hash = HASH(hash, I->flags[i]); + for (unsigned i = 0; i < ARRAY_SIZE(I->flags); ++i) + hash = HASH(hash, I->flags[i]); - return hash; + return hash; } static bool instrs_equal(const void *_i1, const void *_i2) { - const bi_instr *i1 = _i1, *i2 = _i2; + const bi_instr *i1 = _i1, *i2 = _i2; - if (i1->op != i2->op) return false; - if (i1->nr_srcs != i2->nr_srcs) return false; - if (i1->nr_dests != i2->nr_dests) return false; + if (i1->op != i2->op) + return false; + if (i1->nr_srcs != i2->nr_srcs) + return false; + if (i1->nr_dests != i2->nr_dests) + return false; - /* Explicitly skip destinations */ + /* Explicitly skip destinations */ - bi_foreach_src(i1, s) { - bi_index s1 = i1->src[s], s2 = i2->src[s]; + bi_foreach_src(i1, s) { + bi_index s1 = i1->src[s], s2 = i2->src[s]; - if (memcmp(&s1, &s2, sizeof(s1)) != 0) - return false; - } + if (memcmp(&s1, &s2, sizeof(s1)) != 0) + return false; + } - if (i1->dest_mod != i2->dest_mod) - return false; + if (i1->dest_mod != i2->dest_mod) + return false; - if (i1->shift != i2->shift) - return false; + if (i1->shift != i2->shift) + return false; - for (unsigned i = 0; i < ARRAY_SIZE(i1->flags); ++i) { - if (i1->flags[i] != i2->flags[i]) - return false; - } + for (unsigned i = 0; i < ARRAY_SIZE(i1->flags); ++i) { + if (i1->flags[i] != i2->flags[i]) + return false; + } - return true; + return true; } /* Determines what instructions the above routines have to handle */ @@ -122,64 +125,64 @@ instrs_equal(const void *_i1, const 
void *_i2) static bool instr_can_cse(const bi_instr *I) { - switch (I->op) { - case BI_OPCODE_DTSEL_IMM: - case BI_OPCODE_DISCARD_F32: - return false; - default: - break; - } + switch (I->op) { + case BI_OPCODE_DTSEL_IMM: + case BI_OPCODE_DISCARD_F32: + return false; + default: + break; + } - /* Be conservative about which message-passing instructions we CSE, - * since most are not pure even within a thread. - */ - if (bi_opcode_props[I->op].message && I->op != BI_OPCODE_LEA_BUF_IMM) - return false; + /* Be conservative about which message-passing instructions we CSE, + * since most are not pure even within a thread. + */ + if (bi_opcode_props[I->op].message && I->op != BI_OPCODE_LEA_BUF_IMM) + return false; - if (I->branch_target) - return false; + if (I->branch_target) + return false; - return true; + return true; } void bi_opt_cse(bi_context *ctx) { - struct set *instr_set = _mesa_set_create(NULL, hash_instr, instrs_equal); + struct set *instr_set = _mesa_set_create(NULL, hash_instr, instrs_equal); - bi_foreach_block(ctx, block) { - bi_index *replacement = calloc(sizeof(bi_index), ctx->ssa_alloc); - _mesa_set_clear(instr_set, NULL); + bi_foreach_block(ctx, block) { + bi_index *replacement = calloc(sizeof(bi_index), ctx->ssa_alloc); + _mesa_set_clear(instr_set, NULL); - bi_foreach_instr_in_block(block, instr) { - /* Rewrite before trying to CSE anything so we converge - * locally in one iteration */ - bi_foreach_ssa_src(instr, s) { - if (bi_is_staging_src(instr, s)) - continue; + bi_foreach_instr_in_block(block, instr) { + /* Rewrite before trying to CSE anything so we converge + * locally in one iteration */ + bi_foreach_ssa_src(instr, s) { + if (bi_is_staging_src(instr, s)) + continue; - bi_index repl = replacement[instr->src[s].value]; - if (!bi_is_null(repl)) - bi_replace_src(instr, s, repl); - } + bi_index repl = replacement[instr->src[s].value]; + if (!bi_is_null(repl)) + bi_replace_src(instr, s, repl); + } - if (!instr_can_cse(instr)) - continue; + if (!instr_can_cse(instr)) + continue; - bool found; - struct set_entry *entry = - _mesa_set_search_or_add(instr_set, instr, &found); - if (found) { - const bi_instr *match = entry->key; + bool found; + struct set_entry *entry = + _mesa_set_search_or_add(instr_set, instr, &found); + if (found) { + const bi_instr *match = entry->key; - bi_foreach_dest(instr, d) { - replacement[instr->dest[d].value] = match->dest[d]; - } - } - } + bi_foreach_dest(instr, d) { + replacement[instr->dest[d].value] = match->dest[d]; + } + } + } - free(replacement); - } + free(replacement); + } - _mesa_set_destroy(instr_set, NULL); + _mesa_set_destroy(instr_set, NULL); } diff --git a/src/panfrost/bifrost/bi_opt_dce.c b/src/panfrost/bifrost/bi_opt_dce.c index d9668f207a9..e8e12dd9525 100644 --- a/src/panfrost/bifrost/bi_opt_dce.c +++ b/src/panfrost/bifrost/bi_opt_dce.c @@ -22,66 +22,67 @@ * SOFTWARE. */ -#include "compiler.h" #include "util/u_memory.h" +#include "compiler.h" /* A simple SSA-based mark-and-sweep dead code elimination pass. 
*/ void bi_opt_dead_code_eliminate(bi_context *ctx) { - /* Mark live values */ - BITSET_WORD *mark = calloc(sizeof(BITSET_WORD), BITSET_WORDS(ctx->ssa_alloc)); + /* Mark live values */ + BITSET_WORD *mark = + calloc(sizeof(BITSET_WORD), BITSET_WORDS(ctx->ssa_alloc)); - u_worklist worklist; - u_worklist_init(&worklist, ctx->num_blocks, NULL); + u_worklist worklist; + u_worklist_init(&worklist, ctx->num_blocks, NULL); - bi_foreach_block(ctx, block) { - bi_worklist_push_head(&worklist, block); - } + bi_foreach_block(ctx, block) { + bi_worklist_push_head(&worklist, block); + } - while(!u_worklist_is_empty(&worklist)) { - /* Pop in reverse order for backwards pass */ - bi_block *blk = bi_worklist_pop_head(&worklist); + while (!u_worklist_is_empty(&worklist)) { + /* Pop in reverse order for backwards pass */ + bi_block *blk = bi_worklist_pop_head(&worklist); - bool progress = false; + bool progress = false; - bi_foreach_instr_in_block_rev(blk, I) { - bool needed = bi_side_effects(I); + bi_foreach_instr_in_block_rev(blk, I) { + bool needed = bi_side_effects(I); - bi_foreach_dest(I, d) - needed |= BITSET_TEST(mark, I->dest[d].value); + bi_foreach_dest(I, d) + needed |= BITSET_TEST(mark, I->dest[d].value); - if (!needed) - continue; + if (!needed) + continue; - bi_foreach_ssa_src(I, s) { - progress |= !BITSET_TEST(mark, I->src[s].value); - BITSET_SET(mark, I->src[s].value); - } - } + bi_foreach_ssa_src(I, s) { + progress |= !BITSET_TEST(mark, I->src[s].value); + BITSET_SET(mark, I->src[s].value); + } + } - /* XXX: slow */ - if (progress) { - bi_foreach_block(ctx, block) - bi_worklist_push_head(&worklist, block); - } - } + /* XXX: slow */ + if (progress) { + bi_foreach_block(ctx, block) + bi_worklist_push_head(&worklist, block); + } + } - u_worklist_fini(&worklist); + u_worklist_fini(&worklist); - /* Sweep */ - bi_foreach_instr_global_safe(ctx, I) { - bool needed = bi_side_effects(I); + /* Sweep */ + bi_foreach_instr_global_safe(ctx, I) { + bool needed = bi_side_effects(I); - bi_foreach_dest(I, d) - needed |= BITSET_TEST(mark, I->dest[d].value); + bi_foreach_dest(I, d) + needed |= BITSET_TEST(mark, I->dest[d].value); - if (!needed) - bi_remove_instruction(I); - } + if (!needed) + bi_remove_instruction(I); + } - free(mark); + free(mark); } /* Post-RA liveness-based dead code analysis to clean up results of bundling */ @@ -89,39 +90,39 @@ bi_opt_dead_code_eliminate(bi_context *ctx) uint64_t MUST_CHECK bi_postra_liveness_ins(uint64_t live, bi_instr *ins) { - bi_foreach_dest(ins, d) { - if (ins->dest[d].type == BI_INDEX_REGISTER) { - unsigned nr = bi_count_write_registers(ins, d); - unsigned reg = ins->dest[d].value; - live &= ~(BITFIELD64_MASK(nr) << reg); - } - } + bi_foreach_dest(ins, d) { + if (ins->dest[d].type == BI_INDEX_REGISTER) { + unsigned nr = bi_count_write_registers(ins, d); + unsigned reg = ins->dest[d].value; + live &= ~(BITFIELD64_MASK(nr) << reg); + } + } - bi_foreach_src(ins, s) { - if (ins->src[s].type == BI_INDEX_REGISTER) { - unsigned nr = bi_count_read_registers(ins, s); - unsigned reg = ins->src[s].value; - live |= (BITFIELD64_MASK(nr) << reg); - } - } + bi_foreach_src(ins, s) { + if (ins->src[s].type == BI_INDEX_REGISTER) { + unsigned nr = bi_count_read_registers(ins, s); + unsigned reg = ins->src[s].value; + live |= (BITFIELD64_MASK(nr) << reg); + } + } - return live; + return live; } static bool bi_postra_liveness_block(bi_block *blk) { - bi_foreach_successor(blk, succ) - blk->reg_live_out |= succ->reg_live_in; + bi_foreach_successor(blk, succ) + blk->reg_live_out |= 
succ->reg_live_in; - uint64_t live = blk->reg_live_out; + uint64_t live = blk->reg_live_out; - bi_foreach_instr_in_block_rev(blk, ins) - live = bi_postra_liveness_ins(live, ins); + bi_foreach_instr_in_block_rev(blk, ins) + live = bi_postra_liveness_ins(live, ins); - bool progress = blk->reg_live_in != live; - blk->reg_live_in = live; - return progress; + bool progress = blk->reg_live_in != live; + blk->reg_live_in = live; + return progress; } /* Globally, liveness analysis uses a fixed-point algorithm based on a @@ -133,58 +134,58 @@ bi_postra_liveness_block(bi_block *blk) void bi_postra_liveness(bi_context *ctx) { - u_worklist worklist; - bi_worklist_init(ctx, &worklist); + u_worklist worklist; + bi_worklist_init(ctx, &worklist); - bi_foreach_block(ctx, block) { - block->reg_live_out = block->reg_live_in = 0; + bi_foreach_block(ctx, block) { + block->reg_live_out = block->reg_live_in = 0; - bi_worklist_push_tail(&worklist, block); - } + bi_worklist_push_tail(&worklist, block); + } - while (!u_worklist_is_empty(&worklist)) { - /* Pop off in reverse order since liveness is backwards */ - bi_block *blk = bi_worklist_pop_tail(&worklist); + while (!u_worklist_is_empty(&worklist)) { + /* Pop off in reverse order since liveness is backwards */ + bi_block *blk = bi_worklist_pop_tail(&worklist); - /* Update liveness information. If we made progress, we need to - * reprocess the predecessors - */ - if (bi_postra_liveness_block(blk)) { - bi_foreach_predecessor(blk, pred) - bi_worklist_push_head(&worklist, *pred); - } - } + /* Update liveness information. If we made progress, we need to + * reprocess the predecessors + */ + if (bi_postra_liveness_block(blk)) { + bi_foreach_predecessor(blk, pred) + bi_worklist_push_head(&worklist, *pred); + } + } - u_worklist_fini(&worklist); + u_worklist_fini(&worklist); } void bi_opt_dce_post_ra(bi_context *ctx) { - bi_postra_liveness(ctx); + bi_postra_liveness(ctx); - bi_foreach_block_rev(ctx, block) { - uint64_t live = block->reg_live_out; + bi_foreach_block_rev(ctx, block) { + uint64_t live = block->reg_live_out; - bi_foreach_instr_in_block_rev(block, ins) { - if (ins->op == BI_OPCODE_DTSEL_IMM) - ins->dest[0] = bi_null(); + bi_foreach_instr_in_block_rev(block, ins) { + if (ins->op == BI_OPCODE_DTSEL_IMM) + ins->dest[0] = bi_null(); - bi_foreach_dest(ins, d) { - if (ins->dest[d].type != BI_INDEX_REGISTER) - continue; + bi_foreach_dest(ins, d) { + if (ins->dest[d].type != BI_INDEX_REGISTER) + continue; - unsigned nr = bi_count_write_registers(ins, d); - unsigned reg = ins->dest[d].value; - uint64_t mask = (BITFIELD64_MASK(nr) << reg); - bool cullable = (ins->op != BI_OPCODE_BLEND); - cullable &= !bi_opcode_props[ins->op].sr_write; + unsigned nr = bi_count_write_registers(ins, d); + unsigned reg = ins->dest[d].value; + uint64_t mask = (BITFIELD64_MASK(nr) << reg); + bool cullable = (ins->op != BI_OPCODE_BLEND); + cullable &= !bi_opcode_props[ins->op].sr_write; - if (!(live & mask) && cullable) - ins->dest[d] = bi_null(); - } + if (!(live & mask) && cullable) + ins->dest[d] = bi_null(); + } - live = bi_postra_liveness_ins(live, ins); - } - } + live = bi_postra_liveness_ins(live, ins); + } + } } diff --git a/src/panfrost/bifrost/bi_opt_dual_tex.c b/src/panfrost/bifrost/bi_opt_dual_tex.c index 65fbd355949..44f4dddffba 100644 --- a/src/panfrost/bifrost/bi_opt_dual_tex.c +++ b/src/panfrost/bifrost/bi_opt_dual_tex.c @@ -21,8 +21,8 @@ * SOFTWARE. 
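/*
 * A minimal, self-contained sketch of the post-RA liveness transfer function
 * implemented by bi_postra_liveness_ins above, assuming a toy instruction
 * type instead of the driver's bi_instr (toy_ins and toy_liveness_ins are
 * illustrative names only). Walking bottom-up, registers written by an
 * instruction die above it and registers it reads become live; clearing the
 * writes before setting the reads keeps a register live when the same
 * instruction both reads and writes it.
 */
#include <stdint.h>

struct toy_ins {
   unsigned dest_reg, dest_count; /* writes registers [dest_reg, dest_reg + dest_count) */
   unsigned src_reg, src_count;   /* reads registers [src_reg, src_reg + src_count) */
};

static uint64_t
toy_liveness_ins(uint64_t live, const struct toy_ins *ins)
{
   /* Assumes the per-access register counts are small (< 64), as with
    * staging register groups */
   live &= ~(((1ull << ins->dest_count) - 1) << ins->dest_reg);
   live |= ((1ull << ins->src_count) - 1) << ins->src_reg;
   return live;
}

/* Folding this over a block in reverse turns reg_live_out into reg_live_in,
 * which is what bi_postra_liveness_block iterates to a fixed point. */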
*/ -#include "compiler.h" #include "bi_builder.h" +#include "compiler.h" #define XXH_INLINE_ALL #include "util/xxhash.h" @@ -51,58 +51,60 @@ static inline bool bi_can_fuse_dual_tex(bi_instr *I, bool fuse_zero_lod) { - return (I->op == BI_OPCODE_TEXS_2D_F32 || I->op == BI_OPCODE_TEXS_2D_F16) && - (I->texture_index < 4 && I->sampler_index < 4) && - (I->lod_mode == fuse_zero_lod); + return (I->op == BI_OPCODE_TEXS_2D_F32 || I->op == BI_OPCODE_TEXS_2D_F16) && + (I->texture_index < 4 && I->sampler_index < 4) && + (I->lod_mode == fuse_zero_lod); } static enum bifrost_texture_format bi_format_for_texs_2d(enum bi_opcode op) { - switch (op) { - case BI_OPCODE_TEXS_2D_F32: return BIFROST_TEXTURE_FORMAT_F32; - case BI_OPCODE_TEXS_2D_F16: return BIFROST_TEXTURE_FORMAT_F16; - default: unreachable("Invalid TEXS_2D instruction"); - } + switch (op) { + case BI_OPCODE_TEXS_2D_F32: + return BIFROST_TEXTURE_FORMAT_F32; + case BI_OPCODE_TEXS_2D_F16: + return BIFROST_TEXTURE_FORMAT_F16; + default: + unreachable("Invalid TEXS_2D instruction"); + } } static void bi_fuse_dual(bi_context *ctx, bi_instr *I1, bi_instr *I2) { - /* Construct a texture operation descriptor for the dual texture */ - struct bifrost_dual_texture_operation desc = { - .mode = BIFROST_TEXTURE_OPERATION_DUAL, + /* Construct a texture operation descriptor for the dual texture */ + struct bifrost_dual_texture_operation desc = { + .mode = BIFROST_TEXTURE_OPERATION_DUAL, - .primary_texture_index = I1->texture_index, - .primary_sampler_index = I1->sampler_index, - .primary_format = bi_format_for_texs_2d(I1->op), - .primary_mask = 0xF, + .primary_texture_index = I1->texture_index, + .primary_sampler_index = I1->sampler_index, + .primary_format = bi_format_for_texs_2d(I1->op), + .primary_mask = 0xF, - .secondary_texture_index = I2->texture_index, - .secondary_sampler_index = I2->sampler_index, - .secondary_format = bi_format_for_texs_2d(I2->op), - .secondary_mask = 0xF, - }; + .secondary_texture_index = I2->texture_index, + .secondary_sampler_index = I2->sampler_index, + .secondary_format = bi_format_for_texs_2d(I2->op), + .secondary_mask = 0xF, + }; - /* LOD mode is implied in a shader stage */ - assert(I1->lod_mode == I2->lod_mode); + /* LOD mode is implied in a shader stage */ + assert(I1->lod_mode == I2->lod_mode); - /* Insert before the earlier instruction in case its result is consumed - * before the later instruction - */ - bi_builder b = bi_init_builder(ctx, bi_before_instr(I1)); + /* Insert before the earlier instruction in case its result is consumed + * before the later instruction + */ + bi_builder b = bi_init_builder(ctx, bi_before_instr(I1)); - bi_instr *I = bi_texc_dual_to(&b, - I1->dest[0], I2->dest[0], bi_null(), /* staging */ - I1->src[0], I1->src[1], /* coordinates */ - bi_imm_u32(bi_dual_tex_as_u32(desc)), I1->lod_mode, - bi_count_write_registers(I1, 0), - bi_count_write_registers(I2, 0)); + bi_instr *I = bi_texc_dual_to( + &b, I1->dest[0], I2->dest[0], bi_null(), /* staging */ + I1->src[0], I1->src[1], /* coordinates */ + bi_imm_u32(bi_dual_tex_as_u32(desc)), I1->lod_mode, + bi_count_write_registers(I1, 0), bi_count_write_registers(I2, 0)); - I->skip = I1->skip && I2->skip; + I->skip = I1->skip && I2->skip; - bi_remove_instruction(I1); - bi_remove_instruction(I2); + bi_remove_instruction(I1); + bi_remove_instruction(I2); } #define HASH(hash, data) XXH32(&(data), sizeof(data), hash) @@ -110,45 +112,45 @@ bi_fuse_dual(bi_context *ctx, bi_instr *I1, bi_instr *I2) static uint32_t coord_hash(const void *key) { - const bi_instr *I = 
key; + const bi_instr *I = key; - return XXH32(&I->src[0], sizeof(I->src[0]) + sizeof(I->src[1]), 0); + return XXH32(&I->src[0], sizeof(I->src[0]) + sizeof(I->src[1]), 0); } static bool coord_equal(const void *key1, const void *key2) { - const bi_instr *I = key1; - const bi_instr *J = key2; + const bi_instr *I = key1; + const bi_instr *J = key2; - return memcmp(&I->src[0], &J->src[0], - sizeof(I->src[0]) + sizeof(I->src[1])) == 0; + return memcmp(&I->src[0], &J->src[0], + sizeof(I->src[0]) + sizeof(I->src[1])) == 0; } static void bi_opt_fuse_dual_texture_block(bi_context *ctx, bi_block *block) { - struct set *set = _mesa_set_create(ctx, coord_hash, coord_equal); - bool fuse_zero_lod = (ctx->stage != MESA_SHADER_FRAGMENT); - bool found = false; + struct set *set = _mesa_set_create(ctx, coord_hash, coord_equal); + bool fuse_zero_lod = (ctx->stage != MESA_SHADER_FRAGMENT); + bool found = false; - bi_foreach_instr_in_block_safe(block, I) { - if (!bi_can_fuse_dual_tex(I, fuse_zero_lod)) continue; + bi_foreach_instr_in_block_safe(block, I) { + if (!bi_can_fuse_dual_tex(I, fuse_zero_lod)) + continue; - struct set_entry *ent = _mesa_set_search_or_add(set, I, &found); + struct set_entry *ent = _mesa_set_search_or_add(set, I, &found); - if (found) { - bi_fuse_dual(ctx, (bi_instr *) ent->key, I); - _mesa_set_remove(set, ent); - } - } + if (found) { + bi_fuse_dual(ctx, (bi_instr *)ent->key, I); + _mesa_set_remove(set, ent); + } + } } void bi_opt_fuse_dual_texture(bi_context *ctx) { - bi_foreach_block(ctx, block) { - bi_opt_fuse_dual_texture_block(ctx, block); - } - + bi_foreach_block(ctx, block) { + bi_opt_fuse_dual_texture_block(ctx, block); + } } diff --git a/src/panfrost/bifrost/bi_opt_message_preload.c b/src/panfrost/bifrost/bi_opt_message_preload.c index 1ca283db1c0..80ebc57ff31 100644 --- a/src/panfrost/bifrost/bi_opt_message_preload.c +++ b/src/panfrost/bifrost/bi_opt_message_preload.c @@ -21,8 +21,8 @@ * SOFTWARE. */ -#include "compiler.h" #include "bi_builder.h" +#include "compiler.h" /* Bifrost v7 can preload up to two messages of the form: * @@ -35,8 +35,8 @@ static bool bi_is_regfmt_float(enum bi_register_format regfmt) { - return (regfmt == BI_REGISTER_FORMAT_F32) || - (regfmt == BI_REGISTER_FORMAT_F16); + return (regfmt == BI_REGISTER_FORMAT_F32) || + (regfmt == BI_REGISTER_FORMAT_F16); } /* @@ -46,107 +46,107 @@ bi_is_regfmt_float(enum bi_register_format regfmt) static bool bi_can_interp_at_sample(bi_instr *I) { - /* .sample mode with r61 corresponds to per-sample interpolation */ - if (I->sample == BI_SAMPLE_SAMPLE) - return bi_is_value_equiv(I->src[0], bi_register(61)); + /* .sample mode with r61 corresponds to per-sample interpolation */ + if (I->sample == BI_SAMPLE_SAMPLE) + return bi_is_value_equiv(I->src[0], bi_register(61)); - /* If the shader runs with pixel-frequency shading, .sample is - * equivalent to .center, so allow .center - * - * If the shader runs with sample-frequency shading, .sample and .center - * are not equivalent. However, the ESSL 3.20 specification - * stipulates in section 4.5 ("Interpolation Qualifiers"): - * - * for fragment shader input variables qualified with neither - * centroid nor sample, the value of the assigned variable may be - * interpolated anywhere within the pixel and a single value may be - * assigned to each sample within the pixel, to the extent permitted - * by the OpenGL ES Specification. - * - * We only produce .center for variables qualified with neither centroid - * nor sample, so if .center is specified this section applies. 
This - * suggests that, although per-pixel interpolation is allowed, it is not - * mandated ("may" rather than "must" or "should"). Therefore it appears - * safe to substitute sample. - */ - return (I->sample == BI_SAMPLE_CENTER); + /* If the shader runs with pixel-frequency shading, .sample is + * equivalent to .center, so allow .center + * + * If the shader runs with sample-frequency shading, .sample and .center + * are not equivalent. However, the ESSL 3.20 specification + * stipulates in section 4.5 ("Interpolation Qualifiers"): + * + * for fragment shader input variables qualified with neither + * centroid nor sample, the value of the assigned variable may be + * interpolated anywhere within the pixel and a single value may be + * assigned to each sample within the pixel, to the extent permitted + * by the OpenGL ES Specification. + * + * We only produce .center for variables qualified with neither centroid + * nor sample, so if .center is specified this section applies. This + * suggests that, although per-pixel interpolation is allowed, it is not + * mandated ("may" rather than "must" or "should"). Therefore it appears + * safe to substitute sample. + */ + return (I->sample == BI_SAMPLE_CENTER); } static bool bi_can_preload_ld_var(bi_instr *I) { - return (I->op == BI_OPCODE_LD_VAR_IMM) && - bi_can_interp_at_sample(I) && - bi_is_regfmt_float(I->register_format); + return (I->op == BI_OPCODE_LD_VAR_IMM) && bi_can_interp_at_sample(I) && + bi_is_regfmt_float(I->register_format); } static bool bi_is_var_tex(enum bi_opcode op) { - return (op == BI_OPCODE_VAR_TEX_F32) || (op == BI_OPCODE_VAR_TEX_F16); + return (op == BI_OPCODE_VAR_TEX_F32) || (op == BI_OPCODE_VAR_TEX_F16); } void bi_opt_message_preload(bi_context *ctx) { - unsigned nr_preload = 0; + unsigned nr_preload = 0; - /* We only preload from the first block */ - bi_block *block = bi_start_block(&ctx->blocks); - bi_builder b = bi_init_builder(ctx, bi_before_nonempty_block(block)); + /* We only preload from the first block */ + bi_block *block = bi_start_block(&ctx->blocks); + bi_builder b = bi_init_builder(ctx, bi_before_nonempty_block(block)); - bi_foreach_instr_in_block_safe(block, I) { - if (I->nr_dests != 1) continue; + bi_foreach_instr_in_block_safe(block, I) { + if (I->nr_dests != 1) + continue; - struct bifrost_message_preload msg; + struct bifrost_message_preload msg; - if (bi_can_preload_ld_var(I)) { - msg = (struct bifrost_message_preload) { - .enabled = true, - .varying_index = I->varying_index, - .fp16 = (I->register_format == BI_REGISTER_FORMAT_F16), - .num_components = I->vecsize + 1, - }; - } else if (bi_is_var_tex(I->op)) { - msg = (struct bifrost_message_preload) { - .enabled = true, - .texture = true, - .varying_index = I->varying_index, - .texture_index = I->texture_index, - .fp16 = (I->op == BI_OPCODE_VAR_TEX_F16), - .skip = I->skip, - .zero_lod = I->lod_mode, - }; - } else { - continue; - } + if (bi_can_preload_ld_var(I)) { + msg = (struct bifrost_message_preload){ + .enabled = true, + .varying_index = I->varying_index, + .fp16 = (I->register_format == BI_REGISTER_FORMAT_F16), + .num_components = I->vecsize + 1, + }; + } else if (bi_is_var_tex(I->op)) { + msg = (struct bifrost_message_preload){ + .enabled = true, + .texture = true, + .varying_index = I->varying_index, + .texture_index = I->texture_index, + .fp16 = (I->op == BI_OPCODE_VAR_TEX_F16), + .skip = I->skip, + .zero_lod = I->lod_mode, + }; + } else { + continue; + } - /* Report the preloading */ - ctx->info.bifrost->messages[nr_preload] = msg; + /* 
Report the preloading */ + ctx->info.bifrost->messages[nr_preload] = msg; - /* Replace with a collect of preloaded registers. The collect - * kills the moves, so the collect is free (it is coalesced). - */ - b.cursor = bi_before_instr(I); + /* Replace with a collect of preloaded registers. The collect + * kills the moves, so the collect is free (it is coalesced). + */ + b.cursor = bi_before_instr(I); - unsigned nr = bi_count_write_registers(I, 0); - bi_instr *collect = bi_collect_i32_to(&b, I->dest[0], nr); + unsigned nr = bi_count_write_registers(I, 0); + bi_instr *collect = bi_collect_i32_to(&b, I->dest[0], nr); - /* The registers themselves must be preloaded at the start of - * the program. Preloaded registers are coalesced, so these - * moves are free. - */ - b.cursor = bi_before_block(block); - bi_foreach_src(collect, i) { - unsigned reg = (nr_preload * 4) + i; + /* The registers themselves must be preloaded at the start of + * the program. Preloaded registers are coalesced, so these + * moves are free. + */ + b.cursor = bi_before_block(block); + bi_foreach_src(collect, i) { + unsigned reg = (nr_preload * 4) + i; - collect->src[i] = bi_mov_i32(&b, bi_register(reg)); - } + collect->src[i] = bi_mov_i32(&b, bi_register(reg)); + } - bi_remove_instruction(I); + bi_remove_instruction(I); - /* Maximum number of preloaded messages */ - if ((++nr_preload) == 2) - break; - } + /* Maximum number of preloaded messages */ + if ((++nr_preload) == 2) + break; + } } diff --git a/src/panfrost/bifrost/bi_opt_mod_props.c b/src/panfrost/bifrost/bi_opt_mod_props.c index 4888972353b..2784a326feb 100644 --- a/src/panfrost/bifrost/bi_opt_mod_props.c +++ b/src/panfrost/bifrost/bi_opt_mod_props.c @@ -22,8 +22,8 @@ * SOFTWARE. */ -#include "compiler.h" #include "bi_builder.h" +#include "compiler.h" /* * Due to a Bifrost encoding restriction, some instructions cannot have an abs @@ -33,76 +33,76 @@ static bool bi_would_impact_abs(unsigned arch, bi_instr *I, bi_index repl, unsigned s) { - return (arch <= 8) && I->src[1 - s].abs && - bi_is_word_equiv(I->src[1 - s], repl); + return (arch <= 8) && I->src[1 - s].abs && + bi_is_word_equiv(I->src[1 - s], repl); } static bool bi_takes_fabs(unsigned arch, bi_instr *I, bi_index repl, unsigned s) { - switch (I->op) { - case BI_OPCODE_FCMP_V2F16: - case BI_OPCODE_FMAX_V2F16: - case BI_OPCODE_FMIN_V2F16: - return !bi_would_impact_abs(arch, I, repl, s); - case BI_OPCODE_FADD_V2F16: - /* - * For FADD.v2f16, the FMA pipe has the abs encoding hazard, - * while the FADD pipe cannot encode a clamp. Either case in - * isolation can be worked around in the scheduler, but both - * together is impossible to encode. Avoid the hazard. - */ - return !(I->clamp && bi_would_impact_abs(arch, I, repl, s)); - case BI_OPCODE_V2F32_TO_V2F16: - /* TODO: Needs both match or lower */ - return false; - case BI_OPCODE_FLOG_TABLE_F32: - /* TODO: Need to check mode */ - return false; - default: - return bi_opcode_props[I->op].abs & BITFIELD_BIT(s); - } + switch (I->op) { + case BI_OPCODE_FCMP_V2F16: + case BI_OPCODE_FMAX_V2F16: + case BI_OPCODE_FMIN_V2F16: + return !bi_would_impact_abs(arch, I, repl, s); + case BI_OPCODE_FADD_V2F16: + /* + * For FADD.v2f16, the FMA pipe has the abs encoding hazard, + * while the FADD pipe cannot encode a clamp. Either case in + * isolation can be worked around in the scheduler, but both + * together is impossible to encode. Avoid the hazard. 
+ */ + return !(I->clamp && bi_would_impact_abs(arch, I, repl, s)); + case BI_OPCODE_V2F32_TO_V2F16: + /* TODO: Needs both match or lower */ + return false; + case BI_OPCODE_FLOG_TABLE_F32: + /* TODO: Need to check mode */ + return false; + default: + return bi_opcode_props[I->op].abs & BITFIELD_BIT(s); + } } static bool bi_takes_fneg(unsigned arch, bi_instr *I, unsigned s) { - switch (I->op) { - case BI_OPCODE_CUBE_SSEL: - case BI_OPCODE_CUBE_TSEL: - case BI_OPCODE_CUBEFACE: - /* TODO: Bifrost encoding restriction: need to match or lower */ - return arch >= 9; - case BI_OPCODE_FREXPE_F32: - case BI_OPCODE_FREXPE_V2F16: - case BI_OPCODE_FLOG_TABLE_F32: - /* TODO: Need to check mode */ - return false; - default: - return bi_opcode_props[I->op].neg & BITFIELD_BIT(s); - } + switch (I->op) { + case BI_OPCODE_CUBE_SSEL: + case BI_OPCODE_CUBE_TSEL: + case BI_OPCODE_CUBEFACE: + /* TODO: Bifrost encoding restriction: need to match or lower */ + return arch >= 9; + case BI_OPCODE_FREXPE_F32: + case BI_OPCODE_FREXPE_V2F16: + case BI_OPCODE_FLOG_TABLE_F32: + /* TODO: Need to check mode */ + return false; + default: + return bi_opcode_props[I->op].neg & BITFIELD_BIT(s); + } } static bool bi_is_fabsneg(enum bi_opcode op, enum bi_size size) { - return (size == BI_SIZE_32 && op == BI_OPCODE_FABSNEG_F32) || - (size == BI_SIZE_16 && op == BI_OPCODE_FABSNEG_V2F16); + return (size == BI_SIZE_32 && op == BI_OPCODE_FABSNEG_F32) || + (size == BI_SIZE_16 && op == BI_OPCODE_FABSNEG_V2F16); } static enum bi_swizzle bi_compose_swizzle_16(enum bi_swizzle a, enum bi_swizzle b) { - assert(a <= BI_SWIZZLE_H11); - assert(b <= BI_SWIZZLE_H11); + assert(a <= BI_SWIZZLE_H11); + assert(b <= BI_SWIZZLE_H11); - bool al = (a & BI_SWIZZLE_H10); - bool ar = (a & BI_SWIZZLE_H01); - bool bl = (b & BI_SWIZZLE_H10); - bool br = (b & BI_SWIZZLE_H01); + bool al = (a & BI_SWIZZLE_H10); + bool ar = (a & BI_SWIZZLE_H01); + bool bl = (b & BI_SWIZZLE_H10); + bool br = (b & BI_SWIZZLE_H01); - return ((al ? br : bl) ? BI_SWIZZLE_H10 : 0) | - ((ar ? br : bl) ? BI_SWIZZLE_H01 : 0); + return ((al ? br : bl) ? BI_SWIZZLE_H10 : 0) | + ((ar ? br : bl) ? 
BI_SWIZZLE_H01 : 0); } /* Like bi_replace_index, but composes instead of overwrites */ @@ -110,17 +110,17 @@ bi_compose_swizzle_16(enum bi_swizzle a, enum bi_swizzle b) static inline bi_index bi_compose_float_index(bi_index old, bi_index repl) { - /* abs(-x) = abs(+x) so ignore repl.neg if old.abs is set, otherwise - * -(-x) = x but -(+x) = +(-x) so need to exclusive-or the negates */ - repl.neg = old.neg ^ (repl.neg && !old.abs); + /* abs(-x) = abs(+x) so ignore repl.neg if old.abs is set, otherwise + * -(-x) = x but -(+x) = +(-x) so need to exclusive-or the negates */ + repl.neg = old.neg ^ (repl.neg && !old.abs); - /* +/- abs(+/- abs(x)) = +/- abs(x), etc so just or the two */ - repl.abs |= old.abs; + /* +/- abs(+/- abs(x)) = +/- abs(x), etc so just or the two */ + repl.abs |= old.abs; - /* Use the old swizzle to select from the replacement swizzle */ - repl.swizzle = bi_compose_swizzle_16(old.swizzle, repl.swizzle); + /* Use the old swizzle to select from the replacement swizzle */ + repl.swizzle = bi_compose_swizzle_16(old.swizzle, repl.swizzle); - return repl; + return repl; } /* DISCARD.b32(FCMP.f(x, y)) --> DISCARD.f(x, y) */ @@ -128,30 +128,35 @@ bi_compose_float_index(bi_index old, bi_index repl) static inline bool bi_fuse_discard_fcmp(bi_context *ctx, bi_instr *I, bi_instr *mod) { - if (!mod) return false; - if (I->op != BI_OPCODE_DISCARD_B32) return false; - if (mod->op != BI_OPCODE_FCMP_F32 && mod->op != BI_OPCODE_FCMP_V2F16) return false; - if (mod->cmpf >= BI_CMPF_GTLT) return false; + if (!mod) + return false; + if (I->op != BI_OPCODE_DISCARD_B32) + return false; + if (mod->op != BI_OPCODE_FCMP_F32 && mod->op != BI_OPCODE_FCMP_V2F16) + return false; + if (mod->cmpf >= BI_CMPF_GTLT) + return false; - /* result_type doesn't matter */ + /* result_type doesn't matter */ - /* .abs and .neg modifiers allowed on Valhall DISCARD but not Bifrost */ - bool absneg = mod->src[0].neg || mod->src[0].abs; - absneg |= mod->src[1].neg || mod->src[1].abs; + /* .abs and .neg modifiers allowed on Valhall DISCARD but not Bifrost */ + bool absneg = mod->src[0].neg || mod->src[0].abs; + absneg |= mod->src[1].neg || mod->src[1].abs; - if (ctx->arch <= 8 && absneg) return false; + if (ctx->arch <= 8 && absneg) + return false; - enum bi_swizzle r = I->src[0].swizzle; + enum bi_swizzle r = I->src[0].swizzle; - bi_builder b = bi_init_builder(ctx, bi_before_instr(I)); - I = bi_discard_f32(&b, mod->src[0], mod->src[1], mod->cmpf); + bi_builder b = bi_init_builder(ctx, bi_before_instr(I)); + I = bi_discard_f32(&b, mod->src[0], mod->src[1], mod->cmpf); - if (mod->op == BI_OPCODE_FCMP_V2F16) { - I->src[0].swizzle = bi_compose_swizzle_16(r, I->src[0].swizzle); - I->src[1].swizzle = bi_compose_swizzle_16(r, I->src[1].swizzle); - } + if (mod->op == BI_OPCODE_FCMP_V2F16) { + I->src[0].swizzle = bi_compose_swizzle_16(r, I->src[0].swizzle); + I->src[1].swizzle = bi_compose_swizzle_16(r, I->src[1].swizzle); + } - return true; + return true; } /* @@ -159,80 +164,80 @@ bi_fuse_discard_fcmp(bi_context *ctx, bi_instr *I, bi_instr *mod) * because all 8-bit and 16-bit integers may be represented exactly as fp32. 
*/ struct { - enum bi_opcode inner; - enum bi_opcode outer; - enum bi_opcode replacement; + enum bi_opcode inner; + enum bi_opcode outer; + enum bi_opcode replacement; } bi_small_int_patterns[] = { - { BI_OPCODE_S8_TO_S32, BI_OPCODE_S32_TO_F32, BI_OPCODE_S8_TO_F32 }, - { BI_OPCODE_U8_TO_U32, BI_OPCODE_U32_TO_F32, BI_OPCODE_U8_TO_F32 }, - { BI_OPCODE_U8_TO_U32, BI_OPCODE_S32_TO_F32, BI_OPCODE_U8_TO_F32 }, - { BI_OPCODE_S16_TO_S32, BI_OPCODE_S32_TO_F32, BI_OPCODE_S16_TO_F32 }, - { BI_OPCODE_U16_TO_U32, BI_OPCODE_U32_TO_F32, BI_OPCODE_U16_TO_F32 }, - { BI_OPCODE_U16_TO_U32, BI_OPCODE_S32_TO_F32, BI_OPCODE_U16_TO_F32 }, + {BI_OPCODE_S8_TO_S32, BI_OPCODE_S32_TO_F32, BI_OPCODE_S8_TO_F32}, + {BI_OPCODE_U8_TO_U32, BI_OPCODE_U32_TO_F32, BI_OPCODE_U8_TO_F32}, + {BI_OPCODE_U8_TO_U32, BI_OPCODE_S32_TO_F32, BI_OPCODE_U8_TO_F32}, + {BI_OPCODE_S16_TO_S32, BI_OPCODE_S32_TO_F32, BI_OPCODE_S16_TO_F32}, + {BI_OPCODE_U16_TO_U32, BI_OPCODE_U32_TO_F32, BI_OPCODE_U16_TO_F32}, + {BI_OPCODE_U16_TO_U32, BI_OPCODE_S32_TO_F32, BI_OPCODE_U16_TO_F32}, }; static inline void bi_fuse_small_int_to_f32(bi_instr *I, bi_instr *mod) { - for (unsigned i = 0; i < ARRAY_SIZE(bi_small_int_patterns); ++i) { - if (I->op != bi_small_int_patterns[i].outer) - continue; - if (mod->op != bi_small_int_patterns[i].inner) - continue; + for (unsigned i = 0; i < ARRAY_SIZE(bi_small_int_patterns); ++i) { + if (I->op != bi_small_int_patterns[i].outer) + continue; + if (mod->op != bi_small_int_patterns[i].inner) + continue; - assert(I->src[0].swizzle == BI_SWIZZLE_H01); - I->src[0] = mod->src[0]; - I->round = BI_ROUND_NONE; - I->op = bi_small_int_patterns[i].replacement; - } + assert(I->src[0].swizzle == BI_SWIZZLE_H01); + I->src[0] = mod->src[0]; + I->round = BI_ROUND_NONE; + I->op = bi_small_int_patterns[i].replacement; + } } void bi_opt_mod_prop_forward(bi_context *ctx) { - bi_instr **lut = calloc(sizeof(bi_instr *), ctx->ssa_alloc); + bi_instr **lut = calloc(sizeof(bi_instr *), ctx->ssa_alloc); - bi_foreach_instr_global_safe(ctx, I) { - /* Try fusing FCMP into DISCARD.b32, building a new DISCARD.f32 - * instruction. As this is the only optimization DISCARD is - * involved in, this shortcircuits other processing. - */ - if (I->op == BI_OPCODE_DISCARD_B32) { - if (bi_is_ssa(I->src[0]) && - bi_fuse_discard_fcmp(ctx, I, lut[I->src[0].value])) { - bi_remove_instruction(I); - } + bi_foreach_instr_global_safe(ctx, I) { + /* Try fusing FCMP into DISCARD.b32, building a new DISCARD.f32 + * instruction. As this is the only optimization DISCARD is + * involved in, this shortcircuits other processing. 
+ */ + if (I->op == BI_OPCODE_DISCARD_B32) { + if (bi_is_ssa(I->src[0]) && + bi_fuse_discard_fcmp(ctx, I, lut[I->src[0].value])) { + bi_remove_instruction(I); + } - continue; - } + continue; + } - bi_foreach_dest(I, d) { - lut[I->dest[d].value] = I; - } + bi_foreach_dest(I, d) { + lut[I->dest[d].value] = I; + } - bi_foreach_ssa_src(I, s) { - bi_instr *mod = lut[I->src[s].value]; + bi_foreach_ssa_src(I, s) { + bi_instr *mod = lut[I->src[s].value]; - if (!mod) - continue; + if (!mod) + continue; - unsigned size = bi_opcode_props[I->op].size; + unsigned size = bi_opcode_props[I->op].size; - bi_fuse_small_int_to_f32(I, mod); + bi_fuse_small_int_to_f32(I, mod); - if (bi_is_fabsneg(mod->op, size)) { - if (mod->src[0].abs && !bi_takes_fabs(ctx->arch, I, mod->src[0], s)) - continue; + if (bi_is_fabsneg(mod->op, size)) { + if (mod->src[0].abs && !bi_takes_fabs(ctx->arch, I, mod->src[0], s)) + continue; - if (mod->src[0].neg && !bi_takes_fneg(ctx->arch, I, s)) - continue; + if (mod->src[0].neg && !bi_takes_fneg(ctx->arch, I, s)) + continue; - I->src[s] = bi_compose_float_index(I->src[s], mod->src[0]); - } - } - } + I->src[s] = bi_compose_float_index(I->src[s], mod->src[0]); + } + } + } - free(lut); + free(lut); } /* RSCALE has restrictions on how the clamp may be used, only used for @@ -241,199 +246,207 @@ bi_opt_mod_prop_forward(bi_context *ctx) static bool bi_takes_clamp(bi_instr *I) { - switch (I->op) { - case BI_OPCODE_FMA_RSCALE_F32: - case BI_OPCODE_FMA_RSCALE_V2F16: - case BI_OPCODE_FADD_RSCALE_F32: - return false; - case BI_OPCODE_FADD_V2F16: - /* Encoding restriction */ - return !(I->src[0].abs && I->src[1].abs && - bi_is_word_equiv(I->src[0], I->src[1])); - default: - return bi_opcode_props[I->op].clamp; - } + switch (I->op) { + case BI_OPCODE_FMA_RSCALE_F32: + case BI_OPCODE_FMA_RSCALE_V2F16: + case BI_OPCODE_FADD_RSCALE_F32: + return false; + case BI_OPCODE_FADD_V2F16: + /* Encoding restriction */ + return !(I->src[0].abs && I->src[1].abs && + bi_is_word_equiv(I->src[0], I->src[1])); + default: + return bi_opcode_props[I->op].clamp; + } } static bool bi_is_fclamp(enum bi_opcode op, enum bi_size size) { - return (size == BI_SIZE_32 && op == BI_OPCODE_FCLAMP_F32) || - (size == BI_SIZE_16 && op == BI_OPCODE_FCLAMP_V2F16); + return (size == BI_SIZE_32 && op == BI_OPCODE_FCLAMP_F32) || + (size == BI_SIZE_16 && op == BI_OPCODE_FCLAMP_V2F16); } static bool bi_optimizer_clamp(bi_instr *I, bi_instr *use) { - if (!bi_is_fclamp(use->op, bi_opcode_props[I->op].size)) return false; - if (!bi_takes_clamp(I)) return false; + if (!bi_is_fclamp(use->op, bi_opcode_props[I->op].size)) + return false; + if (!bi_takes_clamp(I)) + return false; - /* Clamps are bitfields (clamp_m1_1/clamp_0_inf) so composition is OR */ - I->clamp |= use->clamp; - I->dest[0] = use->dest[0]; - return true; + /* Clamps are bitfields (clamp_m1_1/clamp_0_inf) so composition is OR */ + I->clamp |= use->clamp; + I->dest[0] = use->dest[0]; + return true; } static enum bi_opcode bi_sized_mux_op(unsigned size) { - switch (size) { - case 8: return BI_OPCODE_MUX_V4I8; - case 16: return BI_OPCODE_MUX_V2I16; - case 32: return BI_OPCODE_MUX_I32; - default: unreachable("invalid size"); - } + switch (size) { + case 8: + return BI_OPCODE_MUX_V4I8; + case 16: + return BI_OPCODE_MUX_V2I16; + case 32: + return BI_OPCODE_MUX_I32; + default: + unreachable("invalid size"); + } } static bool bi_is_fixed_mux(bi_instr *I, unsigned size, bi_index v1) { - return I->op == bi_sized_mux_op(size) && - bi_is_value_equiv(I->src[0], bi_zero()) && - 
bi_is_value_equiv(I->src[1], v1); + return I->op == bi_sized_mux_op(size) && + bi_is_value_equiv(I->src[0], bi_zero()) && + bi_is_value_equiv(I->src[1], v1); } static bool bi_takes_int_result_type(enum bi_opcode op) { - switch (op) { - case BI_OPCODE_ICMP_I32: - case BI_OPCODE_ICMP_S32: - case BI_OPCODE_ICMP_U32: - case BI_OPCODE_ICMP_V2I16: - case BI_OPCODE_ICMP_V2S16: - case BI_OPCODE_ICMP_V2U16: - case BI_OPCODE_ICMP_V4I8: - case BI_OPCODE_ICMP_V4S8: - case BI_OPCODE_ICMP_V4U8: - case BI_OPCODE_FCMP_F32: - case BI_OPCODE_FCMP_V2F16: - return true; - default: - return false; - } + switch (op) { + case BI_OPCODE_ICMP_I32: + case BI_OPCODE_ICMP_S32: + case BI_OPCODE_ICMP_U32: + case BI_OPCODE_ICMP_V2I16: + case BI_OPCODE_ICMP_V2S16: + case BI_OPCODE_ICMP_V2U16: + case BI_OPCODE_ICMP_V4I8: + case BI_OPCODE_ICMP_V4S8: + case BI_OPCODE_ICMP_V4U8: + case BI_OPCODE_FCMP_F32: + case BI_OPCODE_FCMP_V2F16: + return true; + default: + return false; + } } static bool bi_takes_float_result_type(enum bi_opcode op) { - return (op == BI_OPCODE_FCMP_F32) || - (op == BI_OPCODE_FCMP_V2F16); + return (op == BI_OPCODE_FCMP_F32) || (op == BI_OPCODE_FCMP_V2F16); } /* CMP+MUX -> CMP with result type */ static bool bi_optimizer_result_type(bi_instr *I, bi_instr *mux) { - if (bi_opcode_props[I->op].size != bi_opcode_props[mux->op].size) - return false; + if (bi_opcode_props[I->op].size != bi_opcode_props[mux->op].size) + return false; - if (bi_is_fixed_mux(mux, 32, bi_imm_f32(1.0)) || - bi_is_fixed_mux(mux, 16, bi_imm_f16(1.0))) { + if (bi_is_fixed_mux(mux, 32, bi_imm_f32(1.0)) || + bi_is_fixed_mux(mux, 16, bi_imm_f16(1.0))) { - if (!bi_takes_float_result_type(I->op)) - return false; + if (!bi_takes_float_result_type(I->op)) + return false; - I->result_type = BI_RESULT_TYPE_F1; - } else if (bi_is_fixed_mux(mux, 32, bi_imm_u32(1)) || - bi_is_fixed_mux(mux, 16, bi_imm_u16(1)) || - bi_is_fixed_mux(mux, 8, bi_imm_u8(1))) { + I->result_type = BI_RESULT_TYPE_F1; + } else if (bi_is_fixed_mux(mux, 32, bi_imm_u32(1)) || + bi_is_fixed_mux(mux, 16, bi_imm_u16(1)) || + bi_is_fixed_mux(mux, 8, bi_imm_u8(1))) { - if (!bi_takes_int_result_type(I->op)) - return false; + if (!bi_takes_int_result_type(I->op)) + return false; - I->result_type = BI_RESULT_TYPE_I1; - } else { - return false; - } + I->result_type = BI_RESULT_TYPE_I1; + } else { + return false; + } - I->dest[0] = mux->dest[0]; - return true; + I->dest[0] = mux->dest[0]; + return true; } static bool bi_is_var_tex(bi_instr *var, bi_instr *tex) { - return (var->op == BI_OPCODE_LD_VAR_IMM) && - (tex->op == BI_OPCODE_TEXS_2D_F16 || tex->op == BI_OPCODE_TEXS_2D_F32) && - (var->register_format == BI_REGISTER_FORMAT_F32) && - ((var->sample == BI_SAMPLE_CENTER && var->update == BI_UPDATE_STORE) || - (var->sample == BI_SAMPLE_NONE && var->update == BI_UPDATE_RETRIEVE)) && - (tex->texture_index == tex->sampler_index) && - (tex->texture_index < 4) && - (var->index < 8); + return (var->op == BI_OPCODE_LD_VAR_IMM) && + (tex->op == BI_OPCODE_TEXS_2D_F16 || + tex->op == BI_OPCODE_TEXS_2D_F32) && + (var->register_format == BI_REGISTER_FORMAT_F32) && + ((var->sample == BI_SAMPLE_CENTER && + var->update == BI_UPDATE_STORE) || + (var->sample == BI_SAMPLE_NONE && + var->update == BI_UPDATE_RETRIEVE)) && + (tex->texture_index == tex->sampler_index) && + (tex->texture_index < 4) && (var->index < 8); } static bool bi_optimizer_var_tex(bi_context *ctx, bi_instr *var, bi_instr *tex) { - if (!bi_is_var_tex(var, tex)) return false; + if (!bi_is_var_tex(var, tex)) + return false; - /* Construct 
the corresponding VAR_TEX intruction */ - bi_builder b = bi_init_builder(ctx, bi_after_instr(var)); + /* Construct the corresponding VAR_TEX intruction */ + bi_builder b = bi_init_builder(ctx, bi_after_instr(var)); - bi_instr *I = bi_var_tex_f32_to(&b, tex->dest[0], tex->lod_mode, - var->sample, var->update, tex->texture_index, var->index); - I->skip = tex->skip; + bi_instr *I = bi_var_tex_f32_to(&b, tex->dest[0], tex->lod_mode, var->sample, + var->update, tex->texture_index, var->index); + I->skip = tex->skip; - if (tex->op == BI_OPCODE_TEXS_2D_F16) - I->op = BI_OPCODE_VAR_TEX_F16; + if (tex->op == BI_OPCODE_TEXS_2D_F16) + I->op = BI_OPCODE_VAR_TEX_F16; - /* Dead code elimination will clean up for us */ - return true; + /* Dead code elimination will clean up for us */ + return true; } void bi_opt_mod_prop_backward(bi_context *ctx) { - unsigned count = ctx->ssa_alloc; - bi_instr **uses = calloc(count, sizeof(*uses)); - BITSET_WORD *multiple = calloc(BITSET_WORDS(count), sizeof(*multiple)); + unsigned count = ctx->ssa_alloc; + bi_instr **uses = calloc(count, sizeof(*uses)); + BITSET_WORD *multiple = calloc(BITSET_WORDS(count), sizeof(*multiple)); - bi_foreach_instr_global_rev(ctx, I) { - bi_foreach_ssa_src(I, s) { - unsigned v = I->src[s].value; + bi_foreach_instr_global_rev(ctx, I) { + bi_foreach_ssa_src(I, s) { + unsigned v = I->src[s].value; - if (uses[v] && uses[v] != I) - BITSET_SET(multiple, v); - else - uses[v] = I; - } + if (uses[v] && uses[v] != I) + BITSET_SET(multiple, v); + else + uses[v] = I; + } - if (!I->nr_dests) - continue; + if (!I->nr_dests) + continue; - bi_instr *use = uses[I->dest[0].value]; + bi_instr *use = uses[I->dest[0].value]; - if (!use || BITSET_TEST(multiple, I->dest[0].value)) - continue; + if (!use || BITSET_TEST(multiple, I->dest[0].value)) + continue; - /* Destination has a single use, try to propagate */ - bool propagated = - bi_optimizer_clamp(I, use) || - bi_optimizer_result_type(I, use); + /* Destination has a single use, try to propagate */ + bool propagated = + bi_optimizer_clamp(I, use) || bi_optimizer_result_type(I, use); - if (!propagated && I->op == BI_OPCODE_LD_VAR_IMM && use->op == BI_OPCODE_SPLIT_I32) { - /* Need to see through the split in a - * ld_var_imm/split/var_tex sequence - */ - bi_instr *tex = uses[use->dest[0].value]; + if (!propagated && I->op == BI_OPCODE_LD_VAR_IMM && + use->op == BI_OPCODE_SPLIT_I32) { + /* Need to see through the split in a + * ld_var_imm/split/var_tex sequence + */ + bi_instr *tex = uses[use->dest[0].value]; - if (!tex || BITSET_TEST(multiple, use->dest[0].value)) - continue; + if (!tex || BITSET_TEST(multiple, use->dest[0].value)) + continue; - use = tex; - propagated = bi_optimizer_var_tex(ctx, I, use); - } + use = tex; + propagated = bi_optimizer_var_tex(ctx, I, use); + } - if (propagated) { - bi_remove_instruction(use); - continue; - } - } + if (propagated) { + bi_remove_instruction(use); + continue; + } + } - free(uses); - free(multiple); + free(uses); + free(multiple); } /* @@ -443,37 +456,37 @@ bi_opt_mod_prop_backward(bi_context *ctx) static bool bi_lower_opt_instruction_helper(bi_builder *b, bi_instr *I) { - bi_instr *repl; + bi_instr *repl; - switch (I->op) { - case BI_OPCODE_FABSNEG_F32: - case BI_OPCODE_FCLAMP_F32: - repl = bi_fadd_f32_to(b, I->dest[0], I->src[0], bi_negzero()); - repl->clamp = I->clamp; - return true; + switch (I->op) { + case BI_OPCODE_FABSNEG_F32: + case BI_OPCODE_FCLAMP_F32: + repl = bi_fadd_f32_to(b, I->dest[0], I->src[0], bi_negzero()); + repl->clamp = I->clamp; + return true; 
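/*
 * Why the lowering above can use bi_negzero() (-0.0) as the second FADD
 * operand: under IEEE 754 addition with the default rounding mode,
 * x + (-0.0) == x for every non-NaN x, including both signed zeros (and NaN
 * still propagates unchanged), so the FADD is a pure identity that only
 * exists to carry the clamp and the abs/neg source modifiers. With +0.0
 * instead, (-0.0) + (+0.0) would round to +0.0 and lose the sign of zero.
 * A standalone check of that identity, independent of the compiler IR
 * (check_negzero_identity is an illustrative helper, not a driver function):
 */
#include <assert.h>
#include <math.h>

static void
check_negzero_identity(void)
{
   const float values[] = {0.0f, -0.0f, 1.5f, -3.25f, INFINITY, -INFINITY};

   for (unsigned i = 0; i < sizeof(values) / sizeof(values[0]); ++i) {
      float x = values[i];
      float sum = x + (-0.0f);

      /* Same value and same sign of zero */
      assert(sum == x && !!signbit(sum) == !!signbit(x));
   }
}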
- case BI_OPCODE_FABSNEG_V2F16: - case BI_OPCODE_FCLAMP_V2F16: - repl = bi_fadd_v2f16_to(b, I->dest[0], I->src[0], bi_negzero()); - repl->clamp = I->clamp; - return true; + case BI_OPCODE_FABSNEG_V2F16: + case BI_OPCODE_FCLAMP_V2F16: + repl = bi_fadd_v2f16_to(b, I->dest[0], I->src[0], bi_negzero()); + repl->clamp = I->clamp; + return true; - case BI_OPCODE_DISCARD_B32: - bi_discard_f32(b, I->src[0], bi_zero(), BI_CMPF_NE); - return true; + case BI_OPCODE_DISCARD_B32: + bi_discard_f32(b, I->src[0], bi_zero(), BI_CMPF_NE); + return true; - default: - return false; - } + default: + return false; + } } void bi_lower_opt_instructions(bi_context *ctx) { - bi_foreach_instr_global_safe(ctx, I) { - bi_builder b = bi_init_builder(ctx, bi_before_instr(I)); + bi_foreach_instr_global_safe(ctx, I) { + bi_builder b = bi_init_builder(ctx, bi_before_instr(I)); - if (bi_lower_opt_instruction_helper(&b, I)) - bi_remove_instruction(I); - } + if (bi_lower_opt_instruction_helper(&b, I)) + bi_remove_instruction(I); + } } diff --git a/src/panfrost/bifrost/bi_opt_push_ubo.c b/src/panfrost/bifrost/bi_opt_push_ubo.c index 941993d55fb..01f08635076 100644 --- a/src/panfrost/bifrost/bi_opt_push_ubo.c +++ b/src/panfrost/bifrost/bi_opt_push_ubo.c @@ -21,8 +21,8 @@ * SOFTWARE. */ -#include "compiler.h" #include "bi_builder.h" +#include "compiler.h" /* This optimization pass, intended to run once after code emission but before * copy propagation, analyzes direct word-aligned UBO reads and promotes a @@ -32,17 +32,16 @@ static bool bi_is_ubo(bi_instr *ins) { - return (bi_opcode_props[ins->op].message == BIFROST_MESSAGE_LOAD) && - (ins->seg == BI_SEG_UBO); + return (bi_opcode_props[ins->op].message == BIFROST_MESSAGE_LOAD) && + (ins->seg == BI_SEG_UBO); } static bool bi_is_direct_aligned_ubo(bi_instr *ins) { - return bi_is_ubo(ins) && - (ins->src[0].type == BI_INDEX_CONSTANT) && - (ins->src[1].type == BI_INDEX_CONSTANT) && - ((ins->src[0].value & 0x3) == 0); + return bi_is_ubo(ins) && (ins->src[0].type == BI_INDEX_CONSTANT) && + (ins->src[1].type == BI_INDEX_CONSTANT) && + ((ins->src[0].value & 0x3) == 0); } /* Represents use data for a single UBO */ @@ -50,44 +49,46 @@ bi_is_direct_aligned_ubo(bi_instr *ins) #define MAX_UBO_WORDS (65536 / 16) struct bi_ubo_block { - BITSET_DECLARE(pushed, MAX_UBO_WORDS); - uint8_t range[MAX_UBO_WORDS]; + BITSET_DECLARE(pushed, MAX_UBO_WORDS); + uint8_t range[MAX_UBO_WORDS]; }; struct bi_ubo_analysis { - /* Per block analysis */ - unsigned nr_blocks; - struct bi_ubo_block *blocks; + /* Per block analysis */ + unsigned nr_blocks; + struct bi_ubo_block *blocks; }; static struct bi_ubo_analysis bi_analyze_ranges(bi_context *ctx) { - struct bi_ubo_analysis res = { - .nr_blocks = ctx->nir->info.num_ubos + 1, - }; + struct bi_ubo_analysis res = { + .nr_blocks = ctx->nir->info.num_ubos + 1, + }; - res.blocks = calloc(res.nr_blocks, sizeof(struct bi_ubo_block)); + res.blocks = calloc(res.nr_blocks, sizeof(struct bi_ubo_block)); - bi_foreach_instr_global(ctx, ins) { - if (!bi_is_direct_aligned_ubo(ins)) continue; + bi_foreach_instr_global(ctx, ins) { + if (!bi_is_direct_aligned_ubo(ins)) + continue; - unsigned ubo = ins->src[1].value; - unsigned word = ins->src[0].value / 4; - unsigned channels = bi_opcode_props[ins->op].sr_count; + unsigned ubo = ins->src[1].value; + unsigned word = ins->src[0].value / 4; + unsigned channels = bi_opcode_props[ins->op].sr_count; - assert(ubo < res.nr_blocks); - assert(channels > 0 && channels <= 4); + assert(ubo < res.nr_blocks); + assert(channels > 0 && channels <= 
4); - if (word >= MAX_UBO_WORDS) continue; + if (word >= MAX_UBO_WORDS) + continue; - /* Must use max if the same base is read with different channel - * counts, which is possible with nir_opt_shrink_vectors */ - uint8_t *range = res.blocks[ubo].range; - range[word] = MAX2(range[word], channels); - } + /* Must use max if the same base is read with different channel + * counts, which is possible with nir_opt_shrink_vectors */ + uint8_t *range = res.blocks[ubo].range; + range[word] = MAX2(range[word], channels); + } - return res; + return res; } /* Select UBO words to push. A sophisticated implementation would consider the @@ -97,92 +98,93 @@ bi_analyze_ranges(bi_context *ctx) static void bi_pick_ubo(struct panfrost_ubo_push *push, struct bi_ubo_analysis *analysis) { - for (signed ubo = analysis->nr_blocks - 1; ubo >= 0; --ubo) { - struct bi_ubo_block *block = &analysis->blocks[ubo]; + for (signed ubo = analysis->nr_blocks - 1; ubo >= 0; --ubo) { + struct bi_ubo_block *block = &analysis->blocks[ubo]; - for (unsigned r = 0; r < MAX_UBO_WORDS; ++r) { - unsigned range = block->range[r]; + for (unsigned r = 0; r < MAX_UBO_WORDS; ++r) { + unsigned range = block->range[r]; - /* Don't push something we don't access */ - if (range == 0) continue; + /* Don't push something we don't access */ + if (range == 0) + continue; - /* Don't push more than possible */ - if (push->count > PAN_MAX_PUSH - range) - return; + /* Don't push more than possible */ + if (push->count > PAN_MAX_PUSH - range) + return; - for (unsigned offs = 0; offs < range; ++offs) { - struct panfrost_ubo_word word = { - .ubo = ubo, - .offset = (r + offs) * 4, - }; + for (unsigned offs = 0; offs < range; ++offs) { + struct panfrost_ubo_word word = { + .ubo = ubo, + .offset = (r + offs) * 4, + }; - push->words[push->count++] = word; - } + push->words[push->count++] = word; + } - /* Mark it as pushed so we can rewrite */ - BITSET_SET(block->pushed, r); - } - } + /* Mark it as pushed so we can rewrite */ + BITSET_SET(block->pushed, r); + } + } } void bi_opt_push_ubo(bi_context *ctx) { - struct bi_ubo_analysis analysis = bi_analyze_ranges(ctx); - bi_pick_ubo(ctx->info.push, &analysis); + struct bi_ubo_analysis analysis = bi_analyze_ranges(ctx); + bi_pick_ubo(ctx->info.push, &analysis); - ctx->ubo_mask = 0; + ctx->ubo_mask = 0; - bi_foreach_instr_global_safe(ctx, ins) { - if (!bi_is_ubo(ins)) continue; + bi_foreach_instr_global_safe(ctx, ins) { + if (!bi_is_ubo(ins)) + continue; - unsigned ubo = ins->src[1].value; - unsigned offset = ins->src[0].value; + unsigned ubo = ins->src[1].value; + unsigned offset = ins->src[0].value; - if (!bi_is_direct_aligned_ubo(ins)) { - /* The load can't be pushed, so this UBO needs to be - * uploaded conventionally */ - if (ins->src[1].type == BI_INDEX_CONSTANT) - ctx->ubo_mask |= BITSET_BIT(ubo); - else - ctx->ubo_mask = ~0; + if (!bi_is_direct_aligned_ubo(ins)) { + /* The load can't be pushed, so this UBO needs to be + * uploaded conventionally */ + if (ins->src[1].type == BI_INDEX_CONSTANT) + ctx->ubo_mask |= BITSET_BIT(ubo); + else + ctx->ubo_mask = ~0; - continue; - } + continue; + } - /* Check if we decided to push this */ - assert(ubo < analysis.nr_blocks); - if (!BITSET_TEST(analysis.blocks[ubo].pushed, offset / 4)) { - ctx->ubo_mask |= BITSET_BIT(ubo); - continue; - } + /* Check if we decided to push this */ + assert(ubo < analysis.nr_blocks); + if (!BITSET_TEST(analysis.blocks[ubo].pushed, offset / 4)) { + ctx->ubo_mask |= BITSET_BIT(ubo); + continue; + } - /* Replace the UBO load with moves from FAU 
*/ - bi_builder b = bi_init_builder(ctx, bi_after_instr(ins)); + /* Replace the UBO load with moves from FAU */ + bi_builder b = bi_init_builder(ctx, bi_after_instr(ins)); - unsigned nr = bi_opcode_props[ins->op].sr_count; - bi_instr *vec = bi_collect_i32_to(&b, ins->dest[0], nr); + unsigned nr = bi_opcode_props[ins->op].sr_count; + bi_instr *vec = bi_collect_i32_to(&b, ins->dest[0], nr); - bi_foreach_src(vec, w) { - /* FAU is grouped in pairs (2 x 4-byte) */ - unsigned base = - pan_lookup_pushed_ubo(ctx->info.push, ubo, - (offset + 4 * w)); + bi_foreach_src(vec, w) { + /* FAU is grouped in pairs (2 x 4-byte) */ + unsigned base = + pan_lookup_pushed_ubo(ctx->info.push, ubo, (offset + 4 * w)); - unsigned fau_idx = (base >> 1); - unsigned fau_hi = (base & 1); + unsigned fau_idx = (base >> 1); + unsigned fau_hi = (base & 1); - vec->src[w] = bi_fau(BIR_FAU_UNIFORM | fau_idx, fau_hi); - } + vec->src[w] = bi_fau(BIR_FAU_UNIFORM | fau_idx, fau_hi); + } - bi_remove_instruction(ins); - } + bi_remove_instruction(ins); + } - free(analysis.blocks); + free(analysis.blocks); } typedef struct { - BITSET_DECLARE(row, PAN_MAX_PUSH); + BITSET_DECLARE(row, PAN_MAX_PUSH); } adjacency_row; /* Find the connected component containing `node` with depth-first search */ @@ -190,33 +192,32 @@ static void bi_find_component(adjacency_row *adjacency, BITSET_WORD *visited, unsigned *component, unsigned *size, unsigned node) { - unsigned neighbour; + unsigned neighbour; - BITSET_SET(visited, node); - component[(*size)++] = node; + BITSET_SET(visited, node); + component[(*size)++] = node; - BITSET_FOREACH_SET(neighbour, adjacency[node].row, PAN_MAX_PUSH) { - if (!BITSET_TEST(visited, neighbour)) { - bi_find_component(adjacency, visited, component, size, - neighbour); - } - } + BITSET_FOREACH_SET(neighbour, adjacency[node].row, PAN_MAX_PUSH) { + if (!BITSET_TEST(visited, neighbour)) { + bi_find_component(adjacency, visited, component, size, neighbour); + } + } } static bool bi_is_uniform(bi_index idx) { - return (idx.type == BI_INDEX_FAU) && (idx.value & BIR_FAU_UNIFORM); + return (idx.type == BI_INDEX_FAU) && (idx.value & BIR_FAU_UNIFORM); } /* Get the index of a uniform in 32-bit words from the start of FAU-RAM */ static unsigned bi_uniform_word(bi_index idx) { - assert(bi_is_uniform(idx)); - assert(idx.offset <= 1); + assert(bi_is_uniform(idx)); + assert(idx.offset <= 1); - return ((idx.value & ~BIR_FAU_UNIFORM) << 1) | idx.offset; + return ((idx.value & ~BIR_FAU_UNIFORM) << 1) | idx.offset; } /* @@ -228,35 +229,35 @@ bi_uniform_word(bi_index idx) static void bi_create_fau_interference_graph(bi_context *ctx, adjacency_row *adjacency) { - bi_foreach_instr_global(ctx, I) { - unsigned nodes[BI_MAX_SRCS] = {}; - unsigned node_count = 0; + bi_foreach_instr_global(ctx, I) { + unsigned nodes[BI_MAX_SRCS] = {}; + unsigned node_count = 0; - /* Set nodes[] to 32-bit uniforms accessed */ - bi_foreach_src(I, s) { - if (bi_is_uniform(I->src[s])) { - unsigned word = bi_uniform_word(I->src[s]); + /* Set nodes[] to 32-bit uniforms accessed */ + bi_foreach_src(I, s) { + if (bi_is_uniform(I->src[s])) { + unsigned word = bi_uniform_word(I->src[s]); - if (word >= ctx->info.push_offset) - nodes[node_count++] = word; - } - } + if (word >= ctx->info.push_offset) + nodes[node_count++] = word; + } + } - /* Create clique connecting nodes[] */ - for (unsigned i = 0; i < node_count; ++i) { - for (unsigned j = 0; j < node_count; ++j) { - if (i == j) - continue; + /* Create clique connecting nodes[] */ + for (unsigned i = 0; i < node_count; ++i) { + 
for (unsigned j = 0; j < node_count; ++j) { + if (i == j) + continue; - unsigned x = nodes[i], y = nodes[j]; - assert(MAX2(x, y) < ctx->info.push->count); + unsigned x = nodes[i], y = nodes[j]; + assert(MAX2(x, y) < ctx->info.push->count); - /* Add undirected edge between the nodes */ - BITSET_SET(adjacency[x].row, y); - BITSET_SET(adjacency[y].row, x); - } - } - } + /* Add undirected edge between the nodes */ + BITSET_SET(adjacency[x].row, y); + BITSET_SET(adjacency[y].row, x); + } + } + } } /* @@ -278,71 +279,72 @@ bi_create_fau_interference_graph(bi_context *ctx, adjacency_row *adjacency) void bi_opt_reorder_push(bi_context *ctx) { - adjacency_row adjacency[PAN_MAX_PUSH] = { 0 }; - BITSET_DECLARE(visited, PAN_MAX_PUSH) = { 0 }; + adjacency_row adjacency[PAN_MAX_PUSH] = {0}; + BITSET_DECLARE(visited, PAN_MAX_PUSH) = {0}; - unsigned ordering[PAN_MAX_PUSH] = { 0 }; - unsigned unpaired[PAN_MAX_PUSH] = { 0 }; - unsigned pushed = 0, unpaired_count = 0; + unsigned ordering[PAN_MAX_PUSH] = {0}; + unsigned unpaired[PAN_MAX_PUSH] = {0}; + unsigned pushed = 0, unpaired_count = 0; - struct panfrost_ubo_push *push = ctx->info.push; - unsigned push_offset = ctx->info.push_offset; + struct panfrost_ubo_push *push = ctx->info.push; + unsigned push_offset = ctx->info.push_offset; - bi_create_fau_interference_graph(ctx, adjacency); + bi_create_fau_interference_graph(ctx, adjacency); - for (unsigned i = push_offset; i < push->count; ++i) { - if (BITSET_TEST(visited, i)) continue; + for (unsigned i = push_offset; i < push->count; ++i) { + if (BITSET_TEST(visited, i)) + continue; - unsigned component[PAN_MAX_PUSH] = { 0 }; - unsigned size = 0; - bi_find_component(adjacency, visited, component, &size, i); + unsigned component[PAN_MAX_PUSH] = {0}; + unsigned size = 0; + bi_find_component(adjacency, visited, component, &size, i); - /* If there is an odd number of uses, at least one use must be - * unpaired. Arbitrarily take the last one. - */ - if (size % 2) - unpaired[unpaired_count++] = component[--size]; + /* If there is an odd number of uses, at least one use must be + * unpaired. Arbitrarily take the last one. + */ + if (size % 2) + unpaired[unpaired_count++] = component[--size]; - /* The rest of uses are paired */ - assert((size % 2) == 0); + /* The rest of uses are paired */ + assert((size % 2) == 0); - /* Push the paired uses */ - memcpy(ordering + pushed, component, sizeof(unsigned) * size); - pushed += size; - } + /* Push the paired uses */ + memcpy(ordering + pushed, component, sizeof(unsigned) * size); + pushed += size; + } - /* Push unpaired nodes at the end */ - memcpy(ordering + pushed, unpaired, sizeof(unsigned) * unpaired_count); - pushed += unpaired_count; + /* Push unpaired nodes at the end */ + memcpy(ordering + pushed, unpaired, sizeof(unsigned) * unpaired_count); + pushed += unpaired_count; - /* Ordering is a permutation. Invert it for O(1) lookup. */ - unsigned old_to_new[PAN_MAX_PUSH] = { 0 }; + /* Ordering is a permutation. Invert it for O(1) lookup. 
*/ + unsigned old_to_new[PAN_MAX_PUSH] = {0}; - for (unsigned i = 0; i < push_offset; ++i) { - old_to_new[i] = i; - } + for (unsigned i = 0; i < push_offset; ++i) { + old_to_new[i] = i; + } - for (unsigned i = 0; i < pushed; ++i) { - assert(ordering[i] >= push_offset); - old_to_new[ordering[i]] = push_offset + i; - } + for (unsigned i = 0; i < pushed; ++i) { + assert(ordering[i] >= push_offset); + old_to_new[ordering[i]] = push_offset + i; + } - /* Use new ordering throughout the program */ - bi_foreach_instr_global(ctx, I) { - bi_foreach_src(I, s) { - if (bi_is_uniform(I->src[s])) { - unsigned node = bi_uniform_word(I->src[s]); - unsigned new_node = old_to_new[node]; - I->src[s].value = BIR_FAU_UNIFORM | (new_node >> 1); - I->src[s].offset = new_node & 1; - } - } - } + /* Use new ordering throughout the program */ + bi_foreach_instr_global(ctx, I) { + bi_foreach_src(I, s) { + if (bi_is_uniform(I->src[s])) { + unsigned node = bi_uniform_word(I->src[s]); + unsigned new_node = old_to_new[node]; + I->src[s].value = BIR_FAU_UNIFORM | (new_node >> 1); + I->src[s].offset = new_node & 1; + } + } + } - /* Use new ordering for push */ - struct panfrost_ubo_push old = *push; - for (unsigned i = 0; i < pushed; ++i) - push->words[push_offset + i] = old.words[ordering[i]]; + /* Use new ordering for push */ + struct panfrost_ubo_push old = *push; + for (unsigned i = 0; i < pushed; ++i) + push->words[push_offset + i] = old.words[ordering[i]]; - push->count = push_offset + pushed; + push->count = push_offset + pushed; } diff --git a/src/panfrost/bifrost/bi_pack.c b/src/panfrost/bifrost/bi_pack.c index da27a315cbd..7782c07ecd6 100644 --- a/src/panfrost/bifrost/bi_pack.c +++ b/src/panfrost/bifrost/bi_pack.c @@ -21,8 +21,8 @@ * SOFTWARE. */ -#include "compiler.h" #include "bi_quirks.h" +#include "compiler.h" /* This file contains the final passes of the compiler. Running after * scheduling and RA, the IR is now finalized, so we need to emit it to actual @@ -31,39 +31,38 @@ static uint64_t bi_pack_header(bi_clause *clause, bi_clause *next_1, bi_clause *next_2) { - /* next_dependencies are the union of the dependencies of successors' - * dependencies */ + /* next_dependencies are the union of the dependencies of successors' + * dependencies */ - unsigned dependency_wait = next_1 ? next_1->dependencies : 0; - dependency_wait |= next_2 ? next_2->dependencies : 0; + unsigned dependency_wait = next_1 ? next_1->dependencies : 0; + dependency_wait |= next_2 ? next_2->dependencies : 0; - /* Signal barriers (slot #7) immediately. This is not optimal but good - * enough. Doing better requires extending the IR and scheduler. - */ - if (clause->message_type == BIFROST_MESSAGE_BARRIER) - dependency_wait |= BITFIELD_BIT(7); + /* Signal barriers (slot #7) immediately. This is not optimal but good + * enough. Doing better requires extending the IR and scheduler. + */ + if (clause->message_type == BIFROST_MESSAGE_BARRIER) + dependency_wait |= BITFIELD_BIT(7); - bool staging_barrier = next_1 ? next_1->staging_barrier : false; - staging_barrier |= next_2 ? next_2->staging_barrier : 0; + bool staging_barrier = next_1 ? next_1->staging_barrier : false; + staging_barrier |= next_2 ? next_2->staging_barrier : 0; - struct bifrost_header header = { - .flow_control = - (next_1 == NULL && next_2 == NULL) ? 
- BIFROST_FLOW_END : clause->flow_control, - .terminate_discarded_threads = clause->td, - .next_clause_prefetch = clause->next_clause_prefetch && next_1, - .staging_barrier = staging_barrier, - .staging_register = clause->staging_register, - .dependency_wait = dependency_wait, - .dependency_slot = clause->scoreboard_id, - .message_type = clause->message_type, - .next_message_type = next_1 ? next_1->message_type : 0, - .flush_to_zero = clause->ftz ? BIFROST_FTZ_ALWAYS : BIFROST_FTZ_DISABLE, - }; + struct bifrost_header header = { + .flow_control = (next_1 == NULL && next_2 == NULL) ? BIFROST_FLOW_END + : clause->flow_control, + .terminate_discarded_threads = clause->td, + .next_clause_prefetch = clause->next_clause_prefetch && next_1, + .staging_barrier = staging_barrier, + .staging_register = clause->staging_register, + .dependency_wait = dependency_wait, + .dependency_slot = clause->scoreboard_id, + .message_type = clause->message_type, + .next_message_type = next_1 ? next_1->message_type : 0, + .flush_to_zero = clause->ftz ? BIFROST_FTZ_ALWAYS : BIFROST_FTZ_DISABLE, + }; - uint64_t u = 0; - memcpy(&u, &header, sizeof(header)); - return u; + uint64_t u = 0; + memcpy(&u, &header, sizeof(header)); + return u; } /* Assigns a slot for reading, before anything is written */ @@ -71,205 +70,207 @@ bi_pack_header(bi_clause *clause, bi_clause *next_1, bi_clause *next_2) static void bi_assign_slot_read(bi_registers *regs, bi_index src) { - /* We only assign for registers */ - if (src.type != BI_INDEX_REGISTER) - return; + /* We only assign for registers */ + if (src.type != BI_INDEX_REGISTER) + return; - /* Check if we already assigned the slot */ - for (unsigned i = 0; i <= 1; ++i) { - if (regs->slot[i] == src.value && regs->enabled[i]) - return; - } + /* Check if we already assigned the slot */ + for (unsigned i = 0; i <= 1; ++i) { + if (regs->slot[i] == src.value && regs->enabled[i]) + return; + } - if (regs->slot[2] == src.value && regs->slot23.slot2 == BIFROST_OP_READ) - return; + if (regs->slot[2] == src.value && regs->slot23.slot2 == BIFROST_OP_READ) + return; - /* Assign it now */ + /* Assign it now */ - for (unsigned i = 0; i <= 1; ++i) { - if (!regs->enabled[i]) { - regs->slot[i] = src.value; - regs->enabled[i] = true; - return; - } - } + for (unsigned i = 0; i <= 1; ++i) { + if (!regs->enabled[i]) { + regs->slot[i] = src.value; + regs->enabled[i] = true; + return; + } + } - if (!regs->slot23.slot3) { - regs->slot[2] = src.value; - regs->slot23.slot2 = BIFROST_OP_READ; - return; - } + if (!regs->slot23.slot3) { + regs->slot[2] = src.value; + regs->slot23.slot2 = BIFROST_OP_READ; + return; + } - bi_print_slots(regs, stderr); - unreachable("Failed to find a free slot for src"); + bi_print_slots(regs, stderr); + unreachable("Failed to find a free slot for src"); } static bi_registers bi_assign_slots(bi_tuple *now, bi_tuple *prev) { - /* We assign slots for the main register mechanism. Special ops - * use the data registers, which has its own mechanism entirely - * and thus gets skipped over here. */ + /* We assign slots for the main register mechanism. Special ops + * use the data registers, which has its own mechanism entirely + * and thus gets skipped over here. 
*/ - bool read_dreg = now->add && bi_opcode_props[now->add->op].sr_read; - bool write_dreg = prev->add && bi_opcode_props[prev->add->op].sr_write; + bool read_dreg = now->add && bi_opcode_props[now->add->op].sr_read; + bool write_dreg = prev->add && bi_opcode_props[prev->add->op].sr_write; - /* First, assign reads */ + /* First, assign reads */ - if (now->fma) - bi_foreach_src(now->fma, src) - bi_assign_slot_read(&now->regs, (now->fma)->src[src]); + if (now->fma) + bi_foreach_src(now->fma, src) + bi_assign_slot_read(&now->regs, (now->fma)->src[src]); - if (now->add) { - bi_foreach_src(now->add, src) { - /* This is not a real source, we shouldn't assign a - * slot for it. - */ - if (now->add->op == BI_OPCODE_BLEND && src == 4) - continue; + if (now->add) { + bi_foreach_src(now->add, src) { + /* This is not a real source, we shouldn't assign a + * slot for it. + */ + if (now->add->op == BI_OPCODE_BLEND && src == 4) + continue; - if (!(src == 0 && read_dreg)) - bi_assign_slot_read(&now->regs, (now->add)->src[src]); - } - } + if (!(src == 0 && read_dreg)) + bi_assign_slot_read(&now->regs, (now->add)->src[src]); + } + } - /* Next, assign writes. Staging writes are assigned separately, but - * +ATEST wants its destination written to both a staging register - * _and_ a regular write, because it may not generate a message */ + /* Next, assign writes. Staging writes are assigned separately, but + * +ATEST wants its destination written to both a staging register + * _and_ a regular write, because it may not generate a message */ - if (prev->add && prev->add->nr_dests && (!write_dreg || prev->add->op == BI_OPCODE_ATEST)) { - bi_index idx = prev->add->dest[0]; + if (prev->add && prev->add->nr_dests && + (!write_dreg || prev->add->op == BI_OPCODE_ATEST)) { + bi_index idx = prev->add->dest[0]; - if (idx.type == BI_INDEX_REGISTER) { - now->regs.slot[3] = idx.value; - now->regs.slot23.slot3 = BIFROST_OP_WRITE; - } - } + if (idx.type == BI_INDEX_REGISTER) { + now->regs.slot[3] = idx.value; + now->regs.slot23.slot3 = BIFROST_OP_WRITE; + } + } - if (prev->fma && prev->fma->nr_dests) { - bi_index idx = prev->fma->dest[0]; + if (prev->fma && prev->fma->nr_dests) { + bi_index idx = prev->fma->dest[0]; - if (idx.type == BI_INDEX_REGISTER) { - if (now->regs.slot23.slot3) { - /* Scheduler constraint: cannot read 3 and write 2 */ - assert(!now->regs.slot23.slot2); - now->regs.slot[2] = idx.value; - now->regs.slot23.slot2 = BIFROST_OP_WRITE; - } else { - now->regs.slot[3] = idx.value; - now->regs.slot23.slot3 = BIFROST_OP_WRITE; - now->regs.slot23.slot3_fma = true; - } - } - } + if (idx.type == BI_INDEX_REGISTER) { + if (now->regs.slot23.slot3) { + /* Scheduler constraint: cannot read 3 and write 2 */ + assert(!now->regs.slot23.slot2); + now->regs.slot[2] = idx.value; + now->regs.slot23.slot2 = BIFROST_OP_WRITE; + } else { + now->regs.slot[3] = idx.value; + now->regs.slot23.slot3 = BIFROST_OP_WRITE; + now->regs.slot23.slot3_fma = true; + } + } + } - return now->regs; + return now->regs; } static enum bifrost_reg_mode bi_pack_register_mode(bi_registers r) { - /* Handle idle as a special case */ - if (!(r.slot23.slot2 | r.slot23.slot3)) - return r.first_instruction ? BIFROST_IDLE_1 : BIFROST_IDLE; + /* Handle idle as a special case */ + if (!(r.slot23.slot2 | r.slot23.slot3)) + return r.first_instruction ? 
BIFROST_IDLE_1 : BIFROST_IDLE; - /* Otherwise, use the LUT */ - for (unsigned i = 0; i < ARRAY_SIZE(bifrost_reg_ctrl_lut); ++i) { - if (memcmp(bifrost_reg_ctrl_lut + i, &r.slot23, sizeof(r.slot23)) == 0) - return i; - } + /* Otherwise, use the LUT */ + for (unsigned i = 0; i < ARRAY_SIZE(bifrost_reg_ctrl_lut); ++i) { + if (memcmp(bifrost_reg_ctrl_lut + i, &r.slot23, sizeof(r.slot23)) == 0) + return i; + } - bi_print_slots(&r, stderr); - unreachable("Invalid slot assignment"); + bi_print_slots(&r, stderr); + unreachable("Invalid slot assignment"); } static uint64_t bi_pack_registers(bi_registers regs) { - enum bifrost_reg_mode mode = bi_pack_register_mode(regs); - struct bifrost_regs s = { 0 }; - uint64_t packed = 0; + enum bifrost_reg_mode mode = bi_pack_register_mode(regs); + struct bifrost_regs s = {0}; + uint64_t packed = 0; - /* Need to pack 5-bit mode as a 4-bit field. The decoder moves bit 3 to bit 4 for - * first instruction and adds 16 when reg 2 == reg 3 */ + /* Need to pack 5-bit mode as a 4-bit field. The decoder moves bit 3 to bit 4 + * for first instruction and adds 16 when reg 2 == reg 3 */ - unsigned ctrl; - bool r2_equals_r3 = false; + unsigned ctrl; + bool r2_equals_r3 = false; - if (regs.first_instruction) { - /* Bit 3 implicitly must be clear for first instructions. - * The affected patterns all write both ADD/FMA, but that - * is forbidden for the last instruction (whose writes are - * encoded by the first), so this does not add additional - * encoding constraints */ - assert(!(mode & 0x8)); + if (regs.first_instruction) { + /* Bit 3 implicitly must be clear for first instructions. + * The affected patterns all write both ADD/FMA, but that + * is forbidden for the last instruction (whose writes are + * encoded by the first), so this does not add additional + * encoding constraints */ + assert(!(mode & 0x8)); - /* Move bit 4 to bit 3, since bit 3 is clear */ - ctrl = (mode & 0x7) | ((mode & 0x10) >> 1); + /* Move bit 4 to bit 3, since bit 3 is clear */ + ctrl = (mode & 0x7) | ((mode & 0x10) >> 1); - /* If we can let r2 equal r3, we have to or the hardware raises - * INSTR_INVALID_ENC (it's unclear why). */ - if (!(regs.slot23.slot2 && regs.slot23.slot3)) - r2_equals_r3 = true; - } else { - /* We force r2=r3 or not for the upper bit */ - ctrl = (mode & 0xF); - r2_equals_r3 = (mode & 0x10); - } + /* If we can let r2 equal r3, we have to or the hardware raises + * INSTR_INVALID_ENC (it's unclear why). 
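+    * (For the non-first case below, r2 == r3 is what carries bit 4 of the
+    * mode: per the comment above, the decoder adds 16 when the two match.)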
*/ + if (!(regs.slot23.slot2 && regs.slot23.slot3)) + r2_equals_r3 = true; + } else { + /* We force r2=r3 or not for the upper bit */ + ctrl = (mode & 0xF); + r2_equals_r3 = (mode & 0x10); + } - if (regs.enabled[1]) { - /* Gotta save that bit!~ Required by the 63-x trick */ - assert(regs.slot[1] > regs.slot[0]); - assert(regs.enabled[0]); + if (regs.enabled[1]) { + /* Gotta save that bit!~ Required by the 63-x trick */ + assert(regs.slot[1] > regs.slot[0]); + assert(regs.enabled[0]); - /* Do the 63-x trick, see docs/disasm */ - if (regs.slot[0] > 31) { - regs.slot[0] = 63 - regs.slot[0]; - regs.slot[1] = 63 - regs.slot[1]; - } + /* Do the 63-x trick, see docs/disasm */ + if (regs.slot[0] > 31) { + regs.slot[0] = 63 - regs.slot[0]; + regs.slot[1] = 63 - regs.slot[1]; + } - assert(regs.slot[0] <= 31); - assert(regs.slot[1] <= 63); + assert(regs.slot[0] <= 31); + assert(regs.slot[1] <= 63); - s.ctrl = ctrl; - s.reg1 = regs.slot[1]; - s.reg0 = regs.slot[0]; - } else { - /* slot 1 disabled, so set to zero and use slot 1 for ctrl */ - s.ctrl = 0; - s.reg1 = ctrl << 2; + s.ctrl = ctrl; + s.reg1 = regs.slot[1]; + s.reg0 = regs.slot[0]; + } else { + /* slot 1 disabled, so set to zero and use slot 1 for ctrl */ + s.ctrl = 0; + s.reg1 = ctrl << 2; - if (regs.enabled[0]) { - /* Bit 0 upper bit of slot 0 */ - s.reg1 |= (regs.slot[0] >> 5); + if (regs.enabled[0]) { + /* Bit 0 upper bit of slot 0 */ + s.reg1 |= (regs.slot[0] >> 5); - /* Rest of slot 0 in usual spot */ - s.reg0 = (regs.slot[0] & 0b11111); - } else { - /* Bit 1 set if slot 0 also disabled */ - s.reg1 |= (1 << 1); - } - } + /* Rest of slot 0 in usual spot */ + s.reg0 = (regs.slot[0] & 0b11111); + } else { + /* Bit 1 set if slot 0 also disabled */ + s.reg1 |= (1 << 1); + } + } - /* Force r2 =/!= r3 as needed */ - if (r2_equals_r3) { - assert(regs.slot[3] == regs.slot[2] || !(regs.slot23.slot2 && regs.slot23.slot3)); + /* Force r2 =/!= r3 as needed */ + if (r2_equals_r3) { + assert(regs.slot[3] == regs.slot[2] || + !(regs.slot23.slot2 && regs.slot23.slot3)); - if (regs.slot23.slot2) - regs.slot[3] = regs.slot[2]; - else - regs.slot[2] = regs.slot[3]; - } else if (!regs.first_instruction) { - /* Enforced by the encoding anyway */ - assert(regs.slot[2] != regs.slot[3]); - } + if (regs.slot23.slot2) + regs.slot[3] = regs.slot[2]; + else + regs.slot[2] = regs.slot[3]; + } else if (!regs.first_instruction) { + /* Enforced by the encoding anyway */ + assert(regs.slot[2] != regs.slot[3]); + } - s.reg2 = regs.slot[2]; - s.reg3 = regs.slot[3]; - s.fau_idx = regs.fau_idx; + s.reg2 = regs.slot[2]; + s.reg3 = regs.slot[3]; + s.fau_idx = regs.fau_idx; - memcpy(&packed, &s, sizeof(s)); - return packed; + memcpy(&packed, &s, sizeof(s)); + return packed; } /* We must ensure slot 1 > slot 0 for the 63-x trick to function, so we fix @@ -278,94 +279,92 @@ bi_pack_registers(bi_registers regs) static void bi_flip_slots(bi_registers *regs) { - if (regs->enabled[0] && regs->enabled[1] && regs->slot[1] < regs->slot[0]) { - unsigned temp = regs->slot[0]; - regs->slot[0] = regs->slot[1]; - regs->slot[1] = temp; - } - + if (regs->enabled[0] && regs->enabled[1] && regs->slot[1] < regs->slot[0]) { + unsigned temp = regs->slot[0]; + regs->slot[0] = regs->slot[1]; + regs->slot[1] = temp; + } } static inline enum bifrost_packed_src bi_get_src_slot(bi_registers *regs, unsigned reg) { - if (regs->slot[0] == reg && regs->enabled[0]) - return BIFROST_SRC_PORT0; - else if (regs->slot[1] == reg && regs->enabled[1]) - return BIFROST_SRC_PORT1; - else if (regs->slot[2] == reg && 
regs->slot23.slot2 == BIFROST_OP_READ) - return BIFROST_SRC_PORT2; - else - unreachable("Tried to access register with no port"); + if (regs->slot[0] == reg && regs->enabled[0]) + return BIFROST_SRC_PORT0; + else if (regs->slot[1] == reg && regs->enabled[1]) + return BIFROST_SRC_PORT1; + else if (regs->slot[2] == reg && regs->slot23.slot2 == BIFROST_OP_READ) + return BIFROST_SRC_PORT2; + else + unreachable("Tried to access register with no port"); } static inline enum bifrost_packed_src bi_get_src_new(bi_instr *ins, bi_registers *regs, unsigned s) { - if (!ins || s >= ins->nr_srcs) - return 0; + if (!ins || s >= ins->nr_srcs) + return 0; - bi_index src = ins->src[s]; + bi_index src = ins->src[s]; - if (src.type == BI_INDEX_REGISTER) - return bi_get_src_slot(regs, src.value); - else if (src.type == BI_INDEX_PASS) - return src.value; - else { - /* TODO make safer */ - return BIFROST_SRC_STAGE; - } + if (src.type == BI_INDEX_REGISTER) + return bi_get_src_slot(regs, src.value); + else if (src.type == BI_INDEX_PASS) + return src.value; + else { + /* TODO make safer */ + return BIFROST_SRC_STAGE; + } } static struct bi_packed_tuple -bi_pack_tuple(bi_clause *clause, bi_tuple *tuple, bi_tuple *prev, bool first_tuple, gl_shader_stage stage) +bi_pack_tuple(bi_clause *clause, bi_tuple *tuple, bi_tuple *prev, + bool first_tuple, gl_shader_stage stage) { - bi_assign_slots(tuple, prev); - tuple->regs.fau_idx = tuple->fau_idx; - tuple->regs.first_instruction = first_tuple; + bi_assign_slots(tuple, prev); + tuple->regs.fau_idx = tuple->fau_idx; + tuple->regs.first_instruction = first_tuple; - bi_flip_slots(&tuple->regs); + bi_flip_slots(&tuple->regs); - bool sr_read = tuple->add && - bi_opcode_props[(tuple->add)->op].sr_read; + bool sr_read = tuple->add && bi_opcode_props[(tuple->add)->op].sr_read; - uint64_t reg = bi_pack_registers(tuple->regs); - uint64_t fma = bi_pack_fma(tuple->fma, - bi_get_src_new(tuple->fma, &tuple->regs, 0), - bi_get_src_new(tuple->fma, &tuple->regs, 1), - bi_get_src_new(tuple->fma, &tuple->regs, 2), - bi_get_src_new(tuple->fma, &tuple->regs, 3)); + uint64_t reg = bi_pack_registers(tuple->regs); + uint64_t fma = + bi_pack_fma(tuple->fma, bi_get_src_new(tuple->fma, &tuple->regs, 0), + bi_get_src_new(tuple->fma, &tuple->regs, 1), + bi_get_src_new(tuple->fma, &tuple->regs, 2), + bi_get_src_new(tuple->fma, &tuple->regs, 3)); - uint64_t add = bi_pack_add(tuple->add, - bi_get_src_new(tuple->add, &tuple->regs, sr_read + 0), - bi_get_src_new(tuple->add, &tuple->regs, sr_read + 1), - bi_get_src_new(tuple->add, &tuple->regs, sr_read + 2), - 0); + uint64_t add = bi_pack_add( + tuple->add, bi_get_src_new(tuple->add, &tuple->regs, sr_read + 0), + bi_get_src_new(tuple->add, &tuple->regs, sr_read + 1), + bi_get_src_new(tuple->add, &tuple->regs, sr_read + 2), 0); - if (tuple->add) { - bi_instr *add = tuple->add; + if (tuple->add) { + bi_instr *add = tuple->add; - bool sr_write = bi_opcode_props[add->op].sr_write && - !bi_is_null(add->dest[0]); + bool sr_write = + bi_opcode_props[add->op].sr_write && !bi_is_null(add->dest[0]); - if (sr_read && !bi_is_null(add->src[0])) { - assert(add->src[0].type == BI_INDEX_REGISTER); - clause->staging_register = add->src[0].value; + if (sr_read && !bi_is_null(add->src[0])) { + assert(add->src[0].type == BI_INDEX_REGISTER); + clause->staging_register = add->src[0].value; - if (sr_write) - assert(bi_is_equiv(add->src[0], add->dest[0])); - } else if (sr_write) { - assert(add->dest[0].type == BI_INDEX_REGISTER); - clause->staging_register = add->dest[0].value; - } - 
} + if (sr_write) + assert(bi_is_equiv(add->src[0], add->dest[0])); + } else if (sr_write) { + assert(add->dest[0].type == BI_INDEX_REGISTER); + clause->staging_register = add->dest[0].value; + } + } - struct bi_packed_tuple packed = { - .lo = reg | (fma << 35) | ((add & 0b111111) << 58), - .hi = add >> 6, - }; + struct bi_packed_tuple packed = { + .lo = reg | (fma << 35) | ((add & 0b111111) << 58), + .hi = add >> 6, + }; - return packed; + return packed; } /* A block contains at most one PC-relative constant, from a terminal branch. @@ -378,357 +377,328 @@ bi_pack_tuple(bi_clause *clause, bi_tuple *tuple, bi_tuple *prev, bool first_tup static void bi_assign_branch_offset(bi_context *ctx, bi_block *block) { - if (list_is_empty(&block->clauses)) - return; + if (list_is_empty(&block->clauses)) + return; - bi_clause *clause = list_last_entry(&block->clauses, bi_clause, link); - bi_instr *br = bi_last_instr_in_clause(clause); + bi_clause *clause = list_last_entry(&block->clauses, bi_clause, link); + bi_instr *br = bi_last_instr_in_clause(clause); - if (!br->branch_target) - return; + if (!br->branch_target) + return; - /* Put it in the high place */ - int32_t qwords = bi_block_offset(ctx, clause, br->branch_target); - int32_t bytes = qwords * 16; + /* Put it in the high place */ + int32_t qwords = bi_block_offset(ctx, clause, br->branch_target); + int32_t bytes = qwords * 16; - /* Copy so we can toy with the sign without undefined behaviour */ - uint32_t raw = 0; - memcpy(&raw, &bytes, sizeof(raw)); + /* Copy so we can toy with the sign without undefined behaviour */ + uint32_t raw = 0; + memcpy(&raw, &bytes, sizeof(raw)); - /* Clear off top bits for A1/B1 bits */ - raw &= ~0xF0000000; + /* Clear off top bits for A1/B1 bits */ + raw &= ~0xF0000000; - /* Put in top 32-bits */ - assert(clause->pcrel_idx < 8); - clause->constants[clause->pcrel_idx] |= ((uint64_t) raw) << 32ull; + /* Put in top 32-bits */ + assert(clause->pcrel_idx < 8); + clause->constants[clause->pcrel_idx] |= ((uint64_t)raw) << 32ull; } static void -bi_pack_constants(unsigned tuple_count, uint64_t *constants, - unsigned word_idx, unsigned constant_words, bool ec0_packed, - struct util_dynarray *emission) +bi_pack_constants(unsigned tuple_count, uint64_t *constants, unsigned word_idx, + unsigned constant_words, bool ec0_packed, + struct util_dynarray *emission) { - unsigned index = (word_idx << 1) + ec0_packed; + unsigned index = (word_idx << 1) + ec0_packed; - /* Do more constants follow */ - bool more = (word_idx + 1) < constant_words; + /* Do more constants follow */ + bool more = (word_idx + 1) < constant_words; - /* Indexed first by tuple count and second by constant word number, - * indicates the position in the clause */ - unsigned pos_lookup[8][3] = { - { 0 }, - { 1 }, - { 3 }, - { 2, 5 }, - { 4, 8 }, - { 7, 11, 14 }, - { 6, 10, 13 }, - { 9, 12 }, - }; + /* Indexed first by tuple count and second by constant word number, + * indicates the position in the clause */ + unsigned pos_lookup[8][3] = { + {0}, {1}, {3}, {2, 5}, {4, 8}, {7, 11, 14}, {6, 10, 13}, {9, 12}, + }; - /* Compute the pos, and check everything is reasonable */ - assert((tuple_count - 1) < 8); - assert(word_idx < 3); - unsigned pos = pos_lookup[tuple_count - 1][word_idx]; - assert(pos != 0 || (tuple_count == 1 && word_idx == 0)); + /* Compute the pos, and check everything is reasonable */ + assert((tuple_count - 1) < 8); + assert(word_idx < 3); + unsigned pos = pos_lookup[tuple_count - 1][word_idx]; + assert(pos != 0 || (tuple_count == 1 && word_idx == 
0)); - struct bifrost_fmt_constant quad = { - .pos = pos, - .tag = more ? BIFROST_FMTC_CONSTANTS : BIFROST_FMTC_FINAL, - .imm_1 = constants[index + 0] >> 4, - .imm_2 = constants[index + 1] >> 4, - }; + struct bifrost_fmt_constant quad = { + .pos = pos, + .tag = more ? BIFROST_FMTC_CONSTANTS : BIFROST_FMTC_FINAL, + .imm_1 = constants[index + 0] >> 4, + .imm_2 = constants[index + 1] >> 4, + }; - util_dynarray_append(emission, struct bifrost_fmt_constant, quad); + util_dynarray_append(emission, struct bifrost_fmt_constant, quad); } uint8_t bi_pack_literal(enum bi_clause_subword literal) { - assert(literal >= BI_CLAUSE_SUBWORD_LITERAL_0); - assert(literal <= BI_CLAUSE_SUBWORD_LITERAL_7); + assert(literal >= BI_CLAUSE_SUBWORD_LITERAL_0); + assert(literal <= BI_CLAUSE_SUBWORD_LITERAL_7); - return (literal - BI_CLAUSE_SUBWORD_LITERAL_0); + return (literal - BI_CLAUSE_SUBWORD_LITERAL_0); } static inline uint8_t -bi_clause_upper(unsigned val, - struct bi_packed_tuple *tuples, +bi_clause_upper(unsigned val, struct bi_packed_tuple *tuples, ASSERTED unsigned tuple_count) { - assert(val < tuple_count); + assert(val < tuple_count); - /* top 3-bits of 78-bits is tuple >> 75 == (tuple >> 64) >> 11 */ - struct bi_packed_tuple tuple = tuples[val]; - return (tuple.hi >> 11); + /* top 3-bits of 78-bits is tuple >> 75 == (tuple >> 64) >> 11 */ + struct bi_packed_tuple tuple = tuples[val]; + return (tuple.hi >> 11); } uint8_t -bi_pack_upper(enum bi_clause_subword upper, - struct bi_packed_tuple *tuples, - ASSERTED unsigned tuple_count) +bi_pack_upper(enum bi_clause_subword upper, struct bi_packed_tuple *tuples, + ASSERTED unsigned tuple_count) { - assert(upper >= BI_CLAUSE_SUBWORD_UPPER_0); - assert(upper <= BI_CLAUSE_SUBWORD_UPPER_7); + assert(upper >= BI_CLAUSE_SUBWORD_UPPER_0); + assert(upper <= BI_CLAUSE_SUBWORD_UPPER_7); - return bi_clause_upper(upper - BI_CLAUSE_SUBWORD_UPPER_0, tuples, - tuple_count); + return bi_clause_upper(upper - BI_CLAUSE_SUBWORD_UPPER_0, tuples, + tuple_count); } uint64_t -bi_pack_tuple_bits(enum bi_clause_subword idx, - struct bi_packed_tuple *tuples, - ASSERTED unsigned tuple_count, - unsigned offset, unsigned nbits) +bi_pack_tuple_bits(enum bi_clause_subword idx, struct bi_packed_tuple *tuples, + ASSERTED unsigned tuple_count, unsigned offset, + unsigned nbits) { - assert(idx >= BI_CLAUSE_SUBWORD_TUPLE_0); - assert(idx <= BI_CLAUSE_SUBWORD_TUPLE_7); + assert(idx >= BI_CLAUSE_SUBWORD_TUPLE_0); + assert(idx <= BI_CLAUSE_SUBWORD_TUPLE_7); - unsigned val = (idx - BI_CLAUSE_SUBWORD_TUPLE_0); - assert(val < tuple_count); + unsigned val = (idx - BI_CLAUSE_SUBWORD_TUPLE_0); + assert(val < tuple_count); - struct bi_packed_tuple tuple = tuples[val]; + struct bi_packed_tuple tuple = tuples[val]; - assert(offset + nbits < 78); - assert(nbits <= 64); + assert(offset + nbits < 78); + assert(nbits <= 64); - /* (X >> start) & m - * = (((hi << 64) | lo) >> start) & m - * = (((hi << 64) >> start) | (lo >> start)) & m - * = { ((hi << (64 - start)) | (lo >> start)) & m if start <= 64 - * { ((hi >> (start - 64)) | (lo >> start)) & m if start >= 64 - * = { ((hi << (64 - start)) & m) | ((lo >> start) & m) if start <= 64 - * { ((hi >> (start - 64)) & m) | ((lo >> start) & m) if start >= 64 - * - * By setting m = 2^64 - 1, we justify doing the respective shifts as - * 64-bit integers. Zero special cased to avoid undefined behaviour. 
- */ + /* (X >> start) & m + * = (((hi << 64) | lo) >> start) & m + * = (((hi << 64) >> start) | (lo >> start)) & m + * = { ((hi << (64 - start)) | (lo >> start)) & m if start <= 64 + * { ((hi >> (start - 64)) | (lo >> start)) & m if start >= 64 + * = { ((hi << (64 - start)) & m) | ((lo >> start) & m) if start <= 64 + * { ((hi >> (start - 64)) & m) | ((lo >> start) & m) if start >= 64 + * + * By setting m = 2^64 - 1, we justify doing the respective shifts as + * 64-bit integers. Zero special cased to avoid undefined behaviour. + */ - uint64_t lo = (tuple.lo >> offset); - uint64_t hi = (offset == 0) ? 0 - : (offset > 64) ? (tuple.hi >> (offset - 64)) - : (tuple.hi << (64 - offset)); + uint64_t lo = (tuple.lo >> offset); + uint64_t hi = (offset == 0) ? 0 + : (offset > 64) ? (tuple.hi >> (offset - 64)) + : (tuple.hi << (64 - offset)); - return (lo | hi) & ((1ULL << nbits) - 1); + return (lo | hi) & ((1ULL << nbits) - 1); } static inline uint16_t -bi_pack_lu(enum bi_clause_subword word, - struct bi_packed_tuple *tuples, - ASSERTED unsigned tuple_count) +bi_pack_lu(enum bi_clause_subword word, struct bi_packed_tuple *tuples, + ASSERTED unsigned tuple_count) { - return (word >= BI_CLAUSE_SUBWORD_UPPER_0) ? - bi_pack_upper(word, tuples, tuple_count) : - bi_pack_literal(word); + return (word >= BI_CLAUSE_SUBWORD_UPPER_0) + ? bi_pack_upper(word, tuples, tuple_count) + : bi_pack_literal(word); } uint8_t -bi_pack_sync(enum bi_clause_subword t1, - enum bi_clause_subword t2, - enum bi_clause_subword t3, - struct bi_packed_tuple *tuples, - ASSERTED unsigned tuple_count, - bool z) +bi_pack_sync(enum bi_clause_subword t1, enum bi_clause_subword t2, + enum bi_clause_subword t3, struct bi_packed_tuple *tuples, + ASSERTED unsigned tuple_count, bool z) { - uint8_t sync = - (bi_pack_lu(t3, tuples, tuple_count) << 0) | - (bi_pack_lu(t2, tuples, tuple_count) << 3); + uint8_t sync = (bi_pack_lu(t3, tuples, tuple_count) << 0) | + (bi_pack_lu(t2, tuples, tuple_count) << 3); - if (t1 == BI_CLAUSE_SUBWORD_Z) - sync |= z << 6; - else - sync |= bi_pack_literal(t1) << 6; + if (t1 == BI_CLAUSE_SUBWORD_Z) + sync |= z << 6; + else + sync |= bi_pack_literal(t1) << 6; - return sync; + return sync; } static inline uint64_t -bi_pack_t_ec(enum bi_clause_subword word, - struct bi_packed_tuple *tuples, - ASSERTED unsigned tuple_count, - uint64_t ec0) +bi_pack_t_ec(enum bi_clause_subword word, struct bi_packed_tuple *tuples, + ASSERTED unsigned tuple_count, uint64_t ec0) { - if (word == BI_CLAUSE_SUBWORD_CONSTANT) - return ec0; - else - return bi_pack_tuple_bits(word, tuples, tuple_count, 0, 60); + if (word == BI_CLAUSE_SUBWORD_CONSTANT) + return ec0; + else + return bi_pack_tuple_bits(word, tuples, tuple_count, 0, 60); } static uint32_t -bi_pack_subwords_56(enum bi_clause_subword t, - struct bi_packed_tuple *tuples, - ASSERTED unsigned tuple_count, - uint64_t header, uint64_t ec0, - unsigned tuple_subword) +bi_pack_subwords_56(enum bi_clause_subword t, struct bi_packed_tuple *tuples, + ASSERTED unsigned tuple_count, uint64_t header, + uint64_t ec0, unsigned tuple_subword) { - switch (t) { - case BI_CLAUSE_SUBWORD_HEADER: - return (header & ((1 << 30) - 1)); - case BI_CLAUSE_SUBWORD_RESERVED: - return 0; - case BI_CLAUSE_SUBWORD_CONSTANT: - return (ec0 >> 15) & ((1 << 30) - 1); - default: - return bi_pack_tuple_bits(t, tuples, tuple_count, tuple_subword * 15, 30); - } + switch (t) { + case BI_CLAUSE_SUBWORD_HEADER: + return (header & ((1 << 30) - 1)); + case BI_CLAUSE_SUBWORD_RESERVED: + return 0; + case 
BI_CLAUSE_SUBWORD_CONSTANT: + return (ec0 >> 15) & ((1 << 30) - 1); + default: + return bi_pack_tuple_bits(t, tuples, tuple_count, tuple_subword * 15, 30); + } } static uint16_t bi_pack_subword(enum bi_clause_subword t, unsigned format, - struct bi_packed_tuple *tuples, - ASSERTED unsigned tuple_count, + struct bi_packed_tuple *tuples, ASSERTED unsigned tuple_count, uint64_t header, uint64_t ec0, unsigned m0, unsigned tuple_subword) { - switch (t) { - case BI_CLAUSE_SUBWORD_HEADER: - return header >> 30; - case BI_CLAUSE_SUBWORD_M: - return m0; - case BI_CLAUSE_SUBWORD_CONSTANT: - return (format == 5 || format == 10) ? - (ec0 & ((1 << 15) - 1)) : - (ec0 >> (15 + 30)); - case BI_CLAUSE_SUBWORD_UPPER_23: - return (bi_clause_upper(2, tuples, tuple_count) << 12) | - (bi_clause_upper(3, tuples, tuple_count) << 9); - case BI_CLAUSE_SUBWORD_UPPER_56: - return (bi_clause_upper(5, tuples, tuple_count) << 12) | - (bi_clause_upper(6, tuples, tuple_count) << 9); - case BI_CLAUSE_SUBWORD_UPPER_0 ... BI_CLAUSE_SUBWORD_UPPER_7: - return bi_pack_upper(t, tuples, tuple_count) << 12; - default: - return bi_pack_tuple_bits(t, tuples, tuple_count, tuple_subword * 15, 15); - } + switch (t) { + case BI_CLAUSE_SUBWORD_HEADER: + return header >> 30; + case BI_CLAUSE_SUBWORD_M: + return m0; + case BI_CLAUSE_SUBWORD_CONSTANT: + return (format == 5 || format == 10) ? (ec0 & ((1 << 15) - 1)) + : (ec0 >> (15 + 30)); + case BI_CLAUSE_SUBWORD_UPPER_23: + return (bi_clause_upper(2, tuples, tuple_count) << 12) | + (bi_clause_upper(3, tuples, tuple_count) << 9); + case BI_CLAUSE_SUBWORD_UPPER_56: + return (bi_clause_upper(5, tuples, tuple_count) << 12) | + (bi_clause_upper(6, tuples, tuple_count) << 9); + case BI_CLAUSE_SUBWORD_UPPER_0 ... BI_CLAUSE_SUBWORD_UPPER_7: + return bi_pack_upper(t, tuples, tuple_count) << 12; + default: + return bi_pack_tuple_bits(t, tuples, tuple_count, tuple_subword * 15, 15); + } } /* EC0 is 60-bits (bottom 4 already shifted off) */ void -bi_pack_format(struct util_dynarray *emission, - unsigned index, - struct bi_packed_tuple *tuples, - ASSERTED unsigned tuple_count, - uint64_t header, uint64_t ec0, - unsigned m0, bool z) +bi_pack_format(struct util_dynarray *emission, unsigned index, + struct bi_packed_tuple *tuples, ASSERTED unsigned tuple_count, + uint64_t header, uint64_t ec0, unsigned m0, bool z) { - struct bi_clause_format format = bi_clause_formats[index]; + struct bi_clause_format format = bi_clause_formats[index]; - uint8_t sync = bi_pack_sync(format.tag_1, format.tag_2, format.tag_3, - tuples, tuple_count, z); + uint8_t sync = bi_pack_sync(format.tag_1, format.tag_2, format.tag_3, tuples, + tuple_count, z); - uint64_t s0_s3 = bi_pack_t_ec(format.s0_s3, tuples, tuple_count, ec0); + uint64_t s0_s3 = bi_pack_t_ec(format.s0_s3, tuples, tuple_count, ec0); - uint16_t s4 = bi_pack_subword(format.s4, format.format, tuples, tuple_count, header, ec0, m0, 4); + uint16_t s4 = bi_pack_subword(format.s4, format.format, tuples, tuple_count, + header, ec0, m0, 4); - uint32_t s5_s6 = bi_pack_subwords_56(format.s5_s6, - tuples, tuple_count, header, ec0, - (format.format == 2 || format.format == 7) ? 0 : 3); + uint32_t s5_s6 = + bi_pack_subwords_56(format.s5_s6, tuples, tuple_count, header, ec0, + (format.format == 2 || format.format == 7) ? 
0 : 3); - uint64_t s7 = bi_pack_subword(format.s7, format.format, tuples, tuple_count, header, ec0, m0, 2); + uint64_t s7 = bi_pack_subword(format.s7, format.format, tuples, tuple_count, + header, ec0, m0, 2); - /* Now that subwords are packed, split into 64-bit halves and emit */ - uint64_t lo = sync | ((s0_s3 & ((1ull << 56) - 1)) << 8); - uint64_t hi = (s0_s3 >> 56) | ((uint64_t) s4 << 4) | ((uint64_t) s5_s6 << 19) | ((uint64_t) s7 << 49); + /* Now that subwords are packed, split into 64-bit halves and emit */ + uint64_t lo = sync | ((s0_s3 & ((1ull << 56) - 1)) << 8); + uint64_t hi = (s0_s3 >> 56) | ((uint64_t)s4 << 4) | ((uint64_t)s5_s6 << 19) | + ((uint64_t)s7 << 49); - util_dynarray_append(emission, uint64_t, lo); - util_dynarray_append(emission, uint64_t, hi); + util_dynarray_append(emission, uint64_t, lo); + util_dynarray_append(emission, uint64_t, hi); } static void -bi_pack_clause(bi_context *ctx, bi_clause *clause, - bi_clause *next_1, bi_clause *next_2, - struct util_dynarray *emission, gl_shader_stage stage) +bi_pack_clause(bi_context *ctx, bi_clause *clause, bi_clause *next_1, + bi_clause *next_2, struct util_dynarray *emission, + gl_shader_stage stage) { - struct bi_packed_tuple ins[8] = { 0 }; + struct bi_packed_tuple ins[8] = {0}; - for (unsigned i = 0; i < clause->tuple_count; ++i) { - unsigned prev = ((i == 0) ? clause->tuple_count : i) - 1; - ins[i] = bi_pack_tuple(clause, &clause->tuples[i], - &clause->tuples[prev], i == 0, stage); + for (unsigned i = 0; i < clause->tuple_count; ++i) { + unsigned prev = ((i == 0) ? clause->tuple_count : i) - 1; + ins[i] = bi_pack_tuple(clause, &clause->tuples[i], &clause->tuples[prev], + i == 0, stage); - bi_instr *add = clause->tuples[i].add; + bi_instr *add = clause->tuples[i].add; - /* Different GPUs support different forms of the CLPER.i32 - * instruction. Check we use the right one for the target. - */ - if (add && add->op == BI_OPCODE_CLPER_OLD_I32) - assert(ctx->quirks & BIFROST_LIMITED_CLPER); - else if (add && add->op == BI_OPCODE_CLPER_I32) - assert(!(ctx->quirks & BIFROST_LIMITED_CLPER)); - } + /* Different GPUs support different forms of the CLPER.i32 + * instruction. Check we use the right one for the target. + */ + if (add && add->op == BI_OPCODE_CLPER_OLD_I32) + assert(ctx->quirks & BIFROST_LIMITED_CLPER); + else if (add && add->op == BI_OPCODE_CLPER_I32) + assert(!(ctx->quirks & BIFROST_LIMITED_CLPER)); + } - bool ec0_packed = bi_ec0_packed(clause->tuple_count); + bool ec0_packed = bi_ec0_packed(clause->tuple_count); - if (ec0_packed) - clause->constant_count = MAX2(clause->constant_count, 1); + if (ec0_packed) + clause->constant_count = MAX2(clause->constant_count, 1); - unsigned constant_quads = - DIV_ROUND_UP(clause->constant_count - (ec0_packed ? 1 : 0), 2); + unsigned constant_quads = + DIV_ROUND_UP(clause->constant_count - (ec0_packed ? 1 : 0), 2); - uint64_t header = bi_pack_header(clause, next_1, next_2); - uint64_t ec0 = (clause->constants[0] >> 4); - unsigned m0 = (clause->pcrel_idx == 0) ? 4 : 0; + uint64_t header = bi_pack_header(clause, next_1, next_2); + uint64_t ec0 = (clause->constants[0] >> 4); + unsigned m0 = (clause->pcrel_idx == 0) ? 
4 : 0; - unsigned counts[8] = { - 1, 2, 3, 3, 4, 5, 5, 6, - }; + unsigned counts[8] = { + 1, 2, 3, 3, 4, 5, 5, 6, + }; - unsigned indices[8][6] = { - { 1 }, - { 0, 2 }, - { 0, 3, 4 }, - { 0, 3, 6 }, - { 0, 3, 7, 8 }, - { 0, 3, 5, 9, 10 }, - { 0, 3, 5, 9, 11 }, - { 0, 3, 5, 9, 12, 13 }, - }; + unsigned indices[8][6] = { + {1}, {0, 2}, {0, 3, 4}, {0, 3, 6}, + {0, 3, 7, 8}, {0, 3, 5, 9, 10}, {0, 3, 5, 9, 11}, {0, 3, 5, 9, 12, 13}, + }; - unsigned count = counts[clause->tuple_count - 1]; + unsigned count = counts[clause->tuple_count - 1]; - for (unsigned pos = 0; pos < count; ++pos) { - ASSERTED unsigned idx = indices[clause->tuple_count - 1][pos]; - assert(bi_clause_formats[idx].pos == pos); - assert((bi_clause_formats[idx].tag_1 == BI_CLAUSE_SUBWORD_Z) == - (pos == count - 1)); + for (unsigned pos = 0; pos < count; ++pos) { + ASSERTED unsigned idx = indices[clause->tuple_count - 1][pos]; + assert(bi_clause_formats[idx].pos == pos); + assert((bi_clause_formats[idx].tag_1 == BI_CLAUSE_SUBWORD_Z) == + (pos == count - 1)); - /* Whether to end the clause immediately after the last tuple */ - bool z = (constant_quads == 0); + /* Whether to end the clause immediately after the last tuple */ + bool z = (constant_quads == 0); - bi_pack_format(emission, indices[clause->tuple_count - 1][pos], - ins, clause->tuple_count, header, ec0, m0, - z); - } + bi_pack_format(emission, indices[clause->tuple_count - 1][pos], ins, + clause->tuple_count, header, ec0, m0, z); + } - /* Pack the remaining constants */ + /* Pack the remaining constants */ - for (unsigned pos = 0; pos < constant_quads; ++pos) { - bi_pack_constants(clause->tuple_count, clause->constants, - pos, constant_quads, ec0_packed, emission); - } + for (unsigned pos = 0; pos < constant_quads; ++pos) { + bi_pack_constants(clause->tuple_count, clause->constants, pos, + constant_quads, ec0_packed, emission); + } } static void bi_collect_blend_ret_addr(bi_context *ctx, struct util_dynarray *emission, const bi_clause *clause) { - /* No need to collect return addresses when we're in a blend shader. */ - if (ctx->inputs->is_blend) - return; + /* No need to collect return addresses when we're in a blend shader. 
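    * (Otherwise, the offset just past each clause whose last tuple ends in
    * BLEND is recorded per render target in ctx->info.bifrost->blend[] below.)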
*/ + if (ctx->inputs->is_blend) + return; - const bi_tuple *tuple = &clause->tuples[clause->tuple_count - 1]; - const bi_instr *ins = tuple->add; + const bi_tuple *tuple = &clause->tuples[clause->tuple_count - 1]; + const bi_instr *ins = tuple->add; - if (!ins || ins->op != BI_OPCODE_BLEND) - return; + if (!ins || ins->op != BI_OPCODE_BLEND) + return; - - unsigned loc = tuple->regs.fau_idx - BIR_FAU_BLEND_0; - assert(loc < ARRAY_SIZE(ctx->info.bifrost->blend)); - assert(!ctx->info.bifrost->blend[loc].return_offset); - ctx->info.bifrost->blend[loc].return_offset = - util_dynarray_num_elements(emission, uint8_t); - assert(!(ctx->info.bifrost->blend[loc].return_offset & 0x7)); + unsigned loc = tuple->regs.fau_idx - BIR_FAU_BLEND_0; + assert(loc < ARRAY_SIZE(ctx->info.bifrost->blend)); + assert(!ctx->info.bifrost->blend[loc].return_offset); + ctx->info.bifrost->blend[loc].return_offset = + util_dynarray_num_elements(emission, uint8_t); + assert(!(ctx->info.bifrost->blend[loc].return_offset & 0x7)); } /* @@ -740,50 +710,49 @@ bi_collect_blend_ret_addr(bi_context *ctx, struct util_dynarray *emission, static void bi_lower_texc_dual(bi_context *ctx) { - bi_foreach_instr_global(ctx, I) { - if (I->op == BI_OPCODE_TEXC_DUAL) { - /* In hardware, TEXC has 1 destination */ - I->op = BI_OPCODE_TEXC; - bi_drop_dests(I, 1); - } - } + bi_foreach_instr_global(ctx, I) { + if (I->op == BI_OPCODE_TEXC_DUAL) { + /* In hardware, TEXC has 1 destination */ + I->op = BI_OPCODE_TEXC; + bi_drop_dests(I, 1); + } + } } unsigned bi_pack(bi_context *ctx, struct util_dynarray *emission) { - unsigned previous_size = emission->size; + unsigned previous_size = emission->size; - bi_lower_texc_dual(ctx); + bi_lower_texc_dual(ctx); - bi_foreach_block(ctx, block) { - bi_assign_branch_offset(ctx, block); + bi_foreach_block(ctx, block) { + bi_assign_branch_offset(ctx, block); - bi_foreach_clause_in_block(block, clause) { - bool is_last = (clause->link.next == &block->clauses); + bi_foreach_clause_in_block(block, clause) { + bool is_last = (clause->link.next == &block->clauses); - /* Get the succeeding clauses, either two successors of - * the block for the last clause in the block or just - * the next clause within the block */ + /* Get the succeeding clauses, either two successors of + * the block for the last clause in the block or just + * the next clause within the block */ - bi_clause *next = NULL, *next_2 = NULL; + bi_clause *next = NULL, *next_2 = NULL; - if (is_last) { - next = bi_next_clause(ctx, block->successors[0], NULL); - next_2 = bi_next_clause(ctx, block->successors[1], NULL); - } else { - next = bi_next_clause(ctx, block, clause); - } + if (is_last) { + next = bi_next_clause(ctx, block->successors[0], NULL); + next_2 = bi_next_clause(ctx, block->successors[1], NULL); + } else { + next = bi_next_clause(ctx, block, clause); + } + previous_size = emission->size; - previous_size = emission->size; + bi_pack_clause(ctx, clause, next, next_2, emission, ctx->stage); - bi_pack_clause(ctx, clause, next, next_2, emission, ctx->stage); + if (!is_last) + bi_collect_blend_ret_addr(ctx, emission, clause); + } + } - if (!is_last) - bi_collect_blend_ret_addr(ctx, emission, clause); - } - } - - return emission->size - previous_size; + return emission->size - previous_size; } diff --git a/src/panfrost/bifrost/bi_pressure_schedule.c b/src/panfrost/bifrost/bi_pressure_schedule.c index a4748e8bed6..1fa3134fecf 100644 --- a/src/panfrost/bifrost/bi_pressure_schedule.c +++ b/src/panfrost/bifrost/bi_pressure_schedule.c @@ -26,149 +26,148 @@ 
/* Bottom-up local scheduler to reduce register pressure */ -#include "compiler.h" #include "util/dag.h" +#include "compiler.h" struct sched_ctx { - /* Dependency graph */ - struct dag *dag; + /* Dependency graph */ + struct dag *dag; - /* Live set */ - BITSET_WORD *live; + /* Live set */ + BITSET_WORD *live; }; struct sched_node { - struct dag_node dag; + struct dag_node dag; - /* Instruction this node represents */ - bi_instr *instr; + /* Instruction this node represents */ + bi_instr *instr; }; static void add_dep(struct sched_node *a, struct sched_node *b) { - if (a && b) - dag_add_edge(&a->dag, &b->dag, 0); + if (a && b) + dag_add_edge(&a->dag, &b->dag, 0); } static struct dag * create_dag(bi_context *ctx, bi_block *block, void *memctx) { - struct dag *dag = dag_create(ctx); + struct dag *dag = dag_create(ctx); - struct sched_node **last_write = - calloc(ctx->ssa_alloc, sizeof(struct sched_node *)); - struct sched_node *coverage = NULL; - struct sched_node *preload = NULL; + struct sched_node **last_write = + calloc(ctx->ssa_alloc, sizeof(struct sched_node *)); + struct sched_node *coverage = NULL; + struct sched_node *preload = NULL; - /* Last memory load, to serialize stores against */ - struct sched_node *memory_load = NULL; + /* Last memory load, to serialize stores against */ + struct sched_node *memory_load = NULL; - /* Last memory store, to serialize loads and stores against */ - struct sched_node *memory_store = NULL; + /* Last memory store, to serialize loads and stores against */ + struct sched_node *memory_store = NULL; - bi_foreach_instr_in_block(block, I) { - /* Leave branches at the end */ - if (I->op == BI_OPCODE_JUMP || bi_opcode_props[I->op].branch) - break; + bi_foreach_instr_in_block(block, I) { + /* Leave branches at the end */ + if (I->op == BI_OPCODE_JUMP || bi_opcode_props[I->op].branch) + break; - assert(I->branch_target == NULL); + assert(I->branch_target == NULL); - struct sched_node *node = rzalloc(memctx, struct sched_node); - node->instr = I; - dag_init_node(dag, &node->dag); + struct sched_node *node = rzalloc(memctx, struct sched_node); + node->instr = I; + dag_init_node(dag, &node->dag); - /* Reads depend on writes, no other hazards in SSA */ - bi_foreach_ssa_src(I, s) - add_dep(node, last_write[I->src[s].value]); + /* Reads depend on writes, no other hazards in SSA */ + bi_foreach_ssa_src(I, s) + add_dep(node, last_write[I->src[s].value]); - bi_foreach_dest(I, d) - last_write[I->dest[d].value] = node; + bi_foreach_dest(I, d) + last_write[I->dest[d].value] = node; - switch (bi_opcode_props[I->op].message) { - case BIFROST_MESSAGE_LOAD: - /* Regular memory loads needs to be serialized against - * other memory access. However, UBO memory is read-only - * so it can be moved around freely. - */ - if (I->seg != BI_SEG_UBO) { - add_dep(node, memory_store); - memory_load = node; - } + switch (bi_opcode_props[I->op].message) { + case BIFROST_MESSAGE_LOAD: + /* Regular memory loads needs to be serialized against + * other memory access. However, UBO memory is read-only + * so it can be moved around freely. + */ + if (I->seg != BI_SEG_UBO) { + add_dep(node, memory_store); + memory_load = node; + } - break; + break; - case BIFROST_MESSAGE_ATTRIBUTE: - /* Regular attribute loads can be reordered, but - * writeable attributes can't be. Our one use of - * writeable attributes are images. 
- */ - if ((I->op == BI_OPCODE_LD_TEX) || - (I->op == BI_OPCODE_LD_TEX_IMM) || - (I->op == BI_OPCODE_LD_ATTR_TEX)) { - add_dep(node, memory_store); - memory_load = node; - } + case BIFROST_MESSAGE_ATTRIBUTE: + /* Regular attribute loads can be reordered, but + * writeable attributes can't be. Our one use of + * writeable attributes are images. + */ + if ((I->op == BI_OPCODE_LD_TEX) || (I->op == BI_OPCODE_LD_TEX_IMM) || + (I->op == BI_OPCODE_LD_ATTR_TEX)) { + add_dep(node, memory_store); + memory_load = node; + } - break; + break; - case BIFROST_MESSAGE_STORE: - assert(I->seg != BI_SEG_UBO); - add_dep(node, memory_load); - add_dep(node, memory_store); - memory_store = node; - break; + case BIFROST_MESSAGE_STORE: + assert(I->seg != BI_SEG_UBO); + add_dep(node, memory_load); + add_dep(node, memory_store); + memory_store = node; + break; - case BIFROST_MESSAGE_ATOMIC: - case BIFROST_MESSAGE_BARRIER: - add_dep(node, memory_load); - add_dep(node, memory_store); - memory_load = node; - memory_store = node; - break; + case BIFROST_MESSAGE_ATOMIC: + case BIFROST_MESSAGE_BARRIER: + add_dep(node, memory_load); + add_dep(node, memory_store); + memory_load = node; + memory_store = node; + break; - case BIFROST_MESSAGE_BLEND: - case BIFROST_MESSAGE_Z_STENCIL: - case BIFROST_MESSAGE_TILE: - add_dep(node, coverage); - coverage = node; - break; + case BIFROST_MESSAGE_BLEND: + case BIFROST_MESSAGE_Z_STENCIL: + case BIFROST_MESSAGE_TILE: + add_dep(node, coverage); + coverage = node; + break; - case BIFROST_MESSAGE_ATEST: - /* ATEST signals the end of shader side effects */ - add_dep(node, memory_store); - memory_store = node; + case BIFROST_MESSAGE_ATEST: + /* ATEST signals the end of shader side effects */ + add_dep(node, memory_store); + memory_store = node; - /* ATEST also updates coverage */ - add_dep(node, coverage); - coverage = node; - break; - default: - break; - } + /* ATEST also updates coverage */ + add_dep(node, coverage); + coverage = node; + break; + default: + break; + } - add_dep(node, preload); + add_dep(node, preload); - if (I->op == BI_OPCODE_DISCARD_F32) { - /* Serialize against ATEST */ - add_dep(node, coverage); - coverage = node; + if (I->op == BI_OPCODE_DISCARD_F32) { + /* Serialize against ATEST */ + add_dep(node, coverage); + coverage = node; - /* Also serialize against memory and barriers */ - add_dep(node, memory_load); - add_dep(node, memory_store); - memory_load = node; - memory_store = node; - } else if ((I->op == BI_OPCODE_PHI) || - (I->op == BI_OPCODE_MOV_I32 && - I->src[0].type == BI_INDEX_REGISTER)) { - preload = node; - } - } + /* Also serialize against memory and barriers */ + add_dep(node, memory_load); + add_dep(node, memory_store); + memory_load = node; + memory_store = node; + } else if ((I->op == BI_OPCODE_PHI) || + (I->op == BI_OPCODE_MOV_I32 && + I->src[0].type == BI_INDEX_REGISTER)) { + preload = node; + } + } - free(last_write); + free(last_write); - return dag; + return dag; } /* @@ -183,30 +182,30 @@ create_dag(bi_context *ctx, bi_block *block, void *memctx) static signed calculate_pressure_delta(bi_instr *I, BITSET_WORD *live) { - signed delta = 0; + signed delta = 0; - /* Destinations must be unique */ - bi_foreach_dest(I, d) { - if (BITSET_TEST(live, I->dest[d].value)) - delta -= bi_count_write_registers(I, d); - } + /* Destinations must be unique */ + bi_foreach_dest(I, d) { + if (BITSET_TEST(live, I->dest[d].value)) + delta -= bi_count_write_registers(I, d); + } - bi_foreach_ssa_src(I, src) { - /* Filter duplicates */ - bool dupe = false; + 
bi_foreach_ssa_src(I, src) { + /* Filter duplicates */ + bool dupe = false; - for (unsigned i = 0; i < src; ++i) { - if (bi_is_equiv(I->src[i], I->src[src])) { - dupe = true; - break; - } - } + for (unsigned i = 0; i < src; ++i) { + if (bi_is_equiv(I->src[i], I->src[src])) { + dupe = true; + break; + } + } - if (!dupe && !BITSET_TEST(live, I->src[src].value)) - delta += bi_count_read_registers(I, src); - } + if (!dupe && !BITSET_TEST(live, I->src[src].value)) + delta += bi_count_read_registers(I, src); + } - return delta; + return delta; } /* @@ -216,87 +215,88 @@ calculate_pressure_delta(bi_instr *I, BITSET_WORD *live) static struct sched_node * choose_instr(struct sched_ctx *s) { - int32_t min_delta = INT32_MAX; - struct sched_node *best = NULL; + int32_t min_delta = INT32_MAX; + struct sched_node *best = NULL; - list_for_each_entry(struct sched_node, n, &s->dag->heads, dag.link) { - int32_t delta = calculate_pressure_delta(n->instr, s->live); + list_for_each_entry(struct sched_node, n, &s->dag->heads, dag.link) { + int32_t delta = calculate_pressure_delta(n->instr, s->live); - if (delta < min_delta) { - best = n; - min_delta = delta; - } - } + if (delta < min_delta) { + best = n; + min_delta = delta; + } + } - return best; + return best; } static void pressure_schedule_block(bi_context *ctx, bi_block *block, struct sched_ctx *s) { - /* off by a constant, that's ok */ - signed pressure = 0; - signed orig_max_pressure = 0; - unsigned nr_ins = 0; + /* off by a constant, that's ok */ + signed pressure = 0; + signed orig_max_pressure = 0; + unsigned nr_ins = 0; - memcpy(s->live, block->ssa_live_out, BITSET_WORDS(ctx->ssa_alloc) * sizeof(BITSET_WORD)); + memcpy(s->live, block->ssa_live_out, + BITSET_WORDS(ctx->ssa_alloc) * sizeof(BITSET_WORD)); - bi_foreach_instr_in_block_rev(block, I) { - pressure += calculate_pressure_delta(I, s->live); - orig_max_pressure = MAX2(pressure, orig_max_pressure); - bi_liveness_ins_update_ssa(s->live, I); - nr_ins++; - } + bi_foreach_instr_in_block_rev(block, I) { + pressure += calculate_pressure_delta(I, s->live); + orig_max_pressure = MAX2(pressure, orig_max_pressure); + bi_liveness_ins_update_ssa(s->live, I); + nr_ins++; + } - memcpy(s->live, block->ssa_live_out, BITSET_WORDS(ctx->ssa_alloc) * sizeof(BITSET_WORD)); + memcpy(s->live, block->ssa_live_out, + BITSET_WORDS(ctx->ssa_alloc) * sizeof(BITSET_WORD)); - /* off by a constant, that's ok */ - signed max_pressure = 0; - pressure = 0; + /* off by a constant, that's ok */ + signed max_pressure = 0; + pressure = 0; - struct sched_node **schedule = calloc(nr_ins, sizeof(struct sched_node *)); - nr_ins = 0; + struct sched_node **schedule = calloc(nr_ins, sizeof(struct sched_node *)); + nr_ins = 0; - while (!list_is_empty(&s->dag->heads)) { - struct sched_node *node = choose_instr(s); - pressure += calculate_pressure_delta(node->instr, s->live); - max_pressure = MAX2(pressure, max_pressure); - dag_prune_head(s->dag, &node->dag); + while (!list_is_empty(&s->dag->heads)) { + struct sched_node *node = choose_instr(s); + pressure += calculate_pressure_delta(node->instr, s->live); + max_pressure = MAX2(pressure, max_pressure); + dag_prune_head(s->dag, &node->dag); - schedule[nr_ins++] = node; - bi_liveness_ins_update_ssa(s->live, node->instr); - } + schedule[nr_ins++] = node; + bi_liveness_ins_update_ssa(s->live, node->instr); + } - /* Bail if it looks like it's worse */ - if (max_pressure >= orig_max_pressure) { - free(schedule); - return; - } + /* Bail if it looks like it's worse */ + if (max_pressure >= 
orig_max_pressure) { + free(schedule); + return; + } - /* Apply the schedule */ - for (unsigned i = 0; i < nr_ins; ++i) { - bi_remove_instruction(schedule[i]->instr); - list_add(&schedule[i]->instr->link, &block->instructions); - } + /* Apply the schedule */ + for (unsigned i = 0; i < nr_ins; ++i) { + bi_remove_instruction(schedule[i]->instr); + list_add(&schedule[i]->instr->link, &block->instructions); + } - free(schedule); + free(schedule); } void bi_pressure_schedule(bi_context *ctx) { - bi_compute_liveness_ssa(ctx); - void *memctx = ralloc_context(ctx); - BITSET_WORD *live = ralloc_array(memctx, BITSET_WORD, BITSET_WORDS(ctx->ssa_alloc)); + bi_compute_liveness_ssa(ctx); + void *memctx = ralloc_context(ctx); + BITSET_WORD *live = + ralloc_array(memctx, BITSET_WORD, BITSET_WORDS(ctx->ssa_alloc)); - bi_foreach_block(ctx, block) { - struct sched_ctx sctx = { - .dag = create_dag(ctx, block, memctx), - .live = live - }; + bi_foreach_block(ctx, block) { + struct sched_ctx sctx = {.dag = create_dag(ctx, block, memctx), + .live = live}; - pressure_schedule_block(ctx, block, &sctx); - } + pressure_schedule_block(ctx, block, &sctx); + } - ralloc_free(memctx); + ralloc_free(memctx); } diff --git a/src/panfrost/bifrost/bi_print.c b/src/panfrost/bifrost/bi_print.c index 8aa0293dfe1..5d997c79dd3 100644 --- a/src/panfrost/bifrost/bi_print.c +++ b/src/panfrost/bifrost/bi_print.c @@ -24,177 +24,179 @@ * SOFTWARE. */ -#include "compiler.h" #include "bi_print_common.h" +#include "compiler.h" static const char * bi_reg_op_name(enum bifrost_reg_op op) { - switch (op) { - case BIFROST_OP_IDLE: return "idle"; - case BIFROST_OP_READ: return "read"; - case BIFROST_OP_WRITE: return "write"; - case BIFROST_OP_WRITE_LO: return "write lo"; - case BIFROST_OP_WRITE_HI: return "write hi"; - default: return "invalid"; - } + switch (op) { + case BIFROST_OP_IDLE: + return "idle"; + case BIFROST_OP_READ: + return "read"; + case BIFROST_OP_WRITE: + return "write"; + case BIFROST_OP_WRITE_LO: + return "write lo"; + case BIFROST_OP_WRITE_HI: + return "write hi"; + default: + return "invalid"; + } } void bi_print_slots(bi_registers *regs, FILE *fp) { - for (unsigned i = 0; i < 2; ++i) { - if (regs->enabled[i]) - fprintf(fp, "slot %u: %u\n", i, regs->slot[i]); - } + for (unsigned i = 0; i < 2; ++i) { + if (regs->enabled[i]) + fprintf(fp, "slot %u: %u\n", i, regs->slot[i]); + } - if (regs->slot23.slot2) { - fprintf(fp, "slot 2 (%s%s): %u\n", - bi_reg_op_name(regs->slot23.slot2), - regs->slot23.slot2 >= BIFROST_OP_WRITE ? - " FMA": "", - regs->slot[2]); - } + if (regs->slot23.slot2) { + fprintf(fp, "slot 2 (%s%s): %u\n", bi_reg_op_name(regs->slot23.slot2), + regs->slot23.slot2 >= BIFROST_OP_WRITE ? " FMA" : "", + regs->slot[2]); + } - if (regs->slot23.slot3) { - fprintf(fp, "slot 3 (%s %s): %u\n", - bi_reg_op_name(regs->slot23.slot3), - regs->slot23.slot3_fma ? "FMA" : "ADD", - regs->slot[3]); - } + if (regs->slot23.slot3) { + fprintf(fp, "slot 3 (%s %s): %u\n", bi_reg_op_name(regs->slot23.slot3), + regs->slot23.slot3_fma ? "FMA" : "ADD", regs->slot[3]); + } } void bi_print_tuple(bi_tuple *tuple, FILE *fp) { - bi_instr *ins[2] = { tuple->fma, tuple->add }; + bi_instr *ins[2] = {tuple->fma, tuple->add}; - for (unsigned i = 0; i < 2; ++i) { - fprintf(fp, (i == 0) ? "\t* " : "\t+ "); + for (unsigned i = 0; i < 2; ++i) { + fprintf(fp, (i == 0) ? 
"\t* " : "\t+ "); - if (ins[i]) - bi_print_instr(ins[i], fp); - else - fprintf(fp, "NOP\n"); - } + if (ins[i]) + bi_print_instr(ins[i], fp); + else + fprintf(fp, "NOP\n"); + } } void bi_print_clause(bi_clause *clause, FILE *fp) { - fprintf(fp, "id(%u)", clause->scoreboard_id); + fprintf(fp, "id(%u)", clause->scoreboard_id); - if (clause->dependencies) { - fprintf(fp, " wait("); + if (clause->dependencies) { + fprintf(fp, " wait("); - for (unsigned i = 0; i < 8; ++i) { - if (clause->dependencies & (1 << i)) - fprintf(fp, "%u ", i); - } + for (unsigned i = 0; i < 8; ++i) { + if (clause->dependencies & (1 << i)) + fprintf(fp, "%u ", i); + } - fprintf(fp, ")"); - } + fprintf(fp, ")"); + } - fprintf(fp, " %s", bi_flow_control_name(clause->flow_control)); + fprintf(fp, " %s", bi_flow_control_name(clause->flow_control)); - if (!clause->next_clause_prefetch) - fprintf(fp, " no_prefetch"); + if (!clause->next_clause_prefetch) + fprintf(fp, " no_prefetch"); - if (clause->staging_barrier) - fprintf(fp, " osrb"); + if (clause->staging_barrier) + fprintf(fp, " osrb"); - if (clause->td) - fprintf(fp, " td"); + if (clause->td) + fprintf(fp, " td"); - if (clause->pcrel_idx != ~0) - fprintf(fp, " pcrel(%u)", clause->pcrel_idx); + if (clause->pcrel_idx != ~0) + fprintf(fp, " pcrel(%u)", clause->pcrel_idx); - fprintf(fp, "\n"); + fprintf(fp, "\n"); - for (unsigned i = 0; i < clause->tuple_count; ++i) - bi_print_tuple(&clause->tuples[i], fp); + for (unsigned i = 0; i < clause->tuple_count; ++i) + bi_print_tuple(&clause->tuples[i], fp); - if (clause->constant_count) { - for (unsigned i = 0; i < clause->constant_count; ++i) - fprintf(fp, "%" PRIx64 " ", clause->constants[i]); + if (clause->constant_count) { + for (unsigned i = 0; i < clause->constant_count; ++i) + fprintf(fp, "%" PRIx64 " ", clause->constants[i]); - if (clause->branch_constant) - fprintf(fp, "*"); + if (clause->branch_constant) + fprintf(fp, "*"); - fprintf(fp, "\n"); - } + fprintf(fp, "\n"); + } - fprintf(fp, "\n"); + fprintf(fp, "\n"); } static void -bi_print_scoreboard_line(unsigned slot, const char *name, uint64_t mask, FILE *fp) +bi_print_scoreboard_line(unsigned slot, const char *name, uint64_t mask, + FILE *fp) { - if (!mask) - return; + if (!mask) + return; - fprintf(fp, "slot %u %s:", slot, name); + fprintf(fp, "slot %u %s:", slot, name); - u_foreach_bit64(reg, mask) - fprintf(fp, " r%" PRId64, reg); + u_foreach_bit64(reg, mask) fprintf(fp, " r%" PRId64, reg); - fprintf(fp, "\n"); + fprintf(fp, "\n"); } static void bi_print_scoreboard(struct bi_scoreboard_state *state, FILE *fp) { - for (unsigned i = 0; i < BI_NUM_SLOTS; ++i) { - bi_print_scoreboard_line(i, "reads", state->read[i], fp); - bi_print_scoreboard_line(i, "writes", state->write[i], fp); - } + for (unsigned i = 0; i < BI_NUM_SLOTS; ++i) { + bi_print_scoreboard_line(i, "reads", state->read[i], fp); + bi_print_scoreboard_line(i, "writes", state->write[i], fp); + } } void bi_print_block(bi_block *block, FILE *fp) { - if (block->scheduled) { - bi_print_scoreboard(&block->scoreboard_in, fp); - fprintf(fp, "\n"); - } + if (block->scheduled) { + bi_print_scoreboard(&block->scoreboard_in, fp); + fprintf(fp, "\n"); + } - fprintf(fp, "block%u {\n", block->index); + fprintf(fp, "block%u {\n", block->index); - if (block->scheduled) { - bi_foreach_clause_in_block(block, clause) - bi_print_clause(clause, fp); - } else { - bi_foreach_instr_in_block(block, ins) - bi_print_instr((bi_instr *) ins, fp); - } + if (block->scheduled) { + bi_foreach_clause_in_block(block, clause) + 
bi_print_clause(clause, fp); + } else { + bi_foreach_instr_in_block(block, ins) + bi_print_instr((bi_instr *)ins, fp); + } - fprintf(fp, "}"); + fprintf(fp, "}"); - if (block->successors[0]) { - fprintf(fp, " -> "); + if (block->successors[0]) { + fprintf(fp, " -> "); - bi_foreach_successor((block), succ) - fprintf(fp, "block%u ", succ->index); - } + bi_foreach_successor((block), succ) + fprintf(fp, "block%u ", succ->index); + } - if (bi_num_predecessors(block)) { - fprintf(fp, " from"); + if (bi_num_predecessors(block)) { + fprintf(fp, " from"); - bi_foreach_predecessor(block, pred) - fprintf(fp, " block%u", (*pred)->index); - } + bi_foreach_predecessor(block, pred) + fprintf(fp, " block%u", (*pred)->index); + } - if (block->scheduled) { - fprintf(fp, "\n"); - bi_print_scoreboard(&block->scoreboard_out, fp); - } + if (block->scheduled) { + fprintf(fp, "\n"); + bi_print_scoreboard(&block->scoreboard_out, fp); + } - fprintf(fp, "\n\n"); + fprintf(fp, "\n\n"); } void bi_print_shader(bi_context *ctx, FILE *fp) { - bi_foreach_block(ctx, block) - bi_print_block(block, fp); + bi_foreach_block(ctx, block) + bi_print_block(block, fp); } diff --git a/src/panfrost/bifrost/bi_print_common.c b/src/panfrost/bifrost/bi_print_common.c index 91589afa749..b3f5e0b52c5 100644 --- a/src/panfrost/bifrost/bi_print_common.c +++ b/src/panfrost/bifrost/bi_print_common.c @@ -31,38 +31,63 @@ const char * bi_message_type_name(enum bifrost_message_type T) { - switch (T) { - case BIFROST_MESSAGE_NONE: return ""; - case BIFROST_MESSAGE_VARYING: return "vary"; - case BIFROST_MESSAGE_ATTRIBUTE: return "attr"; - case BIFROST_MESSAGE_TEX: return "tex"; - case BIFROST_MESSAGE_VARTEX: return "vartex"; - case BIFROST_MESSAGE_LOAD: return "load"; - case BIFROST_MESSAGE_STORE: return "store"; - case BIFROST_MESSAGE_ATOMIC: return "atomic"; - case BIFROST_MESSAGE_BARRIER: return "barrier"; - case BIFROST_MESSAGE_BLEND: return "blend"; - case BIFROST_MESSAGE_TILE: return "tile"; - case BIFROST_MESSAGE_Z_STENCIL: return "z_stencil"; - case BIFROST_MESSAGE_ATEST: return "atest"; - case BIFROST_MESSAGE_JOB: return "job"; - case BIFROST_MESSAGE_64BIT: return "64"; - default: return "XXX reserved"; - } + switch (T) { + case BIFROST_MESSAGE_NONE: + return ""; + case BIFROST_MESSAGE_VARYING: + return "vary"; + case BIFROST_MESSAGE_ATTRIBUTE: + return "attr"; + case BIFROST_MESSAGE_TEX: + return "tex"; + case BIFROST_MESSAGE_VARTEX: + return "vartex"; + case BIFROST_MESSAGE_LOAD: + return "load"; + case BIFROST_MESSAGE_STORE: + return "store"; + case BIFROST_MESSAGE_ATOMIC: + return "atomic"; + case BIFROST_MESSAGE_BARRIER: + return "barrier"; + case BIFROST_MESSAGE_BLEND: + return "blend"; + case BIFROST_MESSAGE_TILE: + return "tile"; + case BIFROST_MESSAGE_Z_STENCIL: + return "z_stencil"; + case BIFROST_MESSAGE_ATEST: + return "atest"; + case BIFROST_MESSAGE_JOB: + return "job"; + case BIFROST_MESSAGE_64BIT: + return "64"; + default: + return "XXX reserved"; + } } const char * bi_flow_control_name(enum bifrost_flow mode) { - switch (mode) { - case BIFROST_FLOW_END: return "eos"; - case BIFROST_FLOW_NBTB_PC: return "nbb br_pc"; - case BIFROST_FLOW_NBTB_UNCONDITIONAL: return "nbb r_uncond"; - case BIFROST_FLOW_NBTB: return "nbb"; - case BIFROST_FLOW_BTB_UNCONDITIONAL: return "bb r_uncond"; - case BIFROST_FLOW_BTB_NONE: return "bb"; - case BIFROST_FLOW_WE_UNCONDITIONAL: return "we r_uncond"; - case BIFROST_FLOW_WE: return "we"; - default: return "XXX"; - } + switch (mode) { + case BIFROST_FLOW_END: + return "eos"; + case 
BIFROST_FLOW_NBTB_PC: + return "nbb br_pc"; + case BIFROST_FLOW_NBTB_UNCONDITIONAL: + return "nbb r_uncond"; + case BIFROST_FLOW_NBTB: + return "nbb"; + case BIFROST_FLOW_BTB_UNCONDITIONAL: + return "bb r_uncond"; + case BIFROST_FLOW_BTB_NONE: + return "bb"; + case BIFROST_FLOW_WE_UNCONDITIONAL: + return "we r_uncond"; + case BIFROST_FLOW_WE: + return "we"; + default: + return "XXX"; + } } diff --git a/src/panfrost/bifrost/bi_print_common.h b/src/panfrost/bifrost/bi_print_common.h index 675738b389c..ed8931154ac 100644 --- a/src/panfrost/bifrost/bi_print_common.h +++ b/src/panfrost/bifrost/bi_print_common.h @@ -30,7 +30,7 @@ #include #include "bifrost.h" -const char * bi_message_type_name(enum bifrost_message_type T); -const char * bi_flow_control_name(enum bifrost_flow mode); +const char *bi_message_type_name(enum bifrost_message_type T); +const char *bi_flow_control_name(enum bifrost_flow mode); #endif diff --git a/src/panfrost/bifrost/bi_quirks.h b/src/panfrost/bifrost/bi_quirks.h index 5dd75dd1db6..be05ed51a9c 100644 --- a/src/panfrost/bifrost/bi_quirks.h +++ b/src/panfrost/bifrost/bi_quirks.h @@ -44,15 +44,15 @@ static inline unsigned bifrost_get_quirks(unsigned product_id) { - switch (product_id >> 8) { - case 0x60: /* G71 */ - return BIFROST_NO_FP32_TRANSCENDENTALS | BIFROST_LIMITED_CLPER; - case 0x62: /* G72 */ - case 0x70: /* G31 */ - return BIFROST_LIMITED_CLPER; - default: - return 0; - } + switch (product_id >> 8) { + case 0x60: /* G71 */ + return BIFROST_NO_FP32_TRANSCENDENTALS | BIFROST_LIMITED_CLPER; + case 0x62: /* G72 */ + case 0x70: /* G31 */ + return BIFROST_LIMITED_CLPER; + default: + return 0; + } } #endif diff --git a/src/panfrost/bifrost/bi_ra.c b/src/panfrost/bifrost/bi_ra.c index c103fab10d4..34047cdcf3e 100644 --- a/src/panfrost/bifrost/bi_ra.c +++ b/src/panfrost/bifrost/bi_ra.c @@ -24,32 +24,32 @@ * Alyssa Rosenzweig */ +#include "util/u_memory.h" +#include "bi_builder.h" #include "compiler.h" #include "nodearray.h" -#include "bi_builder.h" -#include "util/u_memory.h" struct lcra_state { - unsigned node_count; - uint64_t *affinity; + unsigned node_count; + uint64_t *affinity; - /* Linear constraints imposed. For each node there there is a - * 'nodearray' structure, which changes between a sparse and dense - * array depending on the number of elements. - * - * Each element is itself a bit field denoting whether (c_j - c_i) bias - * is present or not, including negative biases. - * - * We support up to 8 components so the bias is in range - * [-7, 7] encoded by a 16-bit field - */ - nodearray *linear; + /* Linear constraints imposed. For each node there there is a + * 'nodearray' structure, which changes between a sparse and dense + * array depending on the number of elements. + * + * Each element is itself a bit field denoting whether (c_j - c_i) bias + * is present or not, including negative biases. + * + * We support up to 8 components so the bias is in range + * [-7, 7] encoded by a 16-bit field + */ + nodearray *linear; - /* Before solving, forced registers; after solving, solutions. */ - unsigned *solutions; + /* Before solving, forced registers; after solving, solutions. 
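    * (Entries still waiting for a register hold ~0, set by the memset in
    * lcra_alloc_equations; lcra_solve skips nodes that already carry a value.)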
*/ + unsigned *solutions; - /** Node which caused register allocation to fail */ - unsigned spill_node; + /** Node which caused register allocation to fail */ + unsigned spill_node; }; /* This module is an implementation of "Linearly Constrained @@ -61,128 +61,134 @@ struct lcra_state { static struct lcra_state * lcra_alloc_equations(unsigned node_count) { - struct lcra_state *l = calloc(1, sizeof(*l)); + struct lcra_state *l = calloc(1, sizeof(*l)); - l->node_count = node_count; + l->node_count = node_count; - l->linear = calloc(sizeof(l->linear[0]), node_count); - l->solutions = calloc(sizeof(l->solutions[0]), node_count); - l->affinity = calloc(sizeof(l->affinity[0]), node_count); + l->linear = calloc(sizeof(l->linear[0]), node_count); + l->solutions = calloc(sizeof(l->solutions[0]), node_count); + l->affinity = calloc(sizeof(l->affinity[0]), node_count); - memset(l->solutions, ~0, sizeof(l->solutions[0]) * node_count); + memset(l->solutions, ~0, sizeof(l->solutions[0]) * node_count); - return l; + return l; } static void lcra_free(struct lcra_state *l) { - for (unsigned i = 0; i < l->node_count; ++i) - nodearray_reset(&l->linear[i]); + for (unsigned i = 0; i < l->node_count; ++i) + nodearray_reset(&l->linear[i]); - free(l->linear); - free(l->affinity); - free(l->solutions); - free(l); + free(l->linear); + free(l->affinity); + free(l->solutions); + free(l); } static void -lcra_add_node_interference(struct lcra_state *l, unsigned i, unsigned cmask_i, unsigned j, unsigned cmask_j) +lcra_add_node_interference(struct lcra_state *l, unsigned i, unsigned cmask_i, + unsigned j, unsigned cmask_j) { - if (i == j) - return; + if (i == j) + return; - nodearray_value constraint_fw = 0; - nodearray_value constraint_bw = 0; + nodearray_value constraint_fw = 0; + nodearray_value constraint_bw = 0; - /* The constraint bits are reversed from lcra.c so that register - * allocation can be done in parallel for every possible solution, - * with lower-order bits representing smaller registers. */ + /* The constraint bits are reversed from lcra.c so that register + * allocation can be done in parallel for every possible solution, + * with lower-order bits representing smaller registers. 
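/* Illustrative aside, not part of the patch: a standalone sketch of the
 * [-7, 7] bias encoding described in the lcra_state comment above. Each
 * forbidden base-register difference between two interfering nodes maps to
 * one bit of a 16-bit mask, which is the information
 * lcra_add_node_interference accumulates per node pair. The function name
 * and the main() harness are invented for this example. */
#include <stdint.h>
#include <stdio.h>

/* Bit (d + 7) is set when the two allocations would overlap if node j's
 * base register were placed d registers above node i's, for d in [-7, 7]. */
static uint16_t
forbidden_biases(unsigned cmask_i, unsigned cmask_j)
{
   uint16_t constraint = 0;

   for (int d = -7; d <= 7; ++d) {
      unsigned shifted = (d >= 0) ? (cmask_j << d) : (cmask_j >> -d);

      if (cmask_i & shifted)
         constraint |= 1 << (d + 7);
   }

   return constraint;
}

int
main(void)
{
   /* Node i covers components 0-1, node j covers component 0: only the
    * differences 0 and +1 clash, every other relative placement is fine. */
   uint16_t c = forbidden_biases(0x3, 0x1);

   for (int d = -7; d <= 7; ++d)
      printf("bias %+d: %s\n", d, (c & (1 << (d + 7))) ? "conflict" : "ok");

   return 0;
}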
*/ - for (unsigned D = 0; D < 8; ++D) { - if (cmask_i & (cmask_j << D)) { - constraint_fw |= (1 << (7 + D)); - constraint_bw |= (1 << (7 - D)); - } + for (unsigned D = 0; D < 8; ++D) { + if (cmask_i & (cmask_j << D)) { + constraint_fw |= (1 << (7 + D)); + constraint_bw |= (1 << (7 - D)); + } - if (cmask_i & (cmask_j >> D)) { - constraint_bw |= (1 << (7 + D)); - constraint_fw |= (1 << (7 - D)); - } - } + if (cmask_i & (cmask_j >> D)) { + constraint_bw |= (1 << (7 + D)); + constraint_fw |= (1 << (7 - D)); + } + } - /* Use dense arrays after adding 256 elements */ - nodearray_orr(&l->linear[j], i, constraint_fw, 256, l->node_count); - nodearray_orr(&l->linear[i], j, constraint_bw, 256, l->node_count); + /* Use dense arrays after adding 256 elements */ + nodearray_orr(&l->linear[j], i, constraint_fw, 256, l->node_count); + nodearray_orr(&l->linear[i], j, constraint_bw, 256, l->node_count); } static bool lcra_test_linear(struct lcra_state *l, unsigned *solutions, unsigned i) { - signed constant = solutions[i]; + signed constant = solutions[i]; - if (nodearray_is_sparse(&l->linear[i])) { - nodearray_sparse_foreach(&l->linear[i], elem) { - unsigned j = nodearray_sparse_key(elem); - nodearray_value constraint = nodearray_sparse_value(elem); + if (nodearray_is_sparse(&l->linear[i])) { + nodearray_sparse_foreach(&l->linear[i], elem) { + unsigned j = nodearray_sparse_key(elem); + nodearray_value constraint = nodearray_sparse_value(elem); - if (solutions[j] == ~0) continue; + if (solutions[j] == ~0) + continue; - signed lhs = constant - solutions[j]; + signed lhs = constant - solutions[j]; - if (lhs < -7 || lhs > 7) - continue; + if (lhs < -7 || lhs > 7) + continue; - if (constraint & (1 << (lhs + 7))) - return false; - } + if (constraint & (1 << (lhs + 7))) + return false; + } - return true; - } + return true; + } - nodearray_value *row = l->linear[i].dense; + nodearray_value *row = l->linear[i].dense; - for (unsigned j = 0; j < l->node_count; ++j) { - if (solutions[j] == ~0) continue; + for (unsigned j = 0; j < l->node_count; ++j) { + if (solutions[j] == ~0) + continue; - signed lhs = constant - solutions[j]; + signed lhs = constant - solutions[j]; - if (lhs < -7 || lhs > 7) - continue; + if (lhs < -7 || lhs > 7) + continue; - if (row[j] & (1 << (lhs + 7))) - return false; - } + if (row[j] & (1 << (lhs + 7))) + return false; + } - return true; + return true; } static bool lcra_solve(struct lcra_state *l) { - for (unsigned step = 0; step < l->node_count; ++step) { - if (l->solutions[step] != ~0) continue; - if (l->affinity[step] == 0) continue; + for (unsigned step = 0; step < l->node_count; ++step) { + if (l->solutions[step] != ~0) + continue; + if (l->affinity[step] == 0) + continue; - bool succ = false; + bool succ = false; - u_foreach_bit64(r, l->affinity[step]) { - l->solutions[step] = r; + u_foreach_bit64(r, l->affinity[step]) + { + l->solutions[step] = r; - if (lcra_test_linear(l, l->solutions, step)) { - succ = true; - break; - } - } + if (lcra_test_linear(l, l->solutions, step)) { + succ = true; + break; + } + } - /* Out of registers - prepare to spill */ - if (!succ) { - l->spill_node = step; - return false; - } - } + /* Out of registers - prepare to spill */ + if (!succ) { + l->spill_node = step; + return false; + } + } - return true; + return true; } /* Register spilling is implemented with a cost-benefit system. 
Costs are set @@ -191,18 +197,18 @@ lcra_solve(struct lcra_state *l) static unsigned lcra_count_constraints(struct lcra_state *l, unsigned i) { - unsigned count = 0; - nodearray *constraints = &l->linear[i]; + unsigned count = 0; + nodearray *constraints = &l->linear[i]; - if (nodearray_is_sparse(constraints)) { - nodearray_sparse_foreach(constraints, elem) - count += util_bitcount(nodearray_sparse_value(elem)); - } else { - nodearray_dense_foreach_64(constraints, elem) - count += util_bitcount64(*elem); - } + if (nodearray_is_sparse(constraints)) { + nodearray_sparse_foreach(constraints, elem) + count += util_bitcount(nodearray_sparse_value(elem)); + } else { + nodearray_dense_foreach_64(constraints, elem) + count += util_bitcount64(*elem); + } - return count; + return count; } /* Liveness analysis is a backwards-may dataflow analysis pass. Within a block, @@ -212,46 +218,46 @@ lcra_count_constraints(struct lcra_state *l, unsigned i) static void bi_liveness_ins_update_ra(uint8_t *live, bi_instr *ins) { - /* live_in[s] = GEN[s] + (live_out[s] - KILL[s]) */ + /* live_in[s] = GEN[s] + (live_out[s] - KILL[s]) */ - bi_foreach_dest(ins, d) { - live[ins->dest[d].value] &= ~bi_writemask(ins, d); - } + bi_foreach_dest(ins, d) { + live[ins->dest[d].value] &= ~bi_writemask(ins, d); + } - bi_foreach_ssa_src(ins, src) { - unsigned count = bi_count_read_registers(ins, src); - unsigned rmask = BITFIELD_MASK(count); + bi_foreach_ssa_src(ins, src) { + unsigned count = bi_count_read_registers(ins, src); + unsigned rmask = BITFIELD_MASK(count); - live[ins->src[src].value] |= (rmask << ins->src[src].offset); - } + live[ins->src[src].value] |= (rmask << ins->src[src].offset); + } } static bool liveness_block_update(bi_block *blk, unsigned temp_count) { - bool progress = false; + bool progress = false; - /* live_out[s] = sum { p in succ[s] } ( live_in[p] ) */ - bi_foreach_successor(blk, succ) { - for (unsigned i = 0; i < temp_count; ++i) - blk->live_out[i] |= succ->live_in[i]; - } + /* live_out[s] = sum { p in succ[s] } ( live_in[p] ) */ + bi_foreach_successor(blk, succ) { + for (unsigned i = 0; i < temp_count; ++i) + blk->live_out[i] |= succ->live_in[i]; + } - uint8_t *live = ralloc_array(blk, uint8_t, temp_count); - memcpy(live, blk->live_out, temp_count); + uint8_t *live = ralloc_array(blk, uint8_t, temp_count); + memcpy(live, blk->live_out, temp_count); - bi_foreach_instr_in_block_rev(blk, ins) - bi_liveness_ins_update_ra(live, ins); + bi_foreach_instr_in_block_rev(blk, ins) + bi_liveness_ins_update_ra(live, ins); - /* To figure out progress, diff live_in */ + /* To figure out progress, diff live_in */ - for (unsigned i = 0; (i < temp_count) && !progress; ++i) - progress |= (blk->live_in[i] != live[i]); + for (unsigned i = 0; (i < temp_count) && !progress; ++i) + progress |= (blk->live_in[i] != live[i]); - ralloc_free(blk->live_in); - blk->live_in = live; + ralloc_free(blk->live_in); + blk->live_in = live; - return progress; + return progress; } /* Globally, liveness analysis uses a fixed-point algorithm based on a @@ -263,36 +269,36 @@ liveness_block_update(bi_block *blk, unsigned temp_count) static void bi_compute_liveness_ra(bi_context *ctx) { - u_worklist worklist; - bi_worklist_init(ctx, &worklist); + u_worklist worklist; + bi_worklist_init(ctx, &worklist); - bi_foreach_block(ctx, block) { - if (block->live_in) - ralloc_free(block->live_in); + bi_foreach_block(ctx, block) { + if (block->live_in) + ralloc_free(block->live_in); - if (block->live_out) - ralloc_free(block->live_out); + if 
(block->live_out) + ralloc_free(block->live_out); - block->live_in = rzalloc_array(block, uint8_t, ctx->ssa_alloc); - block->live_out = rzalloc_array(block, uint8_t, ctx->ssa_alloc); + block->live_in = rzalloc_array(block, uint8_t, ctx->ssa_alloc); + block->live_out = rzalloc_array(block, uint8_t, ctx->ssa_alloc); - bi_worklist_push_tail(&worklist, block); - } + bi_worklist_push_tail(&worklist, block); + } - while (!u_worklist_is_empty(&worklist)) { - /* Pop off in reverse order since liveness is backwards */ - bi_block *blk = bi_worklist_pop_tail(&worklist); + while (!u_worklist_is_empty(&worklist)) { + /* Pop off in reverse order since liveness is backwards */ + bi_block *blk = bi_worklist_pop_tail(&worklist); - /* Update liveness information. If we made progress, we need to - * reprocess the predecessors - */ - if (liveness_block_update(blk, ctx->ssa_alloc)) { - bi_foreach_predecessor(blk, pred) - bi_worklist_push_head(&worklist, *pred); - } - } + /* Update liveness information. If we made progress, we need to + * reprocess the predecessors + */ + if (liveness_block_update(blk, ctx->ssa_alloc)) { + bi_foreach_predecessor(blk, pred) + bi_worklist_push_head(&worklist, *pred); + } + } - u_worklist_fini(&worklist); + u_worklist_fini(&worklist); } /* Construct an affinity mask such that the vector with `count` elements does @@ -313,246 +319,246 @@ bi_compute_liveness_ra(bi_context *ctx) static uint64_t bi_make_affinity(uint64_t clobber, unsigned count, bool split_file) { - uint64_t clobbered = 0; + uint64_t clobbered = 0; - for (unsigned i = 0; i < count; ++i) - clobbered |= (clobber >> i); + for (unsigned i = 0; i < count; ++i) + clobbered |= (clobber >> i); - /* Don't allocate past the end of the register file */ - if (count > 1) { - unsigned excess = count - 1; - uint64_t mask = BITFIELD_MASK(excess); - clobbered |= mask << (64 - excess); + /* Don't allocate past the end of the register file */ + if (count > 1) { + unsigned excess = count - 1; + uint64_t mask = BITFIELD_MASK(excess); + clobbered |= mask << (64 - excess); - if (split_file) - clobbered |= mask << (16 - excess); - } + if (split_file) + clobbered |= mask << (16 - excess); + } - /* Don't allocate the middle if we split out the middle */ - if (split_file) - clobbered |= BITFIELD64_MASK(32) << 16; + /* Don't allocate the middle if we split out the middle */ + if (split_file) + clobbered |= BITFIELD64_MASK(32) << 16; - /* We can use a register iff it's not clobberred */ - return ~clobbered; + /* We can use a register iff it's not clobberred */ + return ~clobbered; } static void -bi_mark_interference(bi_block *block, struct lcra_state *l, uint8_t *live, uint64_t preload_live, unsigned node_count, bool is_blend, bool split_file, bool aligned_sr) +bi_mark_interference(bi_block *block, struct lcra_state *l, uint8_t *live, + uint64_t preload_live, unsigned node_count, bool is_blend, + bool split_file, bool aligned_sr) { - bi_foreach_instr_in_block_rev(block, ins) { - /* Mark all registers live after the instruction as - * interfering with the destination */ + bi_foreach_instr_in_block_rev(block, ins) { + /* Mark all registers live after the instruction as + * interfering with the destination */ - bi_foreach_dest(ins, d) { - unsigned node = ins->dest[d].value; + bi_foreach_dest(ins, d) { + unsigned node = ins->dest[d].value; - /* Don't allocate to anything that's read later as a - * preloaded register. The affinity is the intersection - * of affinity masks for each write. 
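/* Illustrative aside, not part of the patch: a reduced model of the
 * "don't allocate past the end of the register file" clause of
 * bi_make_affinity above. vector_affinity() is an invented name; it keeps
 * only the file-end handling and drops the clobber and split_file logic. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t
vector_affinity(unsigned count)
{
   uint64_t allowed = ~0ull;

   if (count > 1) {
      unsigned excess = count - 1;
      uint64_t mask = (1ull << excess) - 1;

      /* Forbid the top `excess` base registers so the whole vector stays
       * inside r0-r63. */
      allowed &= ~(mask << (64 - excess));
   }

   return allowed;
}

int
main(void)
{
   for (unsigned count = 1; count <= 4; ++count) {
      uint64_t allowed = vector_affinity(count);

      /* Every permitted base register leaves room for the whole vector. */
      for (unsigned base = 0; base < 64; ++base) {
         if (allowed & (1ull << base))
            assert(base + count <= 64);
      }

      printf("count=%u: highest allowed base is r%u\n", count, 64 - count);
   }

   return 0;
}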
Since writes have - * offsets, but the affinity is for the whole node, we - * need to offset the affinity opposite the write - * offset, so we shift right. */ - unsigned count = bi_count_write_registers(ins, d); - unsigned offset = ins->dest[d].offset; - uint64_t affinity = bi_make_affinity(preload_live, count, split_file) >> offset; - /* Valhall needs >= 64-bit staging writes to be pair-aligned */ - if (aligned_sr && (count >= 2 || offset)) - affinity &= EVEN_BITS_MASK; + /* Don't allocate to anything that's read later as a + * preloaded register. The affinity is the intersection + * of affinity masks for each write. Since writes have + * offsets, but the affinity is for the whole node, we + * need to offset the affinity opposite the write + * offset, so we shift right. */ + unsigned count = bi_count_write_registers(ins, d); + unsigned offset = ins->dest[d].offset; + uint64_t affinity = + bi_make_affinity(preload_live, count, split_file) >> offset; + /* Valhall needs >= 64-bit staging writes to be pair-aligned */ + if (aligned_sr && (count >= 2 || offset)) + affinity &= EVEN_BITS_MASK; - l->affinity[node] &= affinity; + l->affinity[node] &= affinity; - for (unsigned i = 0; i < node_count; ++i) { - uint8_t r = live[i]; + for (unsigned i = 0; i < node_count; ++i) { + uint8_t r = live[i]; - /* Nodes only interfere if they occupy - * /different values/ at the same time - * (Boissinot). In particular, sources of - * moves do not interfere with their - * destinations. This enables a limited form of - * coalescing. - */ - if (ins->op == BI_OPCODE_MOV_I32 && - bi_is_ssa(ins->src[0]) && - i == ins->src[0].value) { + /* Nodes only interfere if they occupy + * /different values/ at the same time + * (Boissinot). In particular, sources of + * moves do not interfere with their + * destinations. This enables a limited form of + * coalescing. + */ + if (ins->op == BI_OPCODE_MOV_I32 && bi_is_ssa(ins->src[0]) && + i == ins->src[0].value) { - r &= ~BITFIELD_BIT(ins->src[0].offset); - } + r &= ~BITFIELD_BIT(ins->src[0].offset); + } - if (r) { - lcra_add_node_interference(l, node, - bi_writemask(ins, d), i, r); - } - } + if (r) { + lcra_add_node_interference(l, node, bi_writemask(ins, d), i, r); + } + } - unsigned node_first = ins->dest[0].value; - if (d == 1) { - lcra_add_node_interference(l, node, bi_writemask(ins, 1), - node_first, bi_writemask(ins, 0)); - } - } + unsigned node_first = ins->dest[0].value; + if (d == 1) { + lcra_add_node_interference(l, node, bi_writemask(ins, 1), + node_first, bi_writemask(ins, 0)); + } + } - /* Valhall needs >= 64-bit reads to be pair-aligned */ - if (aligned_sr) { - bi_foreach_ssa_src(ins, s) { - if (bi_count_read_registers(ins, s) >= 2) - l->affinity[ins->src[s].value] &= EVEN_BITS_MASK; - } - } + /* Valhall needs >= 64-bit reads to be pair-aligned */ + if (aligned_sr) { + bi_foreach_ssa_src(ins, s) { + if (bi_count_read_registers(ins, s) >= 2) + l->affinity[ins->src[s].value] &= EVEN_BITS_MASK; + } + } - if (!is_blend && ins->op == BI_OPCODE_BLEND) { - /* Blend shaders might clobber r0-r15, r48. */ - uint64_t clobber = BITFIELD64_MASK(16) | BITFIELD64_BIT(48); + if (!is_blend && ins->op == BI_OPCODE_BLEND) { + /* Blend shaders might clobber r0-r15, r48. 
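/* Illustrative aside, not part of the patch: a toy version of the
 * value-based interference rule referenced above (Boissinot). The
 * destination of a copy does not interfere with the copy's source, because
 * both hold the same value where the copy executes, which is what enables
 * the limited coalescing the comment mentions. The struct and function
 * names are invented for the sketch. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct toy_copy {
   unsigned dest;
   unsigned src;
};

static bool
dest_interferes_with(const struct toy_copy *copy, unsigned node,
                     uint64_t live_after_copy)
{
   uint64_t live = live_after_copy;

   /* Drop the copy's own source: it carries the same value as the
    * destination, so overlapping with it is harmless. */
   live &= ~(1ull << copy->src);

   return (live >> node) & 1;
}

int
main(void)
{
   struct toy_copy copy = {.dest = 5, .src = 2};
   uint64_t live_after = (1ull << 2) | (1ull << 7);

   printf("vs. source r2:    %s\n",
          dest_interferes_with(&copy, 2, live_after) ? "interferes"
                                                     : "coalescable");
   printf("vs. unrelated r7: %s\n",
          dest_interferes_with(&copy, 7, live_after) ? "interferes"
                                                     : "coalescable");
   return 0;
}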
*/ + uint64_t clobber = BITFIELD64_MASK(16) | BITFIELD64_BIT(48); - for (unsigned i = 0; i < node_count; ++i) { - if (live[i]) - l->affinity[i] &= ~clobber; - } - } + for (unsigned i = 0; i < node_count; ++i) { + if (live[i]) + l->affinity[i] &= ~clobber; + } + } - /* Update live_in */ - preload_live = bi_postra_liveness_ins(preload_live, ins); - bi_liveness_ins_update_ra(live, ins); - } + /* Update live_in */ + preload_live = bi_postra_liveness_ins(preload_live, ins); + bi_liveness_ins_update_ra(live, ins); + } - block->reg_live_in = preload_live; + block->reg_live_in = preload_live; } static void bi_compute_interference(bi_context *ctx, struct lcra_state *l, bool full_regs) { - bi_compute_liveness_ra(ctx); - bi_postra_liveness(ctx); + bi_compute_liveness_ra(ctx); + bi_postra_liveness(ctx); - bi_foreach_block_rev(ctx, blk) { - uint8_t *live = mem_dup(blk->live_out, ctx->ssa_alloc); + bi_foreach_block_rev(ctx, blk) { + uint8_t *live = mem_dup(blk->live_out, ctx->ssa_alloc); - bi_mark_interference(blk, l, live, blk->reg_live_out, - ctx->ssa_alloc, ctx->inputs->is_blend, - !full_regs, ctx->arch >= 9); + bi_mark_interference(blk, l, live, blk->reg_live_out, ctx->ssa_alloc, + ctx->inputs->is_blend, !full_regs, ctx->arch >= 9); - free(live); - } + free(live); + } } static struct lcra_state * bi_allocate_registers(bi_context *ctx, bool *success, bool full_regs) { - struct lcra_state *l = lcra_alloc_equations(ctx->ssa_alloc); + struct lcra_state *l = lcra_alloc_equations(ctx->ssa_alloc); - /* Blend shaders are restricted to R0-R15. Other shaders at full - * occupancy also can access R48-R63. At half occupancy they can access - * the whole file. */ + /* Blend shaders are restricted to R0-R15. Other shaders at full + * occupancy also can access R48-R63. At half occupancy they can access + * the whole file. */ - uint64_t default_affinity = - ctx->inputs->is_blend ? BITFIELD64_MASK(16) : - full_regs ? BITFIELD64_MASK(64) : - (BITFIELD64_MASK(16) | (BITFIELD64_MASK(16) << 48)); + uint64_t default_affinity = + ctx->inputs->is_blend ? BITFIELD64_MASK(16) + : full_regs ? 
BITFIELD64_MASK(64) + : (BITFIELD64_MASK(16) | (BITFIELD64_MASK(16) << 48)); - /* To test spilling, mimic a small register file */ - if (bifrost_debug & BIFROST_DBG_SPILL && !ctx->inputs->is_blend) - default_affinity &= BITFIELD64_MASK(48) << 8; + /* To test spilling, mimic a small register file */ + if (bifrost_debug & BIFROST_DBG_SPILL && !ctx->inputs->is_blend) + default_affinity &= BITFIELD64_MASK(48) << 8; - bi_foreach_instr_global(ctx, ins) { - bi_foreach_dest(ins, d) - l->affinity[ins->dest[d].value] = default_affinity; + bi_foreach_instr_global(ctx, ins) { + bi_foreach_dest(ins, d) + l->affinity[ins->dest[d].value] = default_affinity; - /* Blend shaders expect the src colour to be in r0-r3 */ - if (ins->op == BI_OPCODE_BLEND && - !ctx->inputs->is_blend) { - assert(bi_is_ssa(ins->src[0])); - l->solutions[ins->src[0].value] = 0; + /* Blend shaders expect the src colour to be in r0-r3 */ + if (ins->op == BI_OPCODE_BLEND && !ctx->inputs->is_blend) { + assert(bi_is_ssa(ins->src[0])); + l->solutions[ins->src[0].value] = 0; - /* Dual source blend input in r4-r7 */ - if (bi_is_ssa(ins->src[4])) - l->solutions[ins->src[4].value] = 4; + /* Dual source blend input in r4-r7 */ + if (bi_is_ssa(ins->src[4])) + l->solutions[ins->src[4].value] = 4; - /* Writes to R48 */ - if (!bi_is_null(ins->dest[0])) - l->solutions[ins->dest[0].value] = 48; - } + /* Writes to R48 */ + if (!bi_is_null(ins->dest[0])) + l->solutions[ins->dest[0].value] = 48; + } - /* Coverage mask writes stay in R60 */ - if ((ins->op == BI_OPCODE_ATEST || - ins->op == BI_OPCODE_ZS_EMIT) && - !bi_is_null(ins->dest[0])) { - l->solutions[ins->dest[0].value] = 60; - } + /* Coverage mask writes stay in R60 */ + if ((ins->op == BI_OPCODE_ATEST || ins->op == BI_OPCODE_ZS_EMIT) && + !bi_is_null(ins->dest[0])) { + l->solutions[ins->dest[0].value] = 60; + } - /* Experimentally, it seems coverage masks inputs to ATEST must - * be in R60. Otherwise coverage mask writes do not work with - * early-ZS with pixel-frequency-shading (this combination of - * settings is legal if depth/stencil writes are disabled). - */ - if (ins->op == BI_OPCODE_ATEST) { - assert(bi_is_ssa(ins->src[0])); - l->solutions[ins->src[0].value] = 60; - } - } + /* Experimentally, it seems coverage masks inputs to ATEST must + * be in R60. Otherwise coverage mask writes do not work with + * early-ZS with pixel-frequency-shading (this combination of + * settings is legal if depth/stencil writes are disabled). + */ + if (ins->op == BI_OPCODE_ATEST) { + assert(bi_is_ssa(ins->src[0])); + l->solutions[ins->src[0].value] = 60; + } + } - bi_compute_interference(ctx, l, full_regs); + bi_compute_interference(ctx, l, full_regs); - /* Coalesce register moves if we're allowed. We need to be careful due - * to the restricted affinity induced by the blend shader ABI. - */ - bi_foreach_instr_global(ctx, I) { - if (I->op != BI_OPCODE_MOV_I32) continue; - if (I->src[0].type != BI_INDEX_REGISTER) continue; + /* Coalesce register moves if we're allowed. We need to be careful due + * to the restricted affinity induced by the blend shader ABI. 
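/* Illustrative aside, not part of the patch: the three register windows
 * described by the occupancy comment above, printed as 64-bit affinity
 * masks. MASK16 and the window names are local to this example. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define MASK16 0xffffull

int
main(void)
{
   uint64_t blend = MASK16;                           /* r0-r15 only */
   uint64_t full_occupancy = MASK16 | MASK16 << 48;   /* r0-r15 and r48-r63 */
   uint64_t half_occupancy = ~0ull;                   /* the whole file */

   printf("blend:          %016" PRIx64 "\n", blend);
   printf("full occupancy: %016" PRIx64 "\n", full_occupancy);
   printf("half occupancy: %016" PRIx64 "\n", half_occupancy);
   return 0;
}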
+ */ + bi_foreach_instr_global(ctx, I) { + if (I->op != BI_OPCODE_MOV_I32) + continue; + if (I->src[0].type != BI_INDEX_REGISTER) + continue; - unsigned reg = I->src[0].value; - unsigned node = I->dest[0].value; + unsigned reg = I->src[0].value; + unsigned node = I->dest[0].value; - if (l->solutions[node] != ~0) continue; + if (l->solutions[node] != ~0) + continue; - uint64_t affinity = l->affinity[node]; + uint64_t affinity = l->affinity[node]; - if (ctx->inputs->is_blend) { - /* We're allowed to coalesce the moves to these */ - affinity |= BITFIELD64_BIT(48); - affinity |= BITFIELD64_BIT(60); - } + if (ctx->inputs->is_blend) { + /* We're allowed to coalesce the moves to these */ + affinity |= BITFIELD64_BIT(48); + affinity |= BITFIELD64_BIT(60); + } - /* Try to coalesce */ - if (affinity & BITFIELD64_BIT(reg)) { - l->solutions[node] = reg; + /* Try to coalesce */ + if (affinity & BITFIELD64_BIT(reg)) { + l->solutions[node] = reg; - if (!lcra_test_linear(l, l->solutions, node)) - l->solutions[node] = ~0; - } - } + if (!lcra_test_linear(l, l->solutions, node)) + l->solutions[node] = ~0; + } + } - *success = lcra_solve(l); + *success = lcra_solve(l); - return l; + return l; } static bi_index bi_reg_from_index(bi_context *ctx, struct lcra_state *l, bi_index index) { - /* Offsets can only be applied when we register allocated an index, or - * alternatively for FAU's encoding */ + /* Offsets can only be applied when we register allocated an index, or + * alternatively for FAU's encoding */ - ASSERTED bool is_offset = (index.offset > 0) && - (index.type != BI_INDEX_FAU); + ASSERTED bool is_offset = (index.offset > 0) && (index.type != BI_INDEX_FAU); - /* Did we run RA for this index at all */ - if (!bi_is_ssa(index)) { - assert(!is_offset); - return index; - } + /* Did we run RA for this index at all */ + if (!bi_is_ssa(index)) { + assert(!is_offset); + return index; + } - /* LCRA didn't bother solving this index (how lazy!) */ - signed solution = l->solutions[index.value]; - if (solution < 0) { - assert(!is_offset); - return index; - } + /* LCRA didn't bother solving this index (how lazy!) */ + signed solution = l->solutions[index.value]; + if (solution < 0) { + assert(!is_offset); + return index; + } - /* todo: do we want to compose with the subword swizzle? */ - bi_index new_index = bi_register(solution + index.offset); - new_index.swizzle = index.swizzle; - new_index.abs = index.abs; - new_index.neg = index.neg; - return new_index; + /* todo: do we want to compose with the subword swizzle? 
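/* Illustrative aside, not part of the patch: the shape of the
 * try/validate/revert step used by the coalescing loop above, with an
 * opaque predicate standing in for lcra_test_linear. All names here are
 * invented for the sketch. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define UNSOLVED (~0u)

static bool
try_coalesce(unsigned *solutions, unsigned node, unsigned reg,
             uint64_t affinity, bool (*valid)(const unsigned *, unsigned))
{
   /* Only coalesce into a register the node may use, and never override a
    * solution that is already pinned. */
   if (solutions[node] != UNSOLVED || !(affinity & (1ull << reg)))
      return false;

   solutions[node] = reg;

   /* Roll back if the tentative choice breaks a linear constraint. */
   if (!valid(solutions, node)) {
      solutions[node] = UNSOLVED;
      return false;
   }

   return true;
}

static bool
always_valid(const unsigned *solutions, unsigned node)
{
   (void)solutions;
   (void)node;
   return true;
}

int
main(void)
{
   unsigned solutions[2] = {UNSOLVED, UNSOLVED};

   /* Node 0 may only live in r4-r7, so coalescing into r2 must fail. */
   printf("into r2: %d\n", try_coalesce(solutions, 0, 2, 0xf0, always_valid));
   printf("into r4: %d\n", try_coalesce(solutions, 0, 4, 0xf0, always_valid));
   printf("solution: r%u\n", solutions[0]);
   return 0;
}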
*/ + bi_index new_index = bi_register(solution + index.offset); + new_index.swizzle = index.swizzle; + new_index.abs = index.abs; + new_index.neg = index.neg; + return new_index; } /* Dual texture instructions write to two sets of staging registers, modeled as @@ -564,40 +570,40 @@ bi_reg_from_index(bi_context *ctx, struct lcra_state *l, bi_index index) static void bi_fixup_dual_tex_register(bi_instr *I) { - assert(I->dest[1].type == BI_INDEX_REGISTER); - assert(I->src[3].type == BI_INDEX_CONSTANT); + assert(I->dest[1].type == BI_INDEX_REGISTER); + assert(I->src[3].type == BI_INDEX_CONSTANT); - struct bifrost_dual_texture_operation desc = { - .secondary_register = I->dest[1].value, - }; + struct bifrost_dual_texture_operation desc = { + .secondary_register = I->dest[1].value, + }; - I->src[3].value |= bi_dual_tex_as_u32(desc); + I->src[3].value |= bi_dual_tex_as_u32(desc); } static void bi_install_registers(bi_context *ctx, struct lcra_state *l) { - bi_foreach_instr_global(ctx, ins) { - bi_foreach_dest(ins, d) - ins->dest[d] = bi_reg_from_index(ctx, l, ins->dest[d]); + bi_foreach_instr_global(ctx, ins) { + bi_foreach_dest(ins, d) + ins->dest[d] = bi_reg_from_index(ctx, l, ins->dest[d]); - bi_foreach_src(ins, s) - ins->src[s] = bi_reg_from_index(ctx, l, ins->src[s]); + bi_foreach_src(ins, s) + ins->src[s] = bi_reg_from_index(ctx, l, ins->src[s]); - if (ins->op == BI_OPCODE_TEXC_DUAL) - bi_fixup_dual_tex_register(ins); - } + if (ins->op == BI_OPCODE_TEXC_DUAL) + bi_fixup_dual_tex_register(ins); + } } static void bi_rewrite_index_src_single(bi_instr *ins, bi_index old, bi_index new) { - bi_foreach_src(ins, i) { - if (bi_is_equiv(ins->src[i], old)) { - ins->src[i].type = new.type; - ins->src[i].value = new.value; - } - } + bi_foreach_src(ins, i) { + if (bi_is_equiv(ins->src[i], old)) { + ins->src[i].type = new.type; + ins->src[i].value = new.value; + } + } } /* If register allocation fails, find the best spill node */ @@ -605,83 +611,87 @@ bi_rewrite_index_src_single(bi_instr *ins, bi_index old, bi_index new) static signed bi_choose_spill_node(bi_context *ctx, struct lcra_state *l) { - /* Pick a node satisfying bi_spill_register's preconditions */ - BITSET_WORD *no_spill = calloc(sizeof(BITSET_WORD), BITSET_WORDS(l->node_count)); + /* Pick a node satisfying bi_spill_register's preconditions */ + BITSET_WORD *no_spill = + calloc(sizeof(BITSET_WORD), BITSET_WORDS(l->node_count)); - bi_foreach_instr_global(ctx, ins) { - bi_foreach_dest(ins, d) { - /* Don't allow spilling coverage mask writes because the - * register preload logic assumes it will stay in R60. - * This could be optimized. - */ - if (ins->no_spill || - ins->op == BI_OPCODE_ATEST || - ins->op == BI_OPCODE_ZS_EMIT || - (ins->op == BI_OPCODE_MOV_I32 && - ins->src[0].type == BI_INDEX_REGISTER && - ins->src[0].value == 60)) { - BITSET_SET(no_spill, ins->dest[d].value); - } - } - } + bi_foreach_instr_global(ctx, ins) { + bi_foreach_dest(ins, d) { + /* Don't allow spilling coverage mask writes because the + * register preload logic assumes it will stay in R60. + * This could be optimized. 
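/* Illustrative aside, not part of the patch: the index-to-register rewrite
 * performed by bi_reg_from_index above, reduced to the essentials. Once
 * the solver picks a base register for an SSA value, a use at a word
 * offset becomes base + offset; the real function also carries over
 * modifiers (swizzle, abs, neg), which this sketch omits. toy_index is an
 * invented stand-in for bi_index. */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_index {
   unsigned value;  /* SSA value number, or register number after rewrite */
   unsigned offset; /* word offset into the (vector) value */
   bool is_reg;
};

static struct toy_index
apply_solution(struct toy_index idx, const unsigned *solutions)
{
   assert(!idx.is_reg);

   struct toy_index out = idx;
   out.is_reg = true;
   out.value = solutions[idx.value] + idx.offset;
   out.offset = 0;
   return out;
}

int
main(void)
{
   const unsigned solutions[] = {4, 60};
   struct toy_index read = {.value = 0, .offset = 2, .is_reg = false};
   struct toy_index reg = apply_solution(read, solutions);

   printf("ssa%u[%u] -> r%u\n", read.value, read.offset, reg.value);
   return 0;
}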
+ */ + if (ins->no_spill || ins->op == BI_OPCODE_ATEST || + ins->op == BI_OPCODE_ZS_EMIT || + (ins->op == BI_OPCODE_MOV_I32 && + ins->src[0].type == BI_INDEX_REGISTER && + ins->src[0].value == 60)) { + BITSET_SET(no_spill, ins->dest[d].value); + } + } + } - unsigned best_benefit = 0.0; - signed best_node = -1; + unsigned best_benefit = 0.0; + signed best_node = -1; - if (nodearray_is_sparse(&l->linear[l->spill_node])) { - nodearray_sparse_foreach(&l->linear[l->spill_node], elem) { - unsigned i = nodearray_sparse_key(elem); - unsigned constraint = nodearray_sparse_value(elem); + if (nodearray_is_sparse(&l->linear[l->spill_node])) { + nodearray_sparse_foreach(&l->linear[l->spill_node], elem) { + unsigned i = nodearray_sparse_key(elem); + unsigned constraint = nodearray_sparse_value(elem); - /* Only spill nodes that interfere with the node failing - * register allocation. It's pointless to spill anything else */ - if (!constraint) continue; + /* Only spill nodes that interfere with the node failing + * register allocation. It's pointless to spill anything else */ + if (!constraint) + continue; - if (BITSET_TEST(no_spill, i)) continue; + if (BITSET_TEST(no_spill, i)) + continue; - unsigned benefit = lcra_count_constraints(l, i); + unsigned benefit = lcra_count_constraints(l, i); - if (benefit > best_benefit) { - best_benefit = benefit; - best_node = i; - } - } - } else { - nodearray_value *row = l->linear[l->spill_node].dense; + if (benefit > best_benefit) { + best_benefit = benefit; + best_node = i; + } + } + } else { + nodearray_value *row = l->linear[l->spill_node].dense; - for (unsigned i = 0; i < l->node_count; ++i) { - /* Only spill nodes that interfere with the node failing - * register allocation. It's pointless to spill anything else */ - if (!row[i]) continue; + for (unsigned i = 0; i < l->node_count; ++i) { + /* Only spill nodes that interfere with the node failing + * register allocation. 
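/* Illustrative aside, not part of the patch: the cost/benefit choice made
 * by bi_choose_spill_node above, on plain arrays instead of the sparse or
 * dense nodearray rows. Only nodes that interfere with the failing node
 * are candidates, and the one with the most constraints wins. The names
 * and test data are invented for this sketch. */
#include <stdint.h>
#include <stdio.h>

static int
choose_spill(const uint16_t *row, const unsigned *constraint_count,
             const uint8_t *no_spill, unsigned node_count)
{
   int best_node = -1;
   unsigned best_benefit = 0;

   for (unsigned i = 0; i < node_count; ++i) {
      /* Spilling a node that does not interfere cannot help, and some
       * nodes (e.g. coverage mask writes) must never be spilled. */
      if (!row[i] || no_spill[i])
         continue;

      if (constraint_count[i] > best_benefit) {
         best_benefit = constraint_count[i];
         best_node = i;
      }
   }

   return best_node;
}

int
main(void)
{
   const uint16_t row[4] = {0, 0x0010, 0x0004, 0}; /* interference row */
   const unsigned count[4] = {9, 3, 7, 2};         /* constraints per node */
   const uint8_t no_spill[4] = {0, 0, 0, 0};

   /* Nodes 1 and 2 interfere; node 2 has more constraints, so it wins. */
   printf("spill node %d\n", choose_spill(row, count, no_spill, 4));
   return 0;
}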
It's pointless to spill anything else */ + if (!row[i]) + continue; - if (BITSET_TEST(no_spill, i)) continue; + if (BITSET_TEST(no_spill, i)) + continue; - unsigned benefit = lcra_count_constraints(l, i); + unsigned benefit = lcra_count_constraints(l, i); - if (benefit > best_benefit) { - best_benefit = benefit; - best_node = i; - } - } - } + if (benefit > best_benefit) { + best_benefit = benefit; + best_node = i; + } + } + } - free(no_spill); - return best_node; + free(no_spill); + return best_node; } static unsigned bi_count_read_index(bi_instr *I, bi_index index) { - unsigned max = 0; + unsigned max = 0; - bi_foreach_src(I, s) { - if (bi_is_equiv(I->src[s], index)) { - unsigned count = bi_count_read_registers(I, s); - max = MAX2(max, count + I->src[s].offset); - } - } + bi_foreach_src(I, s) { + if (bi_is_equiv(I->src[s], index)) { + unsigned count = bi_count_read_registers(I, s); + max = MAX2(max, count + I->src[s].offset); + } + } - return max; + return max; } /* @@ -692,29 +702,30 @@ bi_count_read_index(bi_instr *I, bi_index index) static bi_index bi_tls_ptr(bool hi) { - return bi_fau(BIR_FAU_TLS_PTR, hi); + return bi_fau(BIR_FAU_TLS_PTR, hi); } static bi_instr * bi_load_tl(bi_builder *b, unsigned bits, bi_index src, unsigned offset) { - if (b->shader->arch >= 9) { - return bi_load_to(b, bits, src, bi_tls_ptr(false), - bi_tls_ptr(true), BI_SEG_TL, offset); - } else { - return bi_load_to(b, bits, src, bi_imm_u32(offset), bi_zero(), - BI_SEG_TL, 0); - } + if (b->shader->arch >= 9) { + return bi_load_to(b, bits, src, bi_tls_ptr(false), bi_tls_ptr(true), + BI_SEG_TL, offset); + } else { + return bi_load_to(b, bits, src, bi_imm_u32(offset), bi_zero(), BI_SEG_TL, + 0); + } } static void bi_store_tl(bi_builder *b, unsigned bits, bi_index src, unsigned offset) { - if (b->shader->arch >= 9) { - bi_store(b, bits, src, bi_tls_ptr(false), bi_tls_ptr(true), BI_SEG_TL, offset); - } else { - bi_store(b, bits, src, bi_imm_u32(offset), bi_zero(), BI_SEG_TL, 0); - } + if (b->shader->arch >= 9) { + bi_store(b, bits, src, bi_tls_ptr(false), bi_tls_ptr(true), BI_SEG_TL, + offset); + } else { + bi_store(b, bits, src, bi_imm_u32(offset), bi_zero(), BI_SEG_TL, 0); + } } /* Once we've chosen a spill node, spill it and returns bytes spilled */ @@ -722,44 +733,45 @@ bi_store_tl(bi_builder *b, unsigned bits, bi_index src, unsigned offset) static unsigned bi_spill_register(bi_context *ctx, bi_index index, uint32_t offset) { - bi_builder b = { .shader = ctx }; - unsigned channels = 0; + bi_builder b = {.shader = ctx}; + unsigned channels = 0; - /* Spill after every store, fill before every load */ - bi_foreach_instr_global_safe(ctx, I) { - bi_foreach_dest(I, d) { - if (!bi_is_equiv(I->dest[d], index)) continue; + /* Spill after every store, fill before every load */ + bi_foreach_instr_global_safe(ctx, I) { + bi_foreach_dest(I, d) { + if (!bi_is_equiv(I->dest[d], index)) + continue; - unsigned extra = I->dest[d].offset; - bi_index tmp = bi_temp(ctx); + unsigned extra = I->dest[d].offset; + bi_index tmp = bi_temp(ctx); - I->dest[d] = bi_replace_index(I->dest[d], tmp); - I->no_spill = true; + I->dest[d] = bi_replace_index(I->dest[d], tmp); + I->no_spill = true; - unsigned count = bi_count_write_registers(I, d); - unsigned bits = count * 32; + unsigned count = bi_count_write_registers(I, d); + unsigned bits = count * 32; - b.cursor = bi_after_instr(I); - bi_store_tl(&b, bits, tmp, offset + 4 * extra); + b.cursor = bi_after_instr(I); + bi_store_tl(&b, bits, tmp, offset + 4 * extra); - ctx->spills++; - channels = 
MAX2(channels, extra + count); - } + ctx->spills++; + channels = MAX2(channels, extra + count); + } - if (bi_has_arg(I, index)) { - b.cursor = bi_before_instr(I); - bi_index tmp = bi_temp(ctx); + if (bi_has_arg(I, index)) { + b.cursor = bi_before_instr(I); + bi_index tmp = bi_temp(ctx); - unsigned bits = bi_count_read_index(I, index) * 32; - bi_rewrite_index_src_single(I, index, tmp); + unsigned bits = bi_count_read_index(I, index) * 32; + bi_rewrite_index_src_single(I, index, tmp); - bi_instr *ld = bi_load_tl(&b, bits, tmp, offset); - ld->no_spill = true; - ctx->fills++; - } - } + bi_instr *ld = bi_load_tl(&b, bits, tmp, offset); + ld->no_spill = true; + ctx->fills++; + } + } - return (channels * 4); + return (channels * 4); } /* @@ -770,78 +782,79 @@ bi_spill_register(bi_context *ctx, bi_index index, uint32_t offset) static void bi_lower_vector(bi_context *ctx, unsigned first_reg) { - bi_index *remap = calloc(ctx->ssa_alloc, sizeof(bi_index)); + bi_index *remap = calloc(ctx->ssa_alloc, sizeof(bi_index)); - bi_foreach_instr_global_safe(ctx, I) { - bi_builder b = bi_init_builder(ctx, bi_after_instr(I)); + bi_foreach_instr_global_safe(ctx, I) { + bi_builder b = bi_init_builder(ctx, bi_after_instr(I)); - if (I->op == BI_OPCODE_SPLIT_I32) { - bi_index src = I->src[0]; - assert(src.offset == 0); + if (I->op == BI_OPCODE_SPLIT_I32) { + bi_index src = I->src[0]; + assert(src.offset == 0); - bi_foreach_dest(I, i) { - src.offset = i; - bi_mov_i32_to(&b, I->dest[i], src); + bi_foreach_dest(I, i) { + src.offset = i; + bi_mov_i32_to(&b, I->dest[i], src); - if (I->dest[i].value < first_reg) - remap[I->dest[i].value] = src; - } + if (I->dest[i].value < first_reg) + remap[I->dest[i].value] = src; + } - bi_remove_instruction(I); - } else if (I->op == BI_OPCODE_COLLECT_I32) { - bi_index dest = I->dest[0]; - assert(dest.offset == 0); - assert(((dest.value < first_reg) || I->nr_srcs == 1) && "nir_lower_phis_to_scalar"); + bi_remove_instruction(I); + } else if (I->op == BI_OPCODE_COLLECT_I32) { + bi_index dest = I->dest[0]; + assert(dest.offset == 0); + assert(((dest.value < first_reg) || I->nr_srcs == 1) && + "nir_lower_phis_to_scalar"); - bi_foreach_src(I, i) { - if (bi_is_null(I->src[i])) - continue; + bi_foreach_src(I, i) { + if (bi_is_null(I->src[i])) + continue; - dest.offset = i; - bi_mov_i32_to(&b, dest, I->src[i]); - } + dest.offset = i; + bi_mov_i32_to(&b, dest, I->src[i]); + } - bi_remove_instruction(I); - } - } + bi_remove_instruction(I); + } + } - bi_foreach_instr_global(ctx, I) { - bi_foreach_ssa_src(I, s) { - if (I->src[s].value < first_reg && !bi_is_null(remap[I->src[s].value])) - bi_replace_src(I, s, remap[I->src[s].value]); - } - } + bi_foreach_instr_global(ctx, I) { + bi_foreach_ssa_src(I, s) { + if (I->src[s].value < first_reg && !bi_is_null(remap[I->src[s].value])) + bi_replace_src(I, s, remap[I->src[s].value]); + } + } - free(remap); + free(remap); - /* After generating a pile of moves, clean up */ - bi_compute_liveness_ra(ctx); + /* After generating a pile of moves, clean up */ + bi_compute_liveness_ra(ctx); - bi_foreach_block_rev(ctx, block) { - uint8_t *live = rzalloc_array(block, uint8_t, ctx->ssa_alloc); + bi_foreach_block_rev(ctx, block) { + uint8_t *live = rzalloc_array(block, uint8_t, ctx->ssa_alloc); - bi_foreach_successor(block, succ) { - for (unsigned i = 0; i < ctx->ssa_alloc; ++i) - live[i] |= succ->live_in[i]; - } + bi_foreach_successor(block, succ) { + for (unsigned i = 0; i < ctx->ssa_alloc; ++i) + live[i] |= succ->live_in[i]; + } - 
bi_foreach_instr_in_block_safe_rev(block, ins) { - bool all_null = true; + bi_foreach_instr_in_block_safe_rev(block, ins) { + bool all_null = true; - bi_foreach_dest(ins, d) { - if (live[ins->dest[d].value] & bi_writemask(ins, d)) - all_null = false; - } + bi_foreach_dest(ins, d) { + if (live[ins->dest[d].value] & bi_writemask(ins, d)) + all_null = false; + } - if (all_null && !bi_side_effects(ins)) - bi_remove_instruction(ins); - else - bi_liveness_ins_update_ra(live, ins); - } + if (all_null && !bi_side_effects(ins)) + bi_remove_instruction(ins); + else + bi_liveness_ins_update_ra(live, ins); + } - ralloc_free(block->live_in); - block->live_in = live; - } + ralloc_free(block->live_in); + block->live_in = live; + } } /* @@ -855,12 +868,10 @@ bi_lower_vector(bi_context *ctx, unsigned first_reg) static bool bi_is_tied(const bi_instr *I) { - return (I->op == BI_OPCODE_TEXC || - I->op == BI_OPCODE_TEXC_DUAL || - I->op == BI_OPCODE_ATOM_RETURN_I32 || - I->op == BI_OPCODE_AXCHG_I32 || - I->op == BI_OPCODE_ACMPXCHG_I32) && - !bi_is_null(I->src[0]); + return (I->op == BI_OPCODE_TEXC || I->op == BI_OPCODE_TEXC_DUAL || + I->op == BI_OPCODE_ATOM_RETURN_I32 || I->op == BI_OPCODE_AXCHG_I32 || + I->op == BI_OPCODE_ACMPXCHG_I32) && + !bi_is_null(I->src[0]); } /* @@ -872,33 +883,34 @@ bi_is_tied(const bi_instr *I) static void bi_coalesce_tied(bi_context *ctx) { - bi_foreach_instr_global(ctx, I) { - if (!bi_is_tied(I)) continue; + bi_foreach_instr_global(ctx, I) { + if (!bi_is_tied(I)) + continue; - bi_builder b = bi_init_builder(ctx, bi_before_instr(I)); - unsigned n = bi_count_read_registers(I, 0); + bi_builder b = bi_init_builder(ctx, bi_before_instr(I)); + unsigned n = bi_count_read_registers(I, 0); - for (unsigned i = 0; i < n; ++i) { - bi_index dst = I->dest[0], src = I->src[0]; + for (unsigned i = 0; i < n; ++i) { + bi_index dst = I->dest[0], src = I->src[0]; - assert(dst.offset == 0 && src.offset == 0); - dst.offset = src.offset = i; + assert(dst.offset == 0 && src.offset == 0); + dst.offset = src.offset = i; - bi_mov_i32_to(&b, dst, src); - } + bi_mov_i32_to(&b, dst, src); + } - bi_replace_src(I, 0, I->dest[0]); - } + bi_replace_src(I, 0, I->dest[0]); + } } static unsigned find_or_allocate_temp(unsigned *map, unsigned value, unsigned *alloc) { - if (!map[value]) - map[value] = ++(*alloc); + if (!map[value]) + map[value] = ++(*alloc); - assert(map[value]); - return map[value] - 1; + assert(map[value]); + return map[value] - 1; } /* Reassigns numbering to get rid of gaps in the indices and to prioritize @@ -907,18 +919,20 @@ find_or_allocate_temp(unsigned *map, unsigned value, unsigned *alloc) static void squeeze_index(bi_context *ctx) { - unsigned *map = rzalloc_array(ctx, unsigned, ctx->ssa_alloc); - ctx->ssa_alloc = 0; + unsigned *map = rzalloc_array(ctx, unsigned, ctx->ssa_alloc); + ctx->ssa_alloc = 0; - bi_foreach_instr_global(ctx, I) { - bi_foreach_dest(I, d) - I->dest[d].value = find_or_allocate_temp(map, I->dest[d].value, &ctx->ssa_alloc); + bi_foreach_instr_global(ctx, I) { + bi_foreach_dest(I, d) + I->dest[d].value = + find_or_allocate_temp(map, I->dest[d].value, &ctx->ssa_alloc); - bi_foreach_ssa_src(I, s) - I->src[s].value = find_or_allocate_temp(map, I->src[s].value, &ctx->ssa_alloc); - } + bi_foreach_ssa_src(I, s) + I->src[s].value = + find_or_allocate_temp(map, I->src[s].value, &ctx->ssa_alloc); + } - ralloc_free(map); + ralloc_free(map); } /* @@ -929,203 +943,211 @@ squeeze_index(bi_context *ctx) static unsigned bi_out_of_ssa(bi_context *ctx) { - bi_index zero = 
bi_fau(BIR_FAU_IMMEDIATE | 0, false); - unsigned first_reg = ctx->ssa_alloc; + bi_index zero = bi_fau(BIR_FAU_IMMEDIATE | 0, false); + unsigned first_reg = ctx->ssa_alloc; - /* Trivially lower phis */ - bi_foreach_block(ctx, block) { - bi_foreach_instr_in_block_safe(block, I) { - if (I->op != BI_OPCODE_PHI) - break; + /* Trivially lower phis */ + bi_foreach_block(ctx, block) { + bi_foreach_instr_in_block_safe(block, I) { + if (I->op != BI_OPCODE_PHI) + break; - /* Assign a register for the phi */ - bi_index reg = bi_temp(ctx); - assert(reg.value >= first_reg); + /* Assign a register for the phi */ + bi_index reg = bi_temp(ctx); + assert(reg.value >= first_reg); - /* Lower to a move in each predecessor. The destinations - * cannot interfere so these can be sequentialized - * in arbitrary order. - */ - bi_foreach_predecessor(block, pred) { - bi_builder b = bi_init_builder(ctx, bi_after_block_logical(*pred)); - unsigned i = bi_predecessor_index(block, *pred); + /* Lower to a move in each predecessor. The destinations + * cannot interfere so these can be sequentialized + * in arbitrary order. + */ + bi_foreach_predecessor(block, pred) { + bi_builder b = bi_init_builder(ctx, bi_after_block_logical(*pred)); + unsigned i = bi_predecessor_index(block, *pred); - assert(!I->src[i].abs); - assert(!I->src[i].neg); - assert(I->src[i].swizzle == BI_SWIZZLE_H01); + assert(!I->src[i].abs); + assert(!I->src[i].neg); + assert(I->src[i].swizzle == BI_SWIZZLE_H01); - /* MOV of immediate needs lowering on Valhall */ - if (ctx->arch >= 9 && I->src[i].type == BI_INDEX_CONSTANT) - bi_iadd_imm_i32_to(&b, reg, zero, I->src[i].value); - else - bi_mov_i32_to(&b, reg, I->src[i]); - } + /* MOV of immediate needs lowering on Valhall */ + if (ctx->arch >= 9 && I->src[i].type == BI_INDEX_CONSTANT) + bi_iadd_imm_i32_to(&b, reg, zero, I->src[i].value); + else + bi_mov_i32_to(&b, reg, I->src[i]); + } - /* Replace the phi with a move */ - bi_builder b = bi_init_builder(ctx, bi_before_instr(I)); - bi_mov_i32_to(&b, I->dest[0], reg); - bi_remove_instruction(I); + /* Replace the phi with a move */ + bi_builder b = bi_init_builder(ctx, bi_before_instr(I)); + bi_mov_i32_to(&b, I->dest[0], reg); + bi_remove_instruction(I); - /* Propagate that move within the block. The destination - * is SSA and the source is not written in this block, - * so this is legal. The move itself will be DCE'd if - * possible in the next pass. - */ - bi_foreach_instr_in_block_rev(block, prop) { - if (prop->op == BI_OPCODE_PHI) - break; + /* Propagate that move within the block. The destination + * is SSA and the source is not written in this block, + * so this is legal. The move itself will be DCE'd if + * possible in the next pass. + */ + bi_foreach_instr_in_block_rev(block, prop) { + if (prop->op == BI_OPCODE_PHI) + break; - bi_foreach_src(prop, s) { - if (bi_is_equiv(prop->src[s], I->dest[0])) { - bi_replace_src(prop, s, reg); - } - } - } - } - } + bi_foreach_src(prop, s) { + if (bi_is_equiv(prop->src[s], I->dest[0])) { + bi_replace_src(prop, s, reg); + } + } + } + } + } - /* Try to locally propagate the moves we created. We need to be extra - * careful because we're not in SSA at this point, as such this - * algorithm is quadratic. This will go away when we go out of SSA after - * RA. - */ - BITSET_WORD *used = calloc(sizeof(BITSET_WORD), BITSET_WORDS(ctx->ssa_alloc)); - BITSET_WORD *multiple_uses = calloc(sizeof(BITSET_WORD), BITSET_WORDS(ctx->ssa_alloc)); + /* Try to locally propagate the moves we created. 
We need to be extra + * careful because we're not in SSA at this point, as such this + * algorithm is quadratic. This will go away when we go out of SSA after + * RA. + */ + BITSET_WORD *used = + calloc(sizeof(BITSET_WORD), BITSET_WORDS(ctx->ssa_alloc)); + BITSET_WORD *multiple_uses = + calloc(sizeof(BITSET_WORD), BITSET_WORDS(ctx->ssa_alloc)); - bi_foreach_instr_global(ctx, I) { - bi_foreach_ssa_src(I, s) { - if (BITSET_TEST(used, I->src[s].value)) - BITSET_SET(multiple_uses, I->src[s].value); - else - BITSET_SET(used, I->src[s].value); - } - } + bi_foreach_instr_global(ctx, I) { + bi_foreach_ssa_src(I, s) { + if (BITSET_TEST(used, I->src[s].value)) + BITSET_SET(multiple_uses, I->src[s].value); + else + BITSET_SET(used, I->src[s].value); + } + } - bi_foreach_block(ctx, block) { - bi_foreach_instr_in_block_safe_rev(block, mov) { - /* Match "reg = ssa" */ - if (mov->op != BI_OPCODE_MOV_I32) continue; - if (mov->dest[0].type != BI_INDEX_NORMAL) continue; - if (mov->dest[0].value < first_reg) continue; - if (!bi_is_ssa(mov->src[0])) continue; - if (mov->src[0].value >= first_reg) continue; - if (BITSET_TEST(multiple_uses, mov->src[0].value)) continue; + bi_foreach_block(ctx, block) { + bi_foreach_instr_in_block_safe_rev(block, mov) { + /* Match "reg = ssa" */ + if (mov->op != BI_OPCODE_MOV_I32) + continue; + if (mov->dest[0].type != BI_INDEX_NORMAL) + continue; + if (mov->dest[0].value < first_reg) + continue; + if (!bi_is_ssa(mov->src[0])) + continue; + if (mov->src[0].value >= first_reg) + continue; + if (BITSET_TEST(multiple_uses, mov->src[0].value)) + continue; - bool found = false; + bool found = false; - /* Look locally for the write of the SSA */ - bi_foreach_instr_in_block_rev(block, I) { - bool bail = false; + /* Look locally for the write of the SSA */ + bi_foreach_instr_in_block_rev(block, I) { + bool bail = false; - bi_foreach_src(I, s) { - /* Bail: write-after-read */ - if (bi_is_equiv(I->src[s], mov->dest[0])) - bail = true; - } + bi_foreach_src(I, s) { + /* Bail: write-after-read */ + if (bi_is_equiv(I->src[s], mov->dest[0])) + bail = true; + } - if (bail) - break; + if (bail) + break; - bi_foreach_dest(I, d) { - /* Bail: write-after-write */ - if (bi_is_equiv(I->dest[d], mov->dest[0])) - break; + bi_foreach_dest(I, d) { + /* Bail: write-after-write */ + if (bi_is_equiv(I->dest[d], mov->dest[0])) + break; - if (!bi_is_equiv(I->dest[d], mov->src[0])) - continue; + if (!bi_is_equiv(I->dest[d], mov->src[0])) + continue; - /* We found it, replace */ - I->dest[d] = bi_replace_index(I->dest[d], mov->dest[0]); - found = true; - break; - } + /* We found it, replace */ + I->dest[d] = bi_replace_index(I->dest[d], mov->dest[0]); + found = true; + break; + } - if (found) - break; - } + if (found) + break; + } - if (found) - bi_remove_instruction(mov); - } - } + if (found) + bi_remove_instruction(mov); + } + } - free(used); - free(multiple_uses); - return first_reg; + free(used); + free(multiple_uses); + return first_reg; } void bi_register_allocate(bi_context *ctx) { - struct lcra_state *l = NULL; - bool success = false; + struct lcra_state *l = NULL; + bool success = false; - unsigned iter_count = 1000; /* max iterations */ + unsigned iter_count = 1000; /* max iterations */ - /* Number of bytes of memory we've spilled into */ - unsigned spill_count = ctx->info.tls_size; + /* Number of bytes of memory we've spilled into */ + unsigned spill_count = ctx->info.tls_size; - if (ctx->arch >= 9) - va_lower_split_64bit(ctx); + if (ctx->arch >= 9) + va_lower_split_64bit(ctx); - /* Lower tied 
operands. SSA is broken from here on. */ - unsigned first_reg = bi_out_of_ssa(ctx); - bi_lower_vector(ctx, first_reg); - bi_coalesce_tied(ctx); - squeeze_index(ctx); + /* Lower tied operands. SSA is broken from here on. */ + unsigned first_reg = bi_out_of_ssa(ctx); + bi_lower_vector(ctx, first_reg); + bi_coalesce_tied(ctx); + squeeze_index(ctx); - /* Try with reduced register pressure to improve thread count */ - if (ctx->arch >= 7) { - l = bi_allocate_registers(ctx, &success, false); + /* Try with reduced register pressure to improve thread count */ + if (ctx->arch >= 7) { + l = bi_allocate_registers(ctx, &success, false); - if (success) { - ctx->info.work_reg_count = 32; - } else { - lcra_free(l); - l = NULL; - } - } + if (success) { + ctx->info.work_reg_count = 32; + } else { + lcra_free(l); + l = NULL; + } + } - /* Otherwise, use the register file and spill until we succeed */ - while (!success && ((iter_count--) > 0)) { - l = bi_allocate_registers(ctx, &success, true); + /* Otherwise, use the register file and spill until we succeed */ + while (!success && ((iter_count--) > 0)) { + l = bi_allocate_registers(ctx, &success, true); - if (success) { - ctx->info.work_reg_count = 64; - } else { - signed spill_node = bi_choose_spill_node(ctx, l); - lcra_free(l); - l = NULL; + if (success) { + ctx->info.work_reg_count = 64; + } else { + signed spill_node = bi_choose_spill_node(ctx, l); + lcra_free(l); + l = NULL; - if (spill_node == -1) - unreachable("Failed to choose spill node\n"); + if (spill_node == -1) + unreachable("Failed to choose spill node\n"); - if (ctx->inputs->is_blend) - unreachable("Blend shaders may not spill"); + if (ctx->inputs->is_blend) + unreachable("Blend shaders may not spill"); - /* By default, we use packed TLS addressing on Valhall. - * We cannot cross 16 byte boundaries with packed TLS - * addressing. Align to ensure this doesn't happen. This - * could be optimized a bit. - */ - if (ctx->arch >= 9) - spill_count = ALIGN_POT(spill_count, 16); + /* By default, we use packed TLS addressing on Valhall. + * We cannot cross 16 byte boundaries with packed TLS + * addressing. Align to ensure this doesn't happen. This + * could be optimized a bit. + */ + if (ctx->arch >= 9) + spill_count = ALIGN_POT(spill_count, 16); - spill_count += bi_spill_register(ctx, - bi_get_index(spill_node), spill_count); + spill_count += + bi_spill_register(ctx, bi_get_index(spill_node), spill_count); - /* In case the spill affected an instruction with tied - * operands, we need to fix up. - */ - bi_coalesce_tied(ctx); - } - } + /* In case the spill affected an instruction with tied + * operands, we need to fix up. 
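/* Illustrative aside, not part of the patch: the allocate/spill retry
 * strategy of bi_register_allocate above, with the allocator replaced by a
 * stub. try_allocate() is an invented stand-in, and the arch gate and
 * spill-offset bookkeeping are dropped; only the control flow is shown. */
#include <stdbool.h>
#include <stdio.h>

static bool
try_allocate(bool reduced_pressure, unsigned spilled)
{
   /* Stub: pretend allocation only fits on the full file and only after
    * two values have been spilled. */
   return !reduced_pressure && spilled >= 2;
}

int
main(void)
{
   unsigned spilled = 0;
   unsigned work_reg_count = 0;

   /* First try half the register file: fewer work registers per thread
    * means higher occupancy if it succeeds. */
   if (try_allocate(true, spilled)) {
      work_reg_count = 32;
   } else {
      /* Otherwise take the whole file and spill until allocation fits. */
      for (unsigned iter = 0; iter < 1000 && !work_reg_count; ++iter) {
         if (try_allocate(false, spilled))
            work_reg_count = 64;
         else
            spilled++;
      }
   }

   printf("work registers: %u, values spilled: %u\n", work_reg_count, spilled);
   return 0;
}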
+ */ + bi_coalesce_tied(ctx); + } + } - assert(success); - assert(l != NULL); + assert(success); + assert(l != NULL); - ctx->info.tls_size = spill_count; - bi_install_registers(ctx, l); + ctx->info.tls_size = spill_count; + bi_install_registers(ctx, l); - lcra_free(l); + lcra_free(l); } diff --git a/src/panfrost/bifrost/bi_schedule.c b/src/panfrost/bifrost/bi_schedule.c index 877d57aaadf..f55e364f928 100644 --- a/src/panfrost/bifrost/bi_schedule.c +++ b/src/panfrost/bifrost/bi_schedule.c @@ -24,120 +24,120 @@ * Alyssa Rosenzweig */ -#include "compiler.h" #include "bi_builder.h" +#include "compiler.h" /* Arguments common to worklist, passed by value for convenience */ struct bi_worklist { - /* # of instructions in the block */ - unsigned count; + /* # of instructions in the block */ + unsigned count; - /* Instructions in the block */ - bi_instr **instructions; + /* Instructions in the block */ + bi_instr **instructions; - /* Bitset of instructions in the block ready for scheduling */ - BITSET_WORD *worklist; + /* Bitset of instructions in the block ready for scheduling */ + BITSET_WORD *worklist; - /* The backwards dependency graph. nr_dependencies is the number of - * unscheduled instructions that must still be scheduled after (before) - * this instruction. dependents are which instructions need to be - * scheduled before (after) this instruction. */ - unsigned *dep_counts; - BITSET_WORD **dependents; + /* The backwards dependency graph. nr_dependencies is the number of + * unscheduled instructions that must still be scheduled after (before) + * this instruction. dependents are which instructions need to be + * scheduled before (after) this instruction. */ + unsigned *dep_counts; + BITSET_WORD **dependents; }; /* State of a single tuple and clause under construction */ struct bi_reg_state { - /* Number of register writes */ - unsigned nr_writes; + /* Number of register writes */ + unsigned nr_writes; - /* Register reads, expressed as (equivalence classes of) - * sources. Only 3 reads are allowed, but up to 2 may spill as - * "forced" for the next scheduled tuple, provided such a tuple - * can be constructed */ - bi_index reads[5]; - unsigned nr_reads; + /* Register reads, expressed as (equivalence classes of) + * sources. Only 3 reads are allowed, but up to 2 may spill as + * "forced" for the next scheduled tuple, provided such a tuple + * can be constructed */ + bi_index reads[5]; + unsigned nr_reads; - /* The previous tuple scheduled (= the next tuple executed in the - * program) may require certain writes, in order to bypass the register - * file and use a temporary passthrough for the value. Up to 2 such - * constraints are architecturally satisfiable */ - unsigned forced_count; - bi_index forceds[2]; + /* The previous tuple scheduled (= the next tuple executed in the + * program) may require certain writes, in order to bypass the register + * file and use a temporary passthrough for the value. 
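/* Illustrative aside, not part of the patch: how the dep_counts/dependents
 * fields of bi_worklist above drive scheduling. An instruction becomes
 * available once every instruction it depends on has been picked, i.e. its
 * outstanding-dependency count reaches zero. Fixed-size arrays replace the
 * driver's bitsets, and the data is made up for the sketch. */
#include <stdio.h>

#define N 4

int
main(void)
{
   /* dependents[p][c] != 0 means c must wait for p. */
   int dependents[N][N] = {
      [0] = {0, 1, 0, 0}, /* i1 waits for i0 */
      [1] = {0, 0, 1, 1}, /* i2 and i3 wait for i1 */
   };
   int dep_counts[N] = {0, 1, 1, 1};
   int scheduled[N] = {0};

   for (int picked = 0; picked < N; ++picked) {
      /* Pick any instruction with no outstanding dependencies. */
      int next = -1;
      for (int i = 0; i < N && next < 0; ++i)
         if (!scheduled[i] && dep_counts[i] == 0)
            next = i;

      if (next < 0)
         break;

      printf("schedule i%d\n", next);
      scheduled[next] = 1;

      /* Release everything that was only waiting on it. */
      for (int c = 0; c < N; ++c)
         if (dependents[next][c])
            dep_counts[c]--;
   }

   return 0;
}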
Up to 2 such + * constraints are architecturally satisfiable */ + unsigned forced_count; + bi_index forceds[2]; }; struct bi_tuple_state { - /* Is this the last tuple in the clause */ - bool last; + /* Is this the last tuple in the clause */ + bool last; - /* Scheduled ADD instruction, or null if none */ - bi_instr *add; + /* Scheduled ADD instruction, or null if none */ + bi_instr *add; - /* Reads for previous (succeeding) tuple */ - bi_index prev_reads[5]; - unsigned nr_prev_reads; - bi_tuple *prev; + /* Reads for previous (succeeding) tuple */ + bi_index prev_reads[5]; + unsigned nr_prev_reads; + bi_tuple *prev; - /* Register slot state for current tuple */ - struct bi_reg_state reg; + /* Register slot state for current tuple */ + struct bi_reg_state reg; - /* Constants are shared in the tuple. If constant_count is nonzero, it - * is a size for constant count. Otherwise, fau is the slot read from - * FAU, or zero if none is assigned. Ordinarily FAU slot 0 reads zero, - * but within a tuple, that should be encoded as constant_count != 0 - * and constants[0] = constants[1] = 0 */ - unsigned constant_count; + /* Constants are shared in the tuple. If constant_count is nonzero, it + * is a size for constant count. Otherwise, fau is the slot read from + * FAU, or zero if none is assigned. Ordinarily FAU slot 0 reads zero, + * but within a tuple, that should be encoded as constant_count != 0 + * and constants[0] = constants[1] = 0 */ + unsigned constant_count; - union { - uint32_t constants[2]; - enum bir_fau fau; - }; + union { + uint32_t constants[2]; + enum bir_fau fau; + }; - unsigned pcrel_idx; + unsigned pcrel_idx; }; struct bi_const_state { - unsigned constant_count; - bool pcrel; /* applies to first const */ - uint32_t constants[2]; + unsigned constant_count; + bool pcrel; /* applies to first const */ + uint32_t constants[2]; - /* Index of the constant into the clause */ - unsigned word_idx; + /* Index of the constant into the clause */ + unsigned word_idx; }; enum bi_ftz_state { - /* No flush-to-zero state assigned yet */ - BI_FTZ_STATE_NONE, + /* No flush-to-zero state assigned yet */ + BI_FTZ_STATE_NONE, - /* Never flush-to-zero */ - BI_FTZ_STATE_DISABLE, + /* Never flush-to-zero */ + BI_FTZ_STATE_DISABLE, - /* Always flush-to-zero */ - BI_FTZ_STATE_ENABLE, + /* Always flush-to-zero */ + BI_FTZ_STATE_ENABLE, }; /* At this point, pseudoinstructions have been lowered so sources/destinations * are limited to what's physically supported. */ -#define BI_MAX_PHYS_SRCS 4 +#define BI_MAX_PHYS_SRCS 4 #define BI_MAX_PHYS_DESTS 2 struct bi_clause_state { - /* Has a message-passing instruction already been assigned? */ - bool message; + /* Has a message-passing instruction already been assigned? */ + bool message; - /* Indices already accessed, this needs to be tracked to avoid hazards - * around message-passing instructions */ - unsigned access_count; - bi_index accesses[(BI_MAX_PHYS_SRCS + BI_MAX_PHYS_DESTS) * 16]; + /* Indices already accessed, this needs to be tracked to avoid hazards + * around message-passing instructions */ + unsigned access_count; + bi_index accesses[(BI_MAX_PHYS_SRCS + BI_MAX_PHYS_DESTS) * 16]; - unsigned tuple_count; - struct bi_const_state consts[8]; + unsigned tuple_count; + struct bi_const_state consts[8]; - /* Numerical state of the clause */ - enum bi_ftz_state ftz; + /* Numerical state of the clause */ + enum bi_ftz_state ftz; }; /* Determines messsage type by checking the table and a few special cases. 
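/* Illustrative aside, not part of the patch: a toy model of the "constants
 * are shared in the tuple" idea sketched in bi_tuple_state above. A tuple
 * exposes at most two 32-bit constants, and reusing an identical value
 * costs nothing. This is invented for the example and far simpler than the
 * scheduler's real constant handling (pcrel, FAU slots, 64-bit pairs). */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct toy_tuple_consts {
   unsigned count;
   uint32_t values[2];
};

static bool
add_constant(struct toy_tuple_consts *t, uint32_t value)
{
   for (unsigned i = 0; i < t->count; ++i) {
      if (t->values[i] == value)
         return true; /* already available to the whole tuple */
   }

   if (t->count == 2)
      return false; /* no slot left; the instruction must wait */

   t->values[t->count++] = value;
   return true;
}

int
main(void)
{
   struct toy_tuple_consts t = {0};

   /* Prints "1 1 1 0": the duplicate is free, the third distinct value
    * does not fit. */
   printf("%d %d %d %d\n", add_constant(&t, 0xdeadbeef), add_constant(&t, 0),
          add_constant(&t, 0xdeadbeef), add_constant(&t, 42));
   return 0;
}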
Only @@ -148,16 +148,16 @@ struct bi_clause_state { static enum bifrost_message_type bi_message_type_for_instr(bi_instr *ins) { - enum bifrost_message_type msg = bi_opcode_props[ins->op].message; - bool ld_var_special = (ins->op == BI_OPCODE_LD_VAR_SPECIAL); + enum bifrost_message_type msg = bi_opcode_props[ins->op].message; + bool ld_var_special = (ins->op == BI_OPCODE_LD_VAR_SPECIAL); - if (ld_var_special && ins->varying_name == BI_VARYING_NAME_FRAG_Z) - return BIFROST_MESSAGE_Z_STENCIL; + if (ld_var_special && ins->varying_name == BI_VARYING_NAME_FRAG_Z) + return BIFROST_MESSAGE_Z_STENCIL; - if (msg == BIFROST_MESSAGE_LOAD && ins->seg == BI_SEG_UBO) - return BIFROST_MESSAGE_ATTRIBUTE; + if (msg == BIFROST_MESSAGE_LOAD && ins->seg == BI_SEG_UBO) + return BIFROST_MESSAGE_ATTRIBUTE; - return msg; + return msg; } /* Attribute, texture, and UBO load (attribute message) instructions support @@ -166,157 +166,162 @@ bi_message_type_for_instr(bi_instr *ins) ASSERTED static bool bi_supports_dtsel(bi_instr *ins) { - switch (bi_message_type_for_instr(ins)) { - case BIFROST_MESSAGE_ATTRIBUTE: - return ins->op != BI_OPCODE_LD_GCLK_U64; - case BIFROST_MESSAGE_TEX: - return true; - default: - return false; - } + switch (bi_message_type_for_instr(ins)) { + case BIFROST_MESSAGE_ATTRIBUTE: + return ins->op != BI_OPCODE_LD_GCLK_U64; + case BIFROST_MESSAGE_TEX: + return true; + default: + return false; + } } /* Adds an edge to the dependency graph */ static void -bi_push_dependency(unsigned parent, unsigned child, - BITSET_WORD **dependents, unsigned *dep_counts) +bi_push_dependency(unsigned parent, unsigned child, BITSET_WORD **dependents, + unsigned *dep_counts) { - if (!BITSET_TEST(dependents[parent], child)) { - BITSET_SET(dependents[parent], child); - dep_counts[child]++; - } + if (!BITSET_TEST(dependents[parent], child)) { + BITSET_SET(dependents[parent], child); + dep_counts[child]++; + } } static void add_dependency(struct util_dynarray *table, unsigned index, unsigned child, - BITSET_WORD **dependents, unsigned *dep_counts) + BITSET_WORD **dependents, unsigned *dep_counts) { - assert(index < 64); - util_dynarray_foreach(table + index, unsigned, parent) - bi_push_dependency(*parent, child, dependents, dep_counts); + assert(index < 64); + util_dynarray_foreach(table + index, unsigned, parent) + bi_push_dependency(*parent, child, dependents, dep_counts); } static void mark_access(struct util_dynarray *table, unsigned index, unsigned parent) { - assert(index < 64); - util_dynarray_append(&table[index], unsigned, parent); + assert(index < 64); + util_dynarray_append(&table[index], unsigned, parent); } static bool bi_is_sched_barrier(bi_instr *I) { - switch (I->op) { - case BI_OPCODE_BARRIER: - case BI_OPCODE_DISCARD_F32: - return true; - default: - return false; - } + switch (I->op) { + case BI_OPCODE_BARRIER: + case BI_OPCODE_DISCARD_F32: + return true; + default: + return false; + } } static void bi_create_dependency_graph(struct bi_worklist st, bool inorder, bool is_blend) { - struct util_dynarray last_read[64], last_write[64]; + struct util_dynarray last_read[64], last_write[64]; - for (unsigned i = 0; i < 64; ++i) { - util_dynarray_init(&last_read[i], NULL); - util_dynarray_init(&last_write[i], NULL); - } + for (unsigned i = 0; i < 64; ++i) { + util_dynarray_init(&last_read[i], NULL); + util_dynarray_init(&last_write[i], NULL); + } - /* Initialize dependency graph */ - for (unsigned i = 0; i < st.count; ++i) { - st.dependents[i] = - calloc(BITSET_WORDS(st.count), sizeof(BITSET_WORD)); + /* 
Initialize dependency graph */ + for (unsigned i = 0; i < st.count; ++i) { + st.dependents[i] = calloc(BITSET_WORDS(st.count), sizeof(BITSET_WORD)); - st.dep_counts[i] = 0; - } + st.dep_counts[i] = 0; + } - unsigned prev_msg = ~0; + unsigned prev_msg = ~0; - /* Populate dependency graph */ - for (signed i = st.count - 1; i >= 0; --i) { - bi_instr *ins = st.instructions[i]; + /* Populate dependency graph */ + for (signed i = st.count - 1; i >= 0; --i) { + bi_instr *ins = st.instructions[i]; - bi_foreach_src(ins, s) { - if (ins->src[s].type != BI_INDEX_REGISTER) continue; - unsigned count = bi_count_read_registers(ins, s); + bi_foreach_src(ins, s) { + if (ins->src[s].type != BI_INDEX_REGISTER) + continue; + unsigned count = bi_count_read_registers(ins, s); - for (unsigned c = 0; c < count; ++c) - add_dependency(last_write, ins->src[s].value + c, i, st.dependents, st.dep_counts); - } + for (unsigned c = 0; c < count; ++c) + add_dependency(last_write, ins->src[s].value + c, i, st.dependents, + st.dep_counts); + } - /* Keep message-passing ops in order. (This pass only cares - * about bundling; reordering of message-passing instructions - * happens during earlier scheduling.) */ + /* Keep message-passing ops in order. (This pass only cares + * about bundling; reordering of message-passing instructions + * happens during earlier scheduling.) */ - if (bi_message_type_for_instr(ins)) { - if (prev_msg != ~0) - bi_push_dependency(prev_msg, i, st.dependents, st.dep_counts); + if (bi_message_type_for_instr(ins)) { + if (prev_msg != ~0) + bi_push_dependency(prev_msg, i, st.dependents, st.dep_counts); - prev_msg = i; - } + prev_msg = i; + } - /* Handle schedule barriers, adding All the deps */ - if (inorder || bi_is_sched_barrier(ins)) { - for (unsigned j = 0; j < st.count; ++j) { - if (i == j) continue; + /* Handle schedule barriers, adding All the deps */ + if (inorder || bi_is_sched_barrier(ins)) { + for (unsigned j = 0; j < st.count; ++j) { + if (i == j) + continue; - bi_push_dependency(MAX2(i, j), MIN2(i, j), - st.dependents, st.dep_counts); - } - } + bi_push_dependency(MAX2(i, j), MIN2(i, j), st.dependents, + st.dep_counts); + } + } - bi_foreach_dest(ins, d) { - assert(ins->dest[d].type == BI_INDEX_REGISTER); - unsigned dest = ins->dest[d].value; + bi_foreach_dest(ins, d) { + assert(ins->dest[d].type == BI_INDEX_REGISTER); + unsigned dest = ins->dest[d].value; - unsigned count = bi_count_write_registers(ins, d); + unsigned count = bi_count_write_registers(ins, d); - for (unsigned c = 0; c < count; ++c) { - add_dependency(last_read, dest + c, i, st.dependents, st.dep_counts); - add_dependency(last_write, dest + c, i, st.dependents, st.dep_counts); - mark_access(last_write, dest + c, i); - } - } + for (unsigned c = 0; c < count; ++c) { + add_dependency(last_read, dest + c, i, st.dependents, + st.dep_counts); + add_dependency(last_write, dest + c, i, st.dependents, + st.dep_counts); + mark_access(last_write, dest + c, i); + } + } - /* Blend shaders are allowed to clobber R0-R15. Treat these - * registers like extra destinations for scheduling purposes. - */ - if (ins->op == BI_OPCODE_BLEND && !is_blend) { - for (unsigned c = 0; c < 16; ++c) { - add_dependency(last_read, c, i, st.dependents, st.dep_counts); - add_dependency(last_write, c, i, st.dependents, st.dep_counts); - mark_access(last_write, c, i); - } - } + /* Blend shaders are allowed to clobber R0-R15. Treat these + * registers like extra destinations for scheduling purposes. 
+ */ + if (ins->op == BI_OPCODE_BLEND && !is_blend) { + for (unsigned c = 0; c < 16; ++c) { + add_dependency(last_read, c, i, st.dependents, st.dep_counts); + add_dependency(last_write, c, i, st.dependents, st.dep_counts); + mark_access(last_write, c, i); + } + } - bi_foreach_src(ins, s) { - if (ins->src[s].type != BI_INDEX_REGISTER) continue; + bi_foreach_src(ins, s) { + if (ins->src[s].type != BI_INDEX_REGISTER) + continue; - unsigned count = bi_count_read_registers(ins, s); + unsigned count = bi_count_read_registers(ins, s); - for (unsigned c = 0; c < count; ++c) - mark_access(last_read, ins->src[s].value + c, i); - } - } + for (unsigned c = 0; c < count; ++c) + mark_access(last_read, ins->src[s].value + c, i); + } + } - /* If there is a branch, all instructions depend on it, as interblock - * execution must be purely in-order */ + /* If there is a branch, all instructions depend on it, as interblock + * execution must be purely in-order */ - bi_instr *last = st.instructions[st.count - 1]; - if (last->branch_target || last->op == BI_OPCODE_JUMP) { - for (signed i = st.count - 2; i >= 0; --i) - bi_push_dependency(st.count - 1, i, st.dependents, st.dep_counts); - } + bi_instr *last = st.instructions[st.count - 1]; + if (last->branch_target || last->op == BI_OPCODE_JUMP) { + for (signed i = st.count - 2; i >= 0; --i) + bi_push_dependency(st.count - 1, i, st.dependents, st.dep_counts); + } - /* Free the intermediate structures */ - for (unsigned i = 0; i < 64; ++i) { - util_dynarray_fini(&last_read[i]); - util_dynarray_fini(&last_write[i]); - } + /* Free the intermediate structures */ + for (unsigned i = 0; i < 64; ++i) { + util_dynarray_fini(&last_read[i]); + util_dynarray_fini(&last_write[i]); + } } /* Scheduler pseudoinstruction lowerings to enable instruction pairings. @@ -324,22 +329,22 @@ bi_create_dependency_graph(struct bi_worklist st, bool inorder, bool is_blend) */ static bi_instr * -bi_lower_cubeface(bi_context *ctx, - struct bi_clause_state *clause, struct bi_tuple_state *tuple) +bi_lower_cubeface(bi_context *ctx, struct bi_clause_state *clause, + struct bi_tuple_state *tuple) { - bi_instr *pinstr = tuple->add; - bi_builder b = bi_init_builder(ctx, bi_before_instr(pinstr)); - bi_instr *cubeface1 = bi_cubeface1_to(&b, pinstr->dest[0], - pinstr->src[0], pinstr->src[1], pinstr->src[2]); + bi_instr *pinstr = tuple->add; + bi_builder b = bi_init_builder(ctx, bi_before_instr(pinstr)); + bi_instr *cubeface1 = bi_cubeface1_to(&b, pinstr->dest[0], pinstr->src[0], + pinstr->src[1], pinstr->src[2]); - pinstr->op = BI_OPCODE_CUBEFACE2; - pinstr->dest[0] = pinstr->dest[1]; - bi_drop_dests(pinstr, 1); + pinstr->op = BI_OPCODE_CUBEFACE2; + pinstr->dest[0] = pinstr->dest[1]; + bi_drop_dests(pinstr, 1); - pinstr->src[0] = cubeface1->dest[0]; - bi_drop_srcs(pinstr, 1); + pinstr->src[0] = cubeface1->dest[0]; + bi_drop_srcs(pinstr, 1); - return cubeface1; + return cubeface1; } /* Psuedo arguments are (rbase, address lo, address hi). 
We need *ATOM_C.i32 to @@ -347,83 +352,81 @@ bi_lower_cubeface(bi_context *ctx, * arguments (rbase, address lo, address hi, rbase) */ static bi_instr * -bi_lower_atom_c(bi_context *ctx, struct bi_clause_state *clause, struct - bi_tuple_state *tuple) +bi_lower_atom_c(bi_context *ctx, struct bi_clause_state *clause, + struct bi_tuple_state *tuple) { - bi_instr *pinstr = tuple->add; - bi_builder b = bi_init_builder(ctx, bi_before_instr(pinstr)); - bi_instr *atom_c = bi_atom_c_return_i32(&b, - pinstr->src[1], pinstr->src[2], pinstr->src[0], - pinstr->atom_opc); + bi_instr *pinstr = tuple->add; + bi_builder b = bi_init_builder(ctx, bi_before_instr(pinstr)); + bi_instr *atom_c = bi_atom_c_return_i32(&b, pinstr->src[1], pinstr->src[2], + pinstr->src[0], pinstr->atom_opc); - if (bi_is_null(pinstr->dest[0])) - atom_c->op = BI_OPCODE_ATOM_C_I32; + if (bi_is_null(pinstr->dest[0])) + atom_c->op = BI_OPCODE_ATOM_C_I32; - bi_instr *atom_cx = bi_atom_cx_to(&b, pinstr->dest[0], pinstr->src[0], - pinstr->src[1], pinstr->src[2], pinstr->src[0], - pinstr->sr_count); - tuple->add = atom_cx; - bi_remove_instruction(pinstr); + bi_instr *atom_cx = + bi_atom_cx_to(&b, pinstr->dest[0], pinstr->src[0], pinstr->src[1], + pinstr->src[2], pinstr->src[0], pinstr->sr_count); + tuple->add = atom_cx; + bi_remove_instruction(pinstr); - return atom_c; + return atom_c; } static bi_instr * -bi_lower_atom_c1(bi_context *ctx, struct bi_clause_state *clause, struct - bi_tuple_state *tuple) +bi_lower_atom_c1(bi_context *ctx, struct bi_clause_state *clause, + struct bi_tuple_state *tuple) { - bi_instr *pinstr = tuple->add; - bi_builder b = bi_init_builder(ctx, bi_before_instr(pinstr)); - bi_instr *atom_c = bi_atom_c1_return_i32(&b, - pinstr->src[0], pinstr->src[1], pinstr->atom_opc); + bi_instr *pinstr = tuple->add; + bi_builder b = bi_init_builder(ctx, bi_before_instr(pinstr)); + bi_instr *atom_c = bi_atom_c1_return_i32(&b, pinstr->src[0], pinstr->src[1], + pinstr->atom_opc); - if (bi_is_null(pinstr->dest[0])) - atom_c->op = BI_OPCODE_ATOM_C1_I32; + if (bi_is_null(pinstr->dest[0])) + atom_c->op = BI_OPCODE_ATOM_C1_I32; + bi_instr *atom_cx = + bi_atom_cx_to(&b, pinstr->dest[0], bi_null(), pinstr->src[0], + pinstr->src[1], bi_dontcare(&b), pinstr->sr_count); + tuple->add = atom_cx; + bi_remove_instruction(pinstr); - bi_instr *atom_cx = bi_atom_cx_to(&b, pinstr->dest[0], bi_null(), - pinstr->src[0], pinstr->src[1], bi_dontcare(&b), - pinstr->sr_count); - tuple->add = atom_cx; - bi_remove_instruction(pinstr); - - return atom_c; + return atom_c; } static bi_instr * -bi_lower_seg_add(bi_context *ctx, - struct bi_clause_state *clause, struct bi_tuple_state *tuple) +bi_lower_seg_add(bi_context *ctx, struct bi_clause_state *clause, + struct bi_tuple_state *tuple) { - bi_instr *pinstr = tuple->add; - bi_builder b = bi_init_builder(ctx, bi_before_instr(pinstr)); + bi_instr *pinstr = tuple->add; + bi_builder b = bi_init_builder(ctx, bi_before_instr(pinstr)); - bi_instr *fma = bi_seg_add_to(&b, pinstr->dest[0], pinstr->src[0], - pinstr->preserve_null, pinstr->seg); + bi_instr *fma = bi_seg_add_to(&b, pinstr->dest[0], pinstr->src[0], + pinstr->preserve_null, pinstr->seg); - pinstr->op = BI_OPCODE_SEG_ADD; - pinstr->src[0] = pinstr->src[1]; - bi_drop_srcs(pinstr, 1); + pinstr->op = BI_OPCODE_SEG_ADD; + pinstr->src[0] = pinstr->src[1]; + bi_drop_srcs(pinstr, 1); - assert(pinstr->dest[0].type == BI_INDEX_REGISTER); - pinstr->dest[0].value += 1; + assert(pinstr->dest[0].type == BI_INDEX_REGISTER); + pinstr->dest[0].value += 1; - return fma; + 
return fma; } static bi_instr * -bi_lower_dtsel(bi_context *ctx, - struct bi_clause_state *clause, struct bi_tuple_state *tuple) +bi_lower_dtsel(bi_context *ctx, struct bi_clause_state *clause, + struct bi_tuple_state *tuple) { - bi_instr *add = tuple->add; - bi_builder b = bi_init_builder(ctx, bi_before_instr(add)); + bi_instr *add = tuple->add; + bi_builder b = bi_init_builder(ctx, bi_before_instr(add)); - bi_instr *dtsel = bi_dtsel_imm_to(&b, bi_temp(b.shader), - add->src[0], add->table); - assert(add->nr_srcs >= 1); - add->src[0] = dtsel->dest[0]; + bi_instr *dtsel = + bi_dtsel_imm_to(&b, bi_temp(b.shader), add->src[0], add->table); + assert(add->nr_srcs >= 1); + add->src[0] = dtsel->dest[0]; - assert(bi_supports_dtsel(add)); - return dtsel; + assert(bi_supports_dtsel(add)); + return dtsel; } /* Flatten linked list to array for O(1) indexing */ @@ -431,18 +434,18 @@ bi_lower_dtsel(bi_context *ctx, static bi_instr ** bi_flatten_block(bi_block *block, unsigned *len) { - if (list_is_empty(&block->instructions)) - return NULL; + if (list_is_empty(&block->instructions)) + return NULL; - *len = list_length(&block->instructions); - bi_instr **instructions = malloc(sizeof(bi_instr *) * (*len)); + *len = list_length(&block->instructions); + bi_instr **instructions = malloc(sizeof(bi_instr *) * (*len)); - unsigned i = 0; + unsigned i = 0; - bi_foreach_instr_in_block(block, ins) - instructions[i++] = ins; + bi_foreach_instr_in_block(block, ins) + instructions[i++] = ins; - return instructions; + return instructions; } /* The worklist would track instructions without outstanding dependencies. For @@ -452,56 +455,56 @@ bi_flatten_block(bi_block *block, unsigned *len) static struct bi_worklist bi_initialize_worklist(bi_block *block, bool inorder, bool is_blend) { - struct bi_worklist st = { }; - st.instructions = bi_flatten_block(block, &st.count); + struct bi_worklist st = {}; + st.instructions = bi_flatten_block(block, &st.count); - if (!st.count) - return st; + if (!st.count) + return st; - st.dependents = calloc(st.count, sizeof(st.dependents[0])); - st.dep_counts = calloc(st.count, sizeof(st.dep_counts[0])); + st.dependents = calloc(st.count, sizeof(st.dependents[0])); + st.dep_counts = calloc(st.count, sizeof(st.dep_counts[0])); - bi_create_dependency_graph(st, inorder, is_blend); - st.worklist = calloc(BITSET_WORDS(st.count), sizeof(BITSET_WORD)); + bi_create_dependency_graph(st, inorder, is_blend); + st.worklist = calloc(BITSET_WORDS(st.count), sizeof(BITSET_WORD)); - for (unsigned i = 0; i < st.count; ++i) { - if (st.dep_counts[i] == 0) - BITSET_SET(st.worklist, i); - } + for (unsigned i = 0; i < st.count; ++i) { + if (st.dep_counts[i] == 0) + BITSET_SET(st.worklist, i); + } - return st; + return st; } static void bi_free_worklist(struct bi_worklist st) { - free(st.dep_counts); - free(st.dependents); - free(st.instructions); - free(st.worklist); + free(st.dep_counts); + free(st.dependents); + free(st.instructions); + free(st.worklist); } static void bi_update_worklist(struct bi_worklist st, unsigned idx) { - assert(st.dep_counts[idx] == 0); + assert(st.dep_counts[idx] == 0); - if (!st.dependents[idx]) - return; + if (!st.dependents[idx]) + return; - /* Iterate each dependent to remove one dependency (`done`), - * adding dependents to the worklist where possible. */ + /* Iterate each dependent to remove one dependency (`done`), + * adding dependents to the worklist where possible. 
*/ - unsigned i; - BITSET_FOREACH_SET(i, st.dependents[idx], st.count) { - assert(st.dep_counts[i] != 0); - unsigned new_deps = --st.dep_counts[i]; + unsigned i; + BITSET_FOREACH_SET(i, st.dependents[idx], st.count) { + assert(st.dep_counts[i] != 0); + unsigned new_deps = --st.dep_counts[i]; - if (new_deps == 0) - BITSET_SET(st.worklist, i); - } + if (new_deps == 0) + BITSET_SET(st.worklist, i); + } - free(st.dependents[idx]); + free(st.dependents[idx]); } /* Scheduler predicates */ @@ -510,9 +513,9 @@ bi_update_worklist(struct bi_worklist st, unsigned idx) static bool bi_can_iaddc(bi_instr *ins) { - return (ins->op == BI_OPCODE_IADD_U32 && !ins->saturate && - ins->src[0].swizzle == BI_SWIZZLE_H01 && - ins->src[1].swizzle == BI_SWIZZLE_H01); + return (ins->op == BI_OPCODE_IADD_U32 && !ins->saturate && + ins->src[0].swizzle == BI_SWIZZLE_H01 && + ins->src[1].swizzle == BI_SWIZZLE_H01); } /* @@ -523,57 +526,57 @@ bi_can_iaddc(bi_instr *ins) static bool bi_impacted_abs(bi_instr *I) { - return I->src[0].abs && I->src[1].abs && - bi_is_word_equiv(I->src[0], I->src[1]); + return I->src[0].abs && I->src[1].abs && + bi_is_word_equiv(I->src[0], I->src[1]); } bool bi_can_fma(bi_instr *ins) { - /* +IADD.i32 -> *IADDC.i32 */ - if (bi_can_iaddc(ins)) - return true; + /* +IADD.i32 -> *IADDC.i32 */ + if (bi_can_iaddc(ins)) + return true; - /* +MUX -> *CSEL */ - if (bi_can_replace_with_csel(ins)) - return true; + /* +MUX -> *CSEL */ + if (bi_can_replace_with_csel(ins)) + return true; - /* *FADD.v2f16 has restricted abs modifiers, use +FADD.v2f16 instead */ - if (ins->op == BI_OPCODE_FADD_V2F16 && bi_impacted_abs(ins)) - return false; + /* *FADD.v2f16 has restricted abs modifiers, use +FADD.v2f16 instead */ + if (ins->op == BI_OPCODE_FADD_V2F16 && bi_impacted_abs(ins)) + return false; - /* TODO: some additional fp16 constraints */ - return bi_opcode_props[ins->op].fma; + /* TODO: some additional fp16 constraints */ + return bi_opcode_props[ins->op].fma; } static bool bi_impacted_fadd_widens(bi_instr *I) { - enum bi_swizzle swz0 = I->src[0].swizzle; - enum bi_swizzle swz1 = I->src[1].swizzle; + enum bi_swizzle swz0 = I->src[0].swizzle; + enum bi_swizzle swz1 = I->src[1].swizzle; - return (swz0 == BI_SWIZZLE_H00 && swz1 == BI_SWIZZLE_H11) || - (swz0 == BI_SWIZZLE_H11 && swz1 == BI_SWIZZLE_H11) || - (swz0 == BI_SWIZZLE_H11 && swz1 == BI_SWIZZLE_H00); + return (swz0 == BI_SWIZZLE_H00 && swz1 == BI_SWIZZLE_H11) || + (swz0 == BI_SWIZZLE_H11 && swz1 == BI_SWIZZLE_H11) || + (swz0 == BI_SWIZZLE_H11 && swz1 == BI_SWIZZLE_H00); } bool bi_can_add(bi_instr *ins) { - /* +FADD.v2f16 lacks clamp modifier, use *FADD.v2f16 instead */ - if (ins->op == BI_OPCODE_FADD_V2F16 && ins->clamp) - return false; + /* +FADD.v2f16 lacks clamp modifier, use *FADD.v2f16 instead */ + if (ins->op == BI_OPCODE_FADD_V2F16 && ins->clamp) + return false; - /* +FCMP.v2f16 lacks abs modifier, use *FCMP.v2f16 instead */ - if (ins->op == BI_OPCODE_FCMP_V2F16 && (ins->src[0].abs || ins->src[1].abs)) - return false; + /* +FCMP.v2f16 lacks abs modifier, use *FCMP.v2f16 instead */ + if (ins->op == BI_OPCODE_FCMP_V2F16 && (ins->src[0].abs || ins->src[1].abs)) + return false; - /* +FADD.f32 has restricted widens, use +FADD.f32 for the full set */ - if (ins->op == BI_OPCODE_FADD_F32 && bi_impacted_fadd_widens(ins)) - return false; + /* +FADD.f32 has restricted widens, use +FADD.f32 for the full set */ + if (ins->op == BI_OPCODE_FADD_F32 && bi_impacted_fadd_widens(ins)) + return false; - /* TODO: some additional fp16 constraints */ - return 
bi_opcode_props[ins->op].add; + /* TODO: some additional fp16 constraints */ + return bi_opcode_props[ins->op].add; } /* Architecturally, no single instruction has a "not last" constraint. However, @@ -589,7 +592,7 @@ bi_can_add(bi_instr *ins) static bool bi_must_not_last(bi_instr *ins) { - return (ins->nr_dests >= 2) && (ins->op != BI_OPCODE_TEXC_DUAL); + return (ins->nr_dests >= 2) && (ins->op != BI_OPCODE_TEXC_DUAL); } /* Check for a message-passing instruction. +DISCARD.f32 is special-cased; we @@ -601,115 +604,115 @@ bi_must_not_last(bi_instr *ins) bool bi_must_message(bi_instr *ins) { - return (bi_opcode_props[ins->op].message != BIFROST_MESSAGE_NONE) || - (ins->op == BI_OPCODE_DISCARD_F32); + return (bi_opcode_props[ins->op].message != BIFROST_MESSAGE_NONE) || + (ins->op == BI_OPCODE_DISCARD_F32); } static bool bi_fma_atomic(enum bi_opcode op) { - switch (op) { - case BI_OPCODE_ATOM_C_I32: - case BI_OPCODE_ATOM_C_I64: - case BI_OPCODE_ATOM_C1_I32: - case BI_OPCODE_ATOM_C1_I64: - case BI_OPCODE_ATOM_C1_RETURN_I32: - case BI_OPCODE_ATOM_C1_RETURN_I64: - case BI_OPCODE_ATOM_C_RETURN_I32: - case BI_OPCODE_ATOM_C_RETURN_I64: - case BI_OPCODE_ATOM_POST_I32: - case BI_OPCODE_ATOM_POST_I64: - case BI_OPCODE_ATOM_PRE_I64: - return true; - default: - return false; - } + switch (op) { + case BI_OPCODE_ATOM_C_I32: + case BI_OPCODE_ATOM_C_I64: + case BI_OPCODE_ATOM_C1_I32: + case BI_OPCODE_ATOM_C1_I64: + case BI_OPCODE_ATOM_C1_RETURN_I32: + case BI_OPCODE_ATOM_C1_RETURN_I64: + case BI_OPCODE_ATOM_C_RETURN_I32: + case BI_OPCODE_ATOM_C_RETURN_I64: + case BI_OPCODE_ATOM_POST_I32: + case BI_OPCODE_ATOM_POST_I64: + case BI_OPCODE_ATOM_PRE_I64: + return true; + default: + return false; + } } bool bi_reads_zero(bi_instr *ins) { - return !(bi_fma_atomic(ins->op) || ins->op == BI_OPCODE_IMULD); + return !(bi_fma_atomic(ins->op) || ins->op == BI_OPCODE_IMULD); } bool bi_reads_temps(bi_instr *ins, unsigned src) { - switch (ins->op) { - /* Cannot permute a temporary */ - case BI_OPCODE_CLPER_I32: - case BI_OPCODE_CLPER_OLD_I32: - return src != 0; + switch (ins->op) { + /* Cannot permute a temporary */ + case BI_OPCODE_CLPER_I32: + case BI_OPCODE_CLPER_OLD_I32: + return src != 0; - /* ATEST isn't supposed to be restricted, but in practice it always - * wants to source its coverage mask input (source 0) from register 60, - * which won't work properly if we put the input in a temp. This - * requires workarounds in both RA and clause scheduling. - */ - case BI_OPCODE_ATEST: - return src != 0; + /* ATEST isn't supposed to be restricted, but in practice it always + * wants to source its coverage mask input (source 0) from register 60, + * which won't work properly if we put the input in a temp. This + * requires workarounds in both RA and clause scheduling. 
+ */ + case BI_OPCODE_ATEST: + return src != 0; - case BI_OPCODE_IMULD: - return false; - default: - return true; - } + case BI_OPCODE_IMULD: + return false; + default: + return true; + } } static bool bi_impacted_t_modifiers(bi_instr *I, unsigned src) { - assert(src < I->nr_srcs); - enum bi_swizzle swizzle = I->src[src].swizzle; + assert(src < I->nr_srcs); + enum bi_swizzle swizzle = I->src[src].swizzle; - switch (I->op) { - case BI_OPCODE_F16_TO_F32: - case BI_OPCODE_F16_TO_S32: - case BI_OPCODE_F16_TO_U32: - case BI_OPCODE_MKVEC_V2I16: - case BI_OPCODE_S16_TO_F32: - case BI_OPCODE_S16_TO_S32: - case BI_OPCODE_U16_TO_F32: - case BI_OPCODE_U16_TO_U32: - return (swizzle != BI_SWIZZLE_H00); + switch (I->op) { + case BI_OPCODE_F16_TO_F32: + case BI_OPCODE_F16_TO_S32: + case BI_OPCODE_F16_TO_U32: + case BI_OPCODE_MKVEC_V2I16: + case BI_OPCODE_S16_TO_F32: + case BI_OPCODE_S16_TO_S32: + case BI_OPCODE_U16_TO_F32: + case BI_OPCODE_U16_TO_U32: + return (swizzle != BI_SWIZZLE_H00); - case BI_OPCODE_BRANCH_F32: - case BI_OPCODE_LOGB_F32: - case BI_OPCODE_ILOGB_F32: - case BI_OPCODE_FADD_F32: - case BI_OPCODE_FCMP_F32: - case BI_OPCODE_FREXPE_F32: - case BI_OPCODE_FREXPM_F32: - case BI_OPCODE_FROUND_F32: - return (swizzle != BI_SWIZZLE_H01); + case BI_OPCODE_BRANCH_F32: + case BI_OPCODE_LOGB_F32: + case BI_OPCODE_ILOGB_F32: + case BI_OPCODE_FADD_F32: + case BI_OPCODE_FCMP_F32: + case BI_OPCODE_FREXPE_F32: + case BI_OPCODE_FREXPM_F32: + case BI_OPCODE_FROUND_F32: + return (swizzle != BI_SWIZZLE_H01); - case BI_OPCODE_IADD_S32: - case BI_OPCODE_IADD_U32: - case BI_OPCODE_ISUB_S32: - case BI_OPCODE_ISUB_U32: - case BI_OPCODE_IADD_V4S8: - case BI_OPCODE_IADD_V4U8: - case BI_OPCODE_ISUB_V4S8: - case BI_OPCODE_ISUB_V4U8: - return (src == 1) && (swizzle != BI_SWIZZLE_H01); + case BI_OPCODE_IADD_S32: + case BI_OPCODE_IADD_U32: + case BI_OPCODE_ISUB_S32: + case BI_OPCODE_ISUB_U32: + case BI_OPCODE_IADD_V4S8: + case BI_OPCODE_IADD_V4U8: + case BI_OPCODE_ISUB_V4S8: + case BI_OPCODE_ISUB_V4U8: + return (src == 1) && (swizzle != BI_SWIZZLE_H01); - case BI_OPCODE_S8_TO_F32: - case BI_OPCODE_S8_TO_S32: - case BI_OPCODE_U8_TO_F32: - case BI_OPCODE_U8_TO_U32: - return (swizzle != BI_SWIZZLE_B0000); + case BI_OPCODE_S8_TO_F32: + case BI_OPCODE_S8_TO_S32: + case BI_OPCODE_U8_TO_F32: + case BI_OPCODE_U8_TO_U32: + return (swizzle != BI_SWIZZLE_B0000); - case BI_OPCODE_V2S8_TO_V2F16: - case BI_OPCODE_V2S8_TO_V2S16: - case BI_OPCODE_V2U8_TO_V2F16: - case BI_OPCODE_V2U8_TO_V2U16: - return (swizzle != BI_SWIZZLE_B0022); + case BI_OPCODE_V2S8_TO_V2F16: + case BI_OPCODE_V2S8_TO_V2S16: + case BI_OPCODE_V2U8_TO_V2F16: + case BI_OPCODE_V2U8_TO_V2U16: + return (swizzle != BI_SWIZZLE_B0022); - case BI_OPCODE_IADD_V2S16: - case BI_OPCODE_IADD_V2U16: - case BI_OPCODE_ISUB_V2S16: - case BI_OPCODE_ISUB_V2U16: - return (src == 1) && (swizzle >= BI_SWIZZLE_H11); + case BI_OPCODE_IADD_V2S16: + case BI_OPCODE_IADD_V2U16: + case BI_OPCODE_ISUB_V2S16: + case BI_OPCODE_ISUB_V2U16: + return (src == 1) && (swizzle >= BI_SWIZZLE_H11); #if 0 /* Restriction on IADD in 64-bit clauses on G72 */ @@ -718,52 +721,52 @@ bi_impacted_t_modifiers(bi_instr *I, unsigned src) return (src == 1) && (swizzle != BI_SWIZZLE_D0); #endif - default: - return false; - } + default: + return false; + } } bool bi_reads_t(bi_instr *ins, unsigned src) { - /* Branch offset cannot come from passthrough */ - if (bi_opcode_props[ins->op].branch) - return src != 2; + /* Branch offset cannot come from passthrough */ + if (bi_opcode_props[ins->op].branch) + return src != 2; - /* 
Table can never read passthrough */ - if (bi_opcode_props[ins->op].table) - return false; + /* Table can never read passthrough */ + if (bi_opcode_props[ins->op].table) + return false; - /* Staging register reads may happen before the succeeding register - * block encodes a write, so effectively there is no passthrough */ - if (bi_is_staging_src(ins, src)) - return false; + /* Staging register reads may happen before the succeeding register + * block encodes a write, so effectively there is no passthrough */ + if (bi_is_staging_src(ins, src)) + return false; - /* Bifrost cores newer than Mali G71 have restrictions on swizzles on - * same-cycle temporaries. Check the list for these hazards. */ - if (bi_impacted_t_modifiers(ins, src)) - return false; + /* Bifrost cores newer than Mali G71 have restrictions on swizzles on + * same-cycle temporaries. Check the list for these hazards. */ + if (bi_impacted_t_modifiers(ins, src)) + return false; - /* Descriptor must not come from a passthrough */ - switch (ins->op) { - case BI_OPCODE_LD_CVT: - case BI_OPCODE_LD_TILE: - case BI_OPCODE_ST_CVT: - case BI_OPCODE_ST_TILE: - case BI_OPCODE_TEXC: - case BI_OPCODE_TEXC_DUAL: - return src != 2; - case BI_OPCODE_BLEND: - return src != 2 && src != 3; + /* Descriptor must not come from a passthrough */ + switch (ins->op) { + case BI_OPCODE_LD_CVT: + case BI_OPCODE_LD_TILE: + case BI_OPCODE_ST_CVT: + case BI_OPCODE_ST_TILE: + case BI_OPCODE_TEXC: + case BI_OPCODE_TEXC_DUAL: + return src != 2; + case BI_OPCODE_BLEND: + return src != 2 && src != 3; - /* +JUMP can't read the offset from T */ - case BI_OPCODE_JUMP: - return false; + /* +JUMP can't read the offset from T */ + case BI_OPCODE_JUMP: + return false; - /* Else, just check if we can read any temps */ - default: - return bi_reads_temps(ins, src); - } + /* Else, just check if we can read any temps */ + default: + return bi_reads_temps(ins, src); + } } /* Counts the number of 64-bit constants required by a clause. TODO: We @@ -773,12 +776,12 @@ bi_reads_t(bi_instr *ins, unsigned src) static unsigned bi_nconstants(struct bi_clause_state *clause) { - unsigned count_32 = 0; + unsigned count_32 = 0; - for (unsigned i = 0; i < ARRAY_SIZE(clause->consts); ++i) - count_32 += clause->consts[i].constant_count; + for (unsigned i = 0; i < ARRAY_SIZE(clause->consts); ++i) + count_32 += clause->consts[i].constant_count; - return DIV_ROUND_UP(count_32, 2); + return DIV_ROUND_UP(count_32, 2); } /* Would there be space for constants if we added one tuple? */ @@ -786,7 +789,7 @@ bi_nconstants(struct bi_clause_state *clause) static bool bi_space_for_more_constants(struct bi_clause_state *clause) { - return (bi_nconstants(clause) < 13 - (clause->tuple_count + 1)); + return (bi_nconstants(clause) < 13 - (clause->tuple_count + 1)); } /* Updates the FAU assignment for a tuple. 
A valid FAU assignment must be @@ -795,85 +798,83 @@ bi_space_for_more_constants(struct bi_clause_state *clause) * bi_instr_schedulable */ static bool -bi_update_fau(struct bi_clause_state *clause, - struct bi_tuple_state *tuple, - bi_instr *instr, bool fma, bool destructive) +bi_update_fau(struct bi_clause_state *clause, struct bi_tuple_state *tuple, + bi_instr *instr, bool fma, bool destructive) { - /* Maintain our own constants, for nondestructive mode */ - uint32_t copied_constants[2], copied_count; - unsigned *constant_count = &tuple->constant_count; - uint32_t *constants = tuple->constants; - enum bir_fau fau = tuple->fau; + /* Maintain our own constants, for nondestructive mode */ + uint32_t copied_constants[2], copied_count; + unsigned *constant_count = &tuple->constant_count; + uint32_t *constants = tuple->constants; + enum bir_fau fau = tuple->fau; - if (!destructive) { - memcpy(copied_constants, tuple->constants, - (*constant_count) * sizeof(constants[0])); - copied_count = tuple->constant_count; + if (!destructive) { + memcpy(copied_constants, tuple->constants, + (*constant_count) * sizeof(constants[0])); + copied_count = tuple->constant_count; - constant_count = &copied_count; - constants = copied_constants; - } + constant_count = &copied_count; + constants = copied_constants; + } - bi_foreach_src(instr, s) { - bi_index src = instr->src[s]; + bi_foreach_src(instr, s) { + bi_index src = instr->src[s]; - if (src.type == BI_INDEX_FAU) { - bool no_constants = *constant_count == 0; - bool no_other_fau = (fau == src.value) || !fau; - bool mergable = no_constants && no_other_fau; + if (src.type == BI_INDEX_FAU) { + bool no_constants = *constant_count == 0; + bool no_other_fau = (fau == src.value) || !fau; + bool mergable = no_constants && no_other_fau; - if (destructive) { - assert(mergable); - tuple->fau = src.value; - } else if (!mergable) { - return false; - } + if (destructive) { + assert(mergable); + tuple->fau = src.value; + } else if (!mergable) { + return false; + } - fau = src.value; - } else if (src.type == BI_INDEX_CONSTANT) { - /* No need to reserve space if we have a fast 0 */ - if (src.value == 0 && fma && bi_reads_zero(instr)) - continue; + fau = src.value; + } else if (src.type == BI_INDEX_CONSTANT) { + /* No need to reserve space if we have a fast 0 */ + if (src.value == 0 && fma && bi_reads_zero(instr)) + continue; - /* If there is a branch target, #0 by convention is the - * PC-relative offset to the target */ - bool pcrel = instr->branch_target && src.value == 0; - bool found = false; + /* If there is a branch target, #0 by convention is the + * PC-relative offset to the target */ + bool pcrel = instr->branch_target && src.value == 0; + bool found = false; - for (unsigned i = 0; i < *constant_count; ++i) { - found |= (constants[i] == src.value) && - (i != tuple->pcrel_idx); - } + for (unsigned i = 0; i < *constant_count; ++i) { + found |= (constants[i] == src.value) && (i != tuple->pcrel_idx); + } - /* pcrel constants are unique, so don't match */ - if (found && !pcrel) - continue; + /* pcrel constants are unique, so don't match */ + if (found && !pcrel) + continue; - bool no_fau = (*constant_count > 0) || !fau; - bool mergable = no_fau && ((*constant_count) < 2); + bool no_fau = (*constant_count > 0) || !fau; + bool mergable = no_fau && ((*constant_count) < 2); - if (destructive) { - assert(mergable); + if (destructive) { + assert(mergable); - if (pcrel) - tuple->pcrel_idx = *constant_count; - } else if (!mergable) - return false; + if (pcrel) + tuple->pcrel_idx 
= *constant_count; + } else if (!mergable) + return false; - constants[(*constant_count)++] = src.value; - } - } + constants[(*constant_count)++] = src.value; + } + } - /* Constants per clause may be limited by tuple count */ - bool room_for_constants = (*constant_count == 0) || - bi_space_for_more_constants(clause); + /* Constants per clause may be limited by tuple count */ + bool room_for_constants = + (*constant_count == 0) || bi_space_for_more_constants(clause); - if (destructive) - assert(room_for_constants); - else if (!room_for_constants) - return false; + if (destructive) + assert(room_for_constants); + else if (!room_for_constants) + return false; - return true; + return true; } /* Given an in-progress tuple, a candidate new instruction to add to the tuple, @@ -886,28 +887,28 @@ bi_update_fau(struct bi_clause_state *clause, static bool bi_tuple_is_new_src(bi_instr *instr, struct bi_reg_state *reg, unsigned src_idx) { - assert(src_idx < instr->nr_srcs); - bi_index src = instr->src[src_idx]; + assert(src_idx < instr->nr_srcs); + bi_index src = instr->src[src_idx]; - /* Only consider sources which come from the register file */ - if (!(src.type == BI_INDEX_NORMAL || src.type == BI_INDEX_REGISTER)) - return false; + /* Only consider sources which come from the register file */ + if (!(src.type == BI_INDEX_NORMAL || src.type == BI_INDEX_REGISTER)) + return false; - /* Staging register reads bypass the usual register file mechanism */ - if (bi_is_staging_src(instr, src_idx)) - return false; + /* Staging register reads bypass the usual register file mechanism */ + if (bi_is_staging_src(instr, src_idx)) + return false; - /* If a source is already read in the tuple, it is already counted */ - for (unsigned t = 0; t < reg->nr_reads; ++t) - if (bi_is_word_equiv(src, reg->reads[t])) - return false; + /* If a source is already read in the tuple, it is already counted */ + for (unsigned t = 0; t < reg->nr_reads; ++t) + if (bi_is_word_equiv(src, reg->reads[t])) + return false; - /* If a source is read in _this instruction_, it is already counted */ - for (unsigned t = 0; t < src_idx; ++t) - if (bi_is_word_equiv(src, instr->src[t])) - return false; + /* If a source is read in _this instruction_, it is already counted */ + for (unsigned t = 0; t < src_idx; ++t) + if (bi_is_word_equiv(src, instr->src[t])) + return false; - return true; + return true; } /* Given two tuples in source order, count the number of register reads of the @@ -916,31 +917,31 @@ bi_tuple_is_new_src(bi_instr *instr, struct bi_reg_state *reg, unsigned src_idx) */ static unsigned -bi_count_succ_reads(bi_index t0, bi_index t1, - bi_index *succ_reads, unsigned nr_succ_reads) +bi_count_succ_reads(bi_index t0, bi_index t1, bi_index *succ_reads, + unsigned nr_succ_reads) { - unsigned reads = 0; + unsigned reads = 0; - for (unsigned i = 0; i < nr_succ_reads; ++i) { - bool unique = true; + for (unsigned i = 0; i < nr_succ_reads; ++i) { + bool unique = true; - for (unsigned j = 0; j < i; ++j) - if (bi_is_word_equiv(succ_reads[i], succ_reads[j])) - unique = false; + for (unsigned j = 0; j < i; ++j) + if (bi_is_word_equiv(succ_reads[i], succ_reads[j])) + unique = false; - if (!unique) - continue; + if (!unique) + continue; - if (bi_is_word_equiv(succ_reads[i], t0)) - continue; + if (bi_is_word_equiv(succ_reads[i], t0)) + continue; - if (bi_is_word_equiv(succ_reads[i], t1)) - continue; + if (bi_is_word_equiv(succ_reads[i], t1)) + continue; - reads++; - } + reads++; + } - return reads; + return reads; } /* Not all instructions can read from 
the staging passthrough (as determined by @@ -951,23 +952,23 @@ bi_count_succ_reads(bi_index t0, bi_index t1, static bool bi_has_staging_passthrough_hazard(bi_index fma, bi_instr *add) { - bi_foreach_src(add, s) { - bi_index src = add->src[s]; + bi_foreach_src(add, s) { + bi_index src = add->src[s]; - if (src.type != BI_INDEX_REGISTER) - continue; + if (src.type != BI_INDEX_REGISTER) + continue; - unsigned count = bi_count_read_registers(add, s); - bool read = false; + unsigned count = bi_count_read_registers(add, s); + bool read = false; - for (unsigned d = 0; d < count; ++d) - read |= bi_is_equiv(fma, bi_register(src.value + d)); + for (unsigned d = 0; d < count; ++d) + read |= bi_is_equiv(fma, bi_register(src.value + d)); - if (read && !bi_reads_t(add, s)) - return true; - } + if (read && !bi_reads_t(add, s)) + return true; + } - return false; + return false; } /* Likewise for cross-tuple passthrough (reads_temps) */ @@ -975,18 +976,18 @@ bi_has_staging_passthrough_hazard(bi_index fma, bi_instr *add) static bool bi_has_cross_passthrough_hazard(bi_tuple *succ, bi_instr *ins) { - if (ins->nr_dests == 0) - return false; + if (ins->nr_dests == 0) + return false; - bi_foreach_instr_in_tuple(succ, pins) { - bi_foreach_src(pins, s) { - if (bi_is_word_equiv(ins->dest[0], pins->src[s]) && - !bi_reads_temps(pins, s)) - return true; - } - } + bi_foreach_instr_in_tuple(succ, pins) { + bi_foreach_src(pins, s) { + if (bi_is_word_equiv(ins->dest[0], pins->src[s]) && + !bi_reads_temps(pins, s)) + return true; + } + } - return false; + return false; } /* Is a register written other than the staging mechanism? ATEST is special, @@ -998,21 +999,21 @@ bi_has_cross_passthrough_hazard(bi_tuple *succ, bi_instr *ins) static unsigned bi_write_count(bi_instr *instr, uint64_t live_after_temp) { - if (instr->op == BI_OPCODE_ATEST || instr->op == BI_OPCODE_BLEND) - return 1; + if (instr->op == BI_OPCODE_ATEST || instr->op == BI_OPCODE_BLEND) + return 1; - unsigned count = 0; + unsigned count = 0; - bi_foreach_dest(instr, d) { - if (d == 0 && bi_opcode_props[instr->op].sr_write) - continue; + bi_foreach_dest(instr, d) { + if (d == 0 && bi_opcode_props[instr->op].sr_write) + continue; - assert(instr->dest[0].type == BI_INDEX_REGISTER); - if (live_after_temp & BITFIELD64_BIT(instr->dest[0].value)) - count++; - } + assert(instr->dest[0].type == BI_INDEX_REGISTER); + if (live_after_temp & BITFIELD64_BIT(instr->dest[0].value)) + count++; + } - return count; + return count; } /* @@ -1022,8 +1023,9 @@ bi_write_count(bi_instr *instr, uint64_t live_after_temp) static bool bi_needs_ftz(bi_instr *I) { - return (I->op == BI_OPCODE_F16_TO_F32 || - I->op == BI_OPCODE_V2F32_TO_V2F16) && I->ftz; + return (I->op == BI_OPCODE_F16_TO_F32 || + I->op == BI_OPCODE_V2F32_TO_V2F16) && + I->ftz; } /* @@ -1033,8 +1035,8 @@ bi_needs_ftz(bi_instr *I) static bool bi_numerically_incompatible(struct bi_clause_state *clause, bi_instr *instr) { - return (clause->ftz != BI_FTZ_STATE_NONE) && - ((clause->ftz == BI_FTZ_STATE_ENABLE) != bi_needs_ftz(instr)); + return (clause->ftz != BI_FTZ_STATE_NONE) && + ((clause->ftz == BI_FTZ_STATE_ENABLE) != bi_needs_ftz(instr)); } /* Instruction placement entails two questions: what subset of instructions in @@ -1045,209 +1047,208 @@ bi_numerically_incompatible(struct bi_clause_state *clause, bi_instr *instr) * whitepaper. The cost function is a heuristic. 
*/ static bool -bi_instr_schedulable(bi_instr *instr, - struct bi_clause_state *clause, - struct bi_tuple_state *tuple, - uint64_t live_after_temp, - bool fma) +bi_instr_schedulable(bi_instr *instr, struct bi_clause_state *clause, + struct bi_tuple_state *tuple, uint64_t live_after_temp, + bool fma) { - /* The units must match */ - if ((fma && !bi_can_fma(instr)) || (!fma && !bi_can_add(instr))) - return false; + /* The units must match */ + if ((fma && !bi_can_fma(instr)) || (!fma && !bi_can_add(instr))) + return false; - /* There can only be one message-passing instruction per clause */ - if (bi_must_message(instr) && clause->message) - return false; + /* There can only be one message-passing instruction per clause */ + if (bi_must_message(instr) && clause->message) + return false; - /* Some instructions have placement requirements */ - if (bi_opcode_props[instr->op].last && !tuple->last) - return false; + /* Some instructions have placement requirements */ + if (bi_opcode_props[instr->op].last && !tuple->last) + return false; - if (bi_must_not_last(instr) && tuple->last) - return false; + if (bi_must_not_last(instr) && tuple->last) + return false; - /* Numerical properties must be compatible with the clause */ - if (bi_numerically_incompatible(clause, instr)) - return false; + /* Numerical properties must be compatible with the clause */ + if (bi_numerically_incompatible(clause, instr)) + return false; - /* Message-passing instructions are not guaranteed write within the - * same clause (most likely they will not), so if a later instruction - * in the clause accesses the destination, the message-passing - * instruction can't be scheduled */ - if (bi_opcode_props[instr->op].sr_write) { - bi_foreach_dest(instr, d) { - unsigned nr = bi_count_write_registers(instr, d); - assert(instr->dest[d].type == BI_INDEX_REGISTER); - unsigned reg = instr->dest[d].value; + /* Message-passing instructions are not guaranteed write within the + * same clause (most likely they will not), so if a later instruction + * in the clause accesses the destination, the message-passing + * instruction can't be scheduled */ + if (bi_opcode_props[instr->op].sr_write) { + bi_foreach_dest(instr, d) { + unsigned nr = bi_count_write_registers(instr, d); + assert(instr->dest[d].type == BI_INDEX_REGISTER); + unsigned reg = instr->dest[d].value; - for (unsigned i = 0; i < clause->access_count; ++i) { - bi_index idx = clause->accesses[i]; - for (unsigned d = 0; d < nr; ++d) { - if (bi_is_equiv(bi_register(reg + d), idx)) - return false; - } - } - } - } + for (unsigned i = 0; i < clause->access_count; ++i) { + bi_index idx = clause->accesses[i]; + for (unsigned d = 0; d < nr; ++d) { + if (bi_is_equiv(bi_register(reg + d), idx)) + return false; + } + } + } + } - if (bi_opcode_props[instr->op].sr_read && !bi_is_null(instr->src[0])) { - unsigned nr = bi_count_read_registers(instr, 0); - assert(instr->src[0].type == BI_INDEX_REGISTER); - unsigned reg = instr->src[0].value; + if (bi_opcode_props[instr->op].sr_read && !bi_is_null(instr->src[0])) { + unsigned nr = bi_count_read_registers(instr, 0); + assert(instr->src[0].type == BI_INDEX_REGISTER); + unsigned reg = instr->src[0].value; - for (unsigned i = 0; i < clause->access_count; ++i) { - bi_index idx = clause->accesses[i]; - for (unsigned d = 0; d < nr; ++d) { - if (bi_is_equiv(bi_register(reg + d), idx)) - return false; - } - } - } + for (unsigned i = 0; i < clause->access_count; ++i) { + bi_index idx = clause->accesses[i]; + for (unsigned d = 0; d < nr; ++d) { + if 
(bi_is_equiv(bi_register(reg + d), idx)) + return false; + } + } + } - /* If FAU is already assigned, we may not disrupt that. Do a - * non-disruptive test update */ - if (!bi_update_fau(clause, tuple, instr, fma, false)) - return false; + /* If FAU is already assigned, we may not disrupt that. Do a + * non-disruptive test update */ + if (!bi_update_fau(clause, tuple, instr, fma, false)) + return false; - /* If this choice of FMA would force a staging passthrough, the ADD - * instruction must support such a passthrough */ - if (tuple->add && instr->nr_dests && bi_has_staging_passthrough_hazard(instr->dest[0], tuple->add)) - return false; + /* If this choice of FMA would force a staging passthrough, the ADD + * instruction must support such a passthrough */ + if (tuple->add && instr->nr_dests && + bi_has_staging_passthrough_hazard(instr->dest[0], tuple->add)) + return false; - /* If this choice of destination would force a cross-tuple passthrough, the next tuple must support that */ - if (tuple->prev && bi_has_cross_passthrough_hazard(tuple->prev, instr)) - return false; + /* If this choice of destination would force a cross-tuple passthrough, the + * next tuple must support that */ + if (tuple->prev && bi_has_cross_passthrough_hazard(tuple->prev, instr)) + return false; - /* Register file writes are limited */ - unsigned total_writes = tuple->reg.nr_writes; - total_writes += bi_write_count(instr, live_after_temp); + /* Register file writes are limited */ + unsigned total_writes = tuple->reg.nr_writes; + total_writes += bi_write_count(instr, live_after_temp); - /* Last tuple in a clause can only write a single value */ - if (tuple->last && total_writes > 1) - return false; + /* Last tuple in a clause can only write a single value */ + if (tuple->last && total_writes > 1) + return false; - /* Register file reads are limited, so count unique */ + /* Register file reads are limited, so count unique */ - unsigned unique_new_srcs = 0; + unsigned unique_new_srcs = 0; - bi_foreach_src(instr, s) { - if (bi_tuple_is_new_src(instr, &tuple->reg, s)) - unique_new_srcs++; - } + bi_foreach_src(instr, s) { + if (bi_tuple_is_new_src(instr, &tuple->reg, s)) + unique_new_srcs++; + } - unsigned total_srcs = tuple->reg.nr_reads + unique_new_srcs; + unsigned total_srcs = tuple->reg.nr_reads + unique_new_srcs; - bool can_spill_to_moves = (!tuple->add); - can_spill_to_moves &= (bi_nconstants(clause) < 13 - (clause->tuple_count + 2)); - can_spill_to_moves &= (clause->tuple_count < 7); + bool can_spill_to_moves = (!tuple->add); + can_spill_to_moves &= + (bi_nconstants(clause) < 13 - (clause->tuple_count + 2)); + can_spill_to_moves &= (clause->tuple_count < 7); - /* However, we can get an extra 1 or 2 sources by inserting moves */ - if (total_srcs > (can_spill_to_moves ? 4 : 3)) - return false; + /* However, we can get an extra 1 or 2 sources by inserting moves */ + if (total_srcs > (can_spill_to_moves ? 4 : 3)) + return false; - /* Count effective reads for the successor */ - unsigned succ_reads = 0; + /* Count effective reads for the successor */ + unsigned succ_reads = 0; - if (instr->nr_dests) { - bool has_t1 = tuple->add && tuple->add->nr_dests; - succ_reads = bi_count_succ_reads(instr->dest[0], - has_t1 ? tuple->add->dest[0] : bi_null(), - tuple->prev_reads, - tuple->nr_prev_reads); - } + if (instr->nr_dests) { + bool has_t1 = tuple->add && tuple->add->nr_dests; + succ_reads = bi_count_succ_reads(instr->dest[0], + has_t1 ? 
tuple->add->dest[0] : bi_null(), + tuple->prev_reads, tuple->nr_prev_reads); + } - /* Successor must satisfy R+W <= 4, so we require W <= 4-R */ - if ((signed) total_writes > (4 - (signed) succ_reads)) - return false; + /* Successor must satisfy R+W <= 4, so we require W <= 4-R */ + if ((signed)total_writes > (4 - (signed)succ_reads)) + return false; - return true; + return true; } static signed bi_instr_cost(bi_instr *instr, struct bi_tuple_state *tuple) { - signed cost = 0; + signed cost = 0; - /* Instructions that can schedule to either FMA or to ADD should be - * deprioritized since they're easier to reschedule elsewhere */ - if (bi_can_fma(instr) && bi_can_add(instr)) - cost++; + /* Instructions that can schedule to either FMA or to ADD should be + * deprioritized since they're easier to reschedule elsewhere */ + if (bi_can_fma(instr) && bi_can_add(instr)) + cost++; - /* Message-passing instructions impose constraints on the registers - * later in the clause, so schedule them as late within a clause as - * possible (<==> prioritize them since we're backwards <==> decrease - * cost) */ - if (bi_must_message(instr)) - cost--; + /* Message-passing instructions impose constraints on the registers + * later in the clause, so schedule them as late within a clause as + * possible (<==> prioritize them since we're backwards <==> decrease + * cost) */ + if (bi_must_message(instr)) + cost--; - /* Last instructions are big constraints (XXX: no effect on shader-db) */ - if (bi_opcode_props[instr->op].last) - cost -= 2; + /* Last instructions are big constraints (XXX: no effect on shader-db) */ + if (bi_opcode_props[instr->op].last) + cost -= 2; - return cost; + return cost; } static unsigned -bi_choose_index(struct bi_worklist st, - struct bi_clause_state *clause, - struct bi_tuple_state *tuple, - uint64_t live_after_temp, +bi_choose_index(struct bi_worklist st, struct bi_clause_state *clause, + struct bi_tuple_state *tuple, uint64_t live_after_temp, bool fma) { - unsigned i, best_idx = ~0; - signed best_cost = INT_MAX; + unsigned i, best_idx = ~0; + signed best_cost = INT_MAX; - BITSET_FOREACH_SET(i, st.worklist, st.count) { - bi_instr *instr = st.instructions[i]; + BITSET_FOREACH_SET(i, st.worklist, st.count) { + bi_instr *instr = st.instructions[i]; - if (!bi_instr_schedulable(instr, clause, tuple, live_after_temp, fma)) - continue; + if (!bi_instr_schedulable(instr, clause, tuple, live_after_temp, fma)) + continue; - signed cost = bi_instr_cost(instr, tuple); + signed cost = bi_instr_cost(instr, tuple); - /* Tie break in favour of later instructions, under the - * assumption this promotes temporary usage (reducing pressure - * on the register file). This is a side effect of a prepass - * scheduling for pressure. */ + /* Tie break in favour of later instructions, under the + * assumption this promotes temporary usage (reducing pressure + * on the register file). This is a side effect of a prepass + * scheduling for pressure. 
*/ - if (cost <= best_cost) { - best_idx = i; - best_cost = cost; - } - } + if (cost <= best_cost) { + best_idx = i; + best_cost = cost; + } + } - return best_idx; + return best_idx; } static void bi_pop_instr(struct bi_clause_state *clause, struct bi_tuple_state *tuple, - bi_instr *instr, uint64_t live_after_temp, bool fma) + bi_instr *instr, uint64_t live_after_temp, bool fma) { - bi_update_fau(clause, tuple, instr, fma, true); + bi_update_fau(clause, tuple, instr, fma, true); - assert(clause->access_count + instr->nr_srcs + instr->nr_dests <= ARRAY_SIZE(clause->accesses)); + assert(clause->access_count + instr->nr_srcs + instr->nr_dests <= + ARRAY_SIZE(clause->accesses)); - memcpy(clause->accesses + clause->access_count, - instr->src, sizeof(instr->src[0]) * instr->nr_srcs); - clause->access_count += instr->nr_srcs; + memcpy(clause->accesses + clause->access_count, instr->src, + sizeof(instr->src[0]) * instr->nr_srcs); + clause->access_count += instr->nr_srcs; - memcpy(clause->accesses + clause->access_count, - instr->dest, sizeof(instr->dest[0]) * instr->nr_dests); - clause->access_count += instr->nr_dests; + memcpy(clause->accesses + clause->access_count, instr->dest, + sizeof(instr->dest[0]) * instr->nr_dests); + clause->access_count += instr->nr_dests; - tuple->reg.nr_writes += bi_write_count(instr, live_after_temp); + tuple->reg.nr_writes += bi_write_count(instr, live_after_temp); - bi_foreach_src(instr, s) { - if (bi_tuple_is_new_src(instr, &tuple->reg, s)) - tuple->reg.reads[tuple->reg.nr_reads++] = instr->src[s]; - } + bi_foreach_src(instr, s) { + if (bi_tuple_is_new_src(instr, &tuple->reg, s)) + tuple->reg.reads[tuple->reg.nr_reads++] = instr->src[s]; + } - /* This could be optimized to allow pairing integer instructions with - * special flush-to-zero instructions, but punting on this until we have - * a workload that cares. - */ - clause->ftz = bi_needs_ftz(instr) ? BI_FTZ_STATE_ENABLE : - BI_FTZ_STATE_DISABLE; + /* This could be optimized to allow pairing integer instructions with + * special flush-to-zero instructions, but punting on this until we have + * a workload that cares. + */ + clause->ftz = + bi_needs_ftz(instr) ? BI_FTZ_STATE_ENABLE : BI_FTZ_STATE_DISABLE; } /* Choose the best instruction and pop it off the worklist. 
Returns NULL if no @@ -1255,74 +1256,71 @@ bi_pop_instr(struct bi_clause_state *clause, struct bi_tuple_state *tuple, static bi_instr * bi_take_instr(bi_context *ctx, struct bi_worklist st, - struct bi_clause_state *clause, - struct bi_tuple_state *tuple, - uint64_t live_after_temp, - bool fma) + struct bi_clause_state *clause, struct bi_tuple_state *tuple, + uint64_t live_after_temp, bool fma) { - if (tuple->add && tuple->add->op == BI_OPCODE_CUBEFACE) - return bi_lower_cubeface(ctx, clause, tuple); - else if (tuple->add && tuple->add->op == BI_OPCODE_ATOM_RETURN_I32) - return bi_lower_atom_c(ctx, clause, tuple); - else if (tuple->add && tuple->add->op == BI_OPCODE_ATOM1_RETURN_I32) - return bi_lower_atom_c1(ctx, clause, tuple); - else if (tuple->add && tuple->add->op == BI_OPCODE_SEG_ADD_I64) - return bi_lower_seg_add(ctx, clause, tuple); - else if (tuple->add && tuple->add->table) - return bi_lower_dtsel(ctx, clause, tuple); + if (tuple->add && tuple->add->op == BI_OPCODE_CUBEFACE) + return bi_lower_cubeface(ctx, clause, tuple); + else if (tuple->add && tuple->add->op == BI_OPCODE_ATOM_RETURN_I32) + return bi_lower_atom_c(ctx, clause, tuple); + else if (tuple->add && tuple->add->op == BI_OPCODE_ATOM1_RETURN_I32) + return bi_lower_atom_c1(ctx, clause, tuple); + else if (tuple->add && tuple->add->op == BI_OPCODE_SEG_ADD_I64) + return bi_lower_seg_add(ctx, clause, tuple); + else if (tuple->add && tuple->add->table) + return bi_lower_dtsel(ctx, clause, tuple); - /* TODO: Optimize these moves */ - if (!fma && tuple->nr_prev_reads > 3) { - /* Only spill by one source for now */ - assert(tuple->nr_prev_reads == 4); + /* TODO: Optimize these moves */ + if (!fma && tuple->nr_prev_reads > 3) { + /* Only spill by one source for now */ + assert(tuple->nr_prev_reads == 4); - /* Pick a source to spill */ - bi_index src = tuple->prev_reads[0]; + /* Pick a source to spill */ + bi_index src = tuple->prev_reads[0]; - /* Schedule the spill */ - bi_builder b = bi_init_builder(ctx, bi_before_tuple(tuple->prev)); - bi_instr *mov = bi_mov_i32_to(&b, src, src); - bi_pop_instr(clause, tuple, mov, live_after_temp, fma); - return mov; - } + /* Schedule the spill */ + bi_builder b = bi_init_builder(ctx, bi_before_tuple(tuple->prev)); + bi_instr *mov = bi_mov_i32_to(&b, src, src); + bi_pop_instr(clause, tuple, mov, live_after_temp, fma); + return mov; + } #ifndef NDEBUG - /* Don't pair instructions if debugging */ - if ((bifrost_debug & BIFROST_DBG_NOSCHED) && tuple->add) - return NULL; + /* Don't pair instructions if debugging */ + if ((bifrost_debug & BIFROST_DBG_NOSCHED) && tuple->add) + return NULL; #endif - unsigned idx = bi_choose_index(st, clause, tuple, live_after_temp, fma); + unsigned idx = bi_choose_index(st, clause, tuple, live_after_temp, fma); - if (idx >= st.count) - return NULL; + if (idx >= st.count) + return NULL; - /* Update state to reflect taking the instruction */ - bi_instr *instr = st.instructions[idx]; + /* Update state to reflect taking the instruction */ + bi_instr *instr = st.instructions[idx]; - BITSET_CLEAR(st.worklist, idx); - bi_update_worklist(st, idx); - bi_pop_instr(clause, tuple, instr, live_after_temp, fma); + BITSET_CLEAR(st.worklist, idx); + bi_update_worklist(st, idx); + bi_pop_instr(clause, tuple, instr, live_after_temp, fma); - /* Fixups */ - bi_builder b = bi_init_builder(ctx, bi_before_instr(instr)); + /* Fixups */ + bi_builder b = bi_init_builder(ctx, bi_before_instr(instr)); - if (instr->op == BI_OPCODE_IADD_U32 && fma) { - assert(bi_can_iaddc(instr)); - bi_instr *iaddc 
= - bi_iaddc_i32_to(&b, instr->dest[0], instr->src[0], + if (instr->op == BI_OPCODE_IADD_U32 && fma) { + assert(bi_can_iaddc(instr)); + bi_instr *iaddc = bi_iaddc_i32_to(&b, instr->dest[0], instr->src[0], instr->src[1], bi_zero()); - bi_remove_instruction(instr); - instr = iaddc; - } else if (fma && bi_can_replace_with_csel(instr)) { - bi_instr *csel = bi_csel_from_mux(&b, instr, false); + bi_remove_instruction(instr); + instr = iaddc; + } else if (fma && bi_can_replace_with_csel(instr)) { + bi_instr *csel = bi_csel_from_mux(&b, instr, false); - bi_remove_instruction(instr); - instr = csel; - } + bi_remove_instruction(instr); + instr = csel; + } - return instr; + return instr; } /* Variant of bi_rewrite_index_src_single that uses word-equivalence, rewriting @@ -1331,26 +1329,25 @@ bi_take_instr(bi_context *ctx, struct bi_worklist st, * passthrough (which is impossible) */ static void -bi_use_passthrough(bi_instr *ins, bi_index old, - enum bifrost_packed_src new, - bool except_sr) +bi_use_passthrough(bi_instr *ins, bi_index old, enum bifrost_packed_src new, + bool except_sr) { - /* Optional for convenience */ - if (!ins) - return; + /* Optional for convenience */ + if (!ins) + return; - assert(!bi_is_null(old)); + assert(!bi_is_null(old)); - bi_foreach_src(ins, i) { - if ((i == 0 || i == 4) && except_sr) - continue; + bi_foreach_src(ins, i) { + if ((i == 0 || i == 4) && except_sr) + continue; - if (bi_is_word_equiv(ins->src[i], old)) { - ins->src[i].type = BI_INDEX_PASS; - ins->src[i].value = new; - ins->src[i].offset = 0; - } - } + if (bi_is_word_equiv(ins->src[i], old)) { + ins->src[i].type = BI_INDEX_PASS; + ins->src[i].value = new; + ins->src[i].offset = 0; + } + } } /* Rewrites an adjacent pair of tuples _prec_eding and _succ_eding to use @@ -1364,43 +1361,48 @@ bi_use_passthrough(bi_instr *ins, bi_index old, static void bi_rewrite_passthrough(bi_tuple prec, bi_tuple succ) { - bool sr_read = succ.add ? bi_opcode_props[succ.add->op].sr_read : false; + bool sr_read = succ.add ? bi_opcode_props[succ.add->op].sr_read : false; - if (prec.add && prec.add->nr_dests) { - bi_use_passthrough(succ.fma, prec.add->dest[0], BIFROST_SRC_PASS_ADD, false); - bi_use_passthrough(succ.add, prec.add->dest[0], BIFROST_SRC_PASS_ADD, sr_read); - } + if (prec.add && prec.add->nr_dests) { + bi_use_passthrough(succ.fma, prec.add->dest[0], BIFROST_SRC_PASS_ADD, + false); + bi_use_passthrough(succ.add, prec.add->dest[0], BIFROST_SRC_PASS_ADD, + sr_read); + } - if (prec.fma && prec.fma->nr_dests) { - bi_use_passthrough(succ.fma, prec.fma->dest[0], BIFROST_SRC_PASS_FMA, false); - bi_use_passthrough(succ.add, prec.fma->dest[0], BIFROST_SRC_PASS_FMA, sr_read); - } + if (prec.fma && prec.fma->nr_dests) { + bi_use_passthrough(succ.fma, prec.fma->dest[0], BIFROST_SRC_PASS_FMA, + false); + bi_use_passthrough(succ.add, prec.fma->dest[0], BIFROST_SRC_PASS_FMA, + sr_read); + } } static void bi_rewrite_fau_to_pass(bi_tuple *tuple) { - bi_foreach_instr_and_src_in_tuple(tuple, ins, s) { - if (ins->src[s].type != BI_INDEX_FAU) continue; + bi_foreach_instr_and_src_in_tuple(tuple, ins, s) { + if (ins->src[s].type != BI_INDEX_FAU) + continue; - bi_index pass = bi_passthrough(ins->src[s].offset ? - BIFROST_SRC_FAU_HI : BIFROST_SRC_FAU_LO); + bi_index pass = bi_passthrough(ins->src[s].offset ? BIFROST_SRC_FAU_HI + : BIFROST_SRC_FAU_LO); - bi_replace_src(ins, s, pass); - } + bi_replace_src(ins, s, pass); + } } static void bi_rewrite_zero(bi_instr *ins, bool fma) { - bi_index zero = bi_passthrough(fma ? 
BIFROST_SRC_STAGE : BIFROST_SRC_FAU_LO); + bi_index zero = bi_passthrough(fma ? BIFROST_SRC_STAGE : BIFROST_SRC_FAU_LO); - bi_foreach_src(ins, s) { - bi_index src = ins->src[s]; + bi_foreach_src(ins, s) { + bi_index src = ins->src[s]; - if (src.type == BI_INDEX_CONSTANT && src.value == 0) - bi_replace_src(ins, s, zero); - } + if (src.type == BI_INDEX_CONSTANT && src.value == 0) + bi_replace_src(ins, s, zero); + } } /* Assumes #0 to {T, FAU} rewrite has already occurred */ @@ -1408,31 +1410,32 @@ bi_rewrite_zero(bi_instr *ins, bool fma) static void bi_rewrite_constants_to_pass(bi_tuple *tuple, uint64_t constant, bool pcrel) { - bi_foreach_instr_and_src_in_tuple(tuple, ins, s) { - if (ins->src[s].type != BI_INDEX_CONSTANT) continue; + bi_foreach_instr_and_src_in_tuple(tuple, ins, s) { + if (ins->src[s].type != BI_INDEX_CONSTANT) + continue; - uint32_t cons = ins->src[s].value; + uint32_t cons = ins->src[s].value; - ASSERTED bool lo = (cons == (constant & 0xffffffff)); - bool hi = (cons == (constant >> 32ull)); + ASSERTED bool lo = (cons == (constant & 0xffffffff)); + bool hi = (cons == (constant >> 32ull)); - /* PC offsets always live in the upper half, set to zero by - * convention before pack time. (This is safe, since if you - * wanted to compare against zero, you would use a BRANCHZ - * instruction instead.) */ - if (cons == 0 && ins->branch_target != NULL) { - assert(pcrel); - hi = true; - lo = false; - } else if (pcrel) { - hi = false; - } + /* PC offsets always live in the upper half, set to zero by + * convention before pack time. (This is safe, since if you + * wanted to compare against zero, you would use a BRANCHZ + * instruction instead.) */ + if (cons == 0 && ins->branch_target != NULL) { + assert(pcrel); + hi = true; + lo = false; + } else if (pcrel) { + hi = false; + } - assert(lo || hi); + assert(lo || hi); - bi_replace_src(ins, s, - bi_passthrough(hi ? BIFROST_SRC_FAU_HI : BIFROST_SRC_FAU_LO)); - } + bi_replace_src( + ins, s, bi_passthrough(hi ? BIFROST_SRC_FAU_HI : BIFROST_SRC_FAU_LO)); + } } /* Constructs a constant state given a tuple state. 
This has the @@ -1443,25 +1446,25 @@ bi_rewrite_constants_to_pass(bi_tuple *tuple, uint64_t constant, bool pcrel) static struct bi_const_state bi_get_const_state(struct bi_tuple_state *tuple) { - struct bi_const_state consts = { - .constant_count = tuple->constant_count, - .constants[0] = tuple->constants[0], - .constants[1] = tuple->constants[1], - .pcrel = tuple->add && tuple->add->branch_target, - }; + struct bi_const_state consts = { + .constant_count = tuple->constant_count, + .constants[0] = tuple->constants[0], + .constants[1] = tuple->constants[1], + .pcrel = tuple->add && tuple->add->branch_target, + }; - /* pcrel applies to the first constant by convention, and - * PC-relative constants will be #0 by convention here, so swap - * to match if needed */ - if (consts.pcrel && consts.constants[0]) { - assert(consts.constant_count == 2); - assert(consts.constants[1] == 0); + /* pcrel applies to the first constant by convention, and + * PC-relative constants will be #0 by convention here, so swap + * to match if needed */ + if (consts.pcrel && consts.constants[0]) { + assert(consts.constant_count == 2); + assert(consts.constants[1] == 0); - consts.constants[1] = consts.constants[0]; - consts.constants[0] = 0; - } + consts.constants[1] = consts.constants[0]; + consts.constants[0] = 0; + } - return consts; + return consts; } /* Merges constants in a clause, satisfying the following rules, assuming no @@ -1483,119 +1486,122 @@ bi_get_const_state(struct bi_tuple_state *tuple) static uint64_t bi_merge_u32(uint32_t c0, uint32_t c1, bool pcrel) { - /* At this point in the constant merge algorithm, pcrel constants are - * treated as zero, so pcrel implies at least one constants is zero */ - assert(!pcrel || (c0 == 0 || c1 == 0)); + /* At this point in the constant merge algorithm, pcrel constants are + * treated as zero, so pcrel implies at least one constants is zero */ + assert(!pcrel || (c0 == 0 || c1 == 0)); - /* Order: pcrel, maximum non-pcrel, minimum non-pcrel */ - uint32_t hi = pcrel ? 0 : MAX2(c0, c1); - uint32_t lo = (c0 == hi) ? c1 : c0; + /* Order: pcrel, maximum non-pcrel, minimum non-pcrel */ + uint32_t hi = pcrel ? 0 : MAX2(c0, c1); + uint32_t lo = (c0 == hi) ? 
c1 : c0; - /* Merge in the selected order */ - return lo | (((uint64_t) hi) << 32ull); + /* Merge in the selected order */ + return lo | (((uint64_t)hi) << 32ull); } static unsigned bi_merge_pairs(struct bi_const_state *consts, unsigned tuple_count, - uint64_t *merged, unsigned *pcrel_pair) + uint64_t *merged, unsigned *pcrel_pair) { - unsigned merge_count = 0; + unsigned merge_count = 0; - for (unsigned t = 0; t < tuple_count; ++t) { - if (consts[t].constant_count != 2) continue; + for (unsigned t = 0; t < tuple_count; ++t) { + if (consts[t].constant_count != 2) + continue; - unsigned idx = ~0; - uint64_t val = bi_merge_u32(consts[t].constants[0], - consts[t].constants[1], consts[t].pcrel); + unsigned idx = ~0; + uint64_t val = bi_merge_u32(consts[t].constants[0], + consts[t].constants[1], consts[t].pcrel); - /* Skip the pcrel pair if assigned, because if one is assigned, - * this one is not pcrel by uniqueness so it's a mismatch */ - for (unsigned s = 0; s < merge_count; ++s) { - if (merged[s] == val && (*pcrel_pair) != s) { - idx = s; - break; - } - } + /* Skip the pcrel pair if assigned, because if one is assigned, + * this one is not pcrel by uniqueness so it's a mismatch */ + for (unsigned s = 0; s < merge_count; ++s) { + if (merged[s] == val && (*pcrel_pair) != s) { + idx = s; + break; + } + } - if (idx == ~0) { - idx = merge_count++; - merged[idx] = val; + if (idx == ~0) { + idx = merge_count++; + merged[idx] = val; - if (consts[t].pcrel) - (*pcrel_pair) = idx; - } + if (consts[t].pcrel) + (*pcrel_pair) = idx; + } - consts[t].word_idx = idx; - } + consts[t].word_idx = idx; + } - return merge_count; + return merge_count; } static unsigned bi_merge_singles(struct bi_const_state *consts, unsigned tuple_count, - uint64_t *pairs, unsigned pair_count, unsigned *pcrel_pair) + uint64_t *pairs, unsigned pair_count, unsigned *pcrel_pair) { - bool pending = false, pending_pcrel = false; - uint32_t pending_single = 0; + bool pending = false, pending_pcrel = false; + uint32_t pending_single = 0; - for (unsigned t = 0; t < tuple_count; ++t) { - if (consts[t].constant_count != 1) continue; + for (unsigned t = 0; t < tuple_count; ++t) { + if (consts[t].constant_count != 1) + continue; - uint32_t val = consts[t].constants[0]; - unsigned idx = ~0; + uint32_t val = consts[t].constants[0]; + unsigned idx = ~0; - /* Try to match, but don't match pcrel with non-pcrel, even - * though we can merge a pcrel with a non-pcrel single */ - for (unsigned i = 0; i < pair_count; ++i) { - bool lo = ((pairs[i] & 0xffffffff) == val); - bool hi = ((pairs[i] >> 32) == val); - bool match = (lo || hi); - match &= ((*pcrel_pair) != i); - if (match && !consts[t].pcrel) { - idx = i; - break; - } - } + /* Try to match, but don't match pcrel with non-pcrel, even + * though we can merge a pcrel with a non-pcrel single */ + for (unsigned i = 0; i < pair_count; ++i) { + bool lo = ((pairs[i] & 0xffffffff) == val); + bool hi = ((pairs[i] >> 32) == val); + bool match = (lo || hi); + match &= ((*pcrel_pair) != i); + if (match && !consts[t].pcrel) { + idx = i; + break; + } + } - if (idx == ~0) { - idx = pair_count; + if (idx == ~0) { + idx = pair_count; - if (pending && pending_single != val) { - assert(!(pending_pcrel && consts[t].pcrel)); - bool pcrel = pending_pcrel || consts[t].pcrel; + if (pending && pending_single != val) { + assert(!(pending_pcrel && consts[t].pcrel)); + bool pcrel = pending_pcrel || consts[t].pcrel; - if (pcrel) - *pcrel_pair = idx; + if (pcrel) + *pcrel_pair = idx; - pairs[pair_count++] = 
bi_merge_u32(pending_single, val, pcrel); + pairs[pair_count++] = bi_merge_u32(pending_single, val, pcrel); - pending = pending_pcrel = false; - } else { - pending = true; - pending_pcrel = consts[t].pcrel; - pending_single = val; - } - } + pending = pending_pcrel = false; + } else { + pending = true; + pending_pcrel = consts[t].pcrel; + pending_single = val; + } + } - consts[t].word_idx = idx; - } + consts[t].word_idx = idx; + } - /* Shift so it works whether pending_pcrel is set or not */ - if (pending) { - if (pending_pcrel) - *pcrel_pair = pair_count; + /* Shift so it works whether pending_pcrel is set or not */ + if (pending) { + if (pending_pcrel) + *pcrel_pair = pair_count; - pairs[pair_count++] = ((uint64_t) pending_single) << 32ull; - } + pairs[pair_count++] = ((uint64_t)pending_single) << 32ull; + } - return pair_count; + return pair_count; } static unsigned -bi_merge_constants(struct bi_const_state *consts, uint64_t *pairs, unsigned *pcrel_idx) +bi_merge_constants(struct bi_const_state *consts, uint64_t *pairs, + unsigned *pcrel_idx) { - unsigned pair_count = bi_merge_pairs(consts, 8, pairs, pcrel_idx); - return bi_merge_singles(consts, 8, pairs, pair_count, pcrel_idx); + unsigned pair_count = bi_merge_pairs(consts, 8, pairs, pcrel_idx); + return bi_merge_singles(consts, 8, pairs, pair_count, pcrel_idx); } /* Swap two constants at word i and i+1 by swapping their actual positions and @@ -1604,16 +1610,16 @@ bi_merge_constants(struct bi_const_state *consts, uint64_t *pairs, unsigned *pcr static void bi_swap_constants(struct bi_const_state *consts, uint64_t *pairs, unsigned i) { - uint64_t tmp_pair = pairs[i + 0]; - pairs[i + 0] = pairs[i + 1]; - pairs[i + 1] = tmp_pair; + uint64_t tmp_pair = pairs[i + 0]; + pairs[i + 0] = pairs[i + 1]; + pairs[i + 1] = tmp_pair; - for (unsigned t = 0; t < 8; ++t) { - if (consts[t].word_idx == i) - consts[t].word_idx = (i + 1); - else if (consts[t].word_idx == (i + 1)) - consts[t].word_idx = i; - } + for (unsigned t = 0; t < 8; ++t) { + if (consts[t].word_idx == i) + consts[t].word_idx = (i + 1); + else if (consts[t].word_idx == (i + 1)) + consts[t].word_idx = i; + } } /* Given merged constants, one of which might be PC-relative, fix up the M @@ -1621,449 +1627,456 @@ bi_swap_constants(struct bi_const_state *consts, uint64_t *pairs, unsigned i) * and other constants are used as-is (which might require swapping) */ static unsigned -bi_apply_constant_modifiers(struct bi_const_state *consts, - uint64_t *pairs, unsigned *pcrel_idx, - unsigned tuple_count, unsigned constant_count) +bi_apply_constant_modifiers(struct bi_const_state *consts, uint64_t *pairs, + unsigned *pcrel_idx, unsigned tuple_count, + unsigned constant_count) { - unsigned start = bi_ec0_packed(tuple_count) ? 1 : 0; + unsigned start = bi_ec0_packed(tuple_count) ? 
1 : 0; - /* Clauses with these tuple counts lack an M field for the packed EC0, - * so EC0 cannot be PC-relative, which might require swapping (and - * possibly adding an unused constant) to fit */ + /* Clauses with these tuple counts lack an M field for the packed EC0, + * so EC0 cannot be PC-relative, which might require swapping (and + * possibly adding an unused constant) to fit */ - if (*pcrel_idx == 0 && (tuple_count == 5 || tuple_count == 8)) { - constant_count = MAX2(constant_count, 2); - *pcrel_idx = 1; - bi_swap_constants(consts, pairs, 0); - } + if (*pcrel_idx == 0 && (tuple_count == 5 || tuple_count == 8)) { + constant_count = MAX2(constant_count, 2); + *pcrel_idx = 1; + bi_swap_constants(consts, pairs, 0); + } - /* EC0 might be packed free, after that constants are packed in pairs - * (with clause format 12), with M1 values computed from the pair */ + /* EC0 might be packed free, after that constants are packed in pairs + * (with clause format 12), with M1 values computed from the pair */ - for (unsigned i = start; i < constant_count; i += 2) { - bool swap = false; - bool last = (i + 1) == constant_count; + for (unsigned i = start; i < constant_count; i += 2) { + bool swap = false; + bool last = (i + 1) == constant_count; - unsigned A1 = (pairs[i] >> 60); - unsigned B1 = (pairs[i + 1] >> 60); + unsigned A1 = (pairs[i] >> 60); + unsigned B1 = (pairs[i + 1] >> 60); - if (*pcrel_idx == i || *pcrel_idx == (i + 1)) { - /* PC-relative constant must be E0, not E1 */ - swap = (*pcrel_idx == (i + 1)); + if (*pcrel_idx == i || *pcrel_idx == (i + 1)) { + /* PC-relative constant must be E0, not E1 */ + swap = (*pcrel_idx == (i + 1)); - /* Set M1 = 4 by noting (A - B) mod 16 = 4 is - * equivalent to A = (B + 4) mod 16 and that we can - * control A */ - unsigned B = swap ? A1 : B1; - unsigned A = (B + 4) & 0xF; - pairs[*pcrel_idx] |= ((uint64_t) A) << 60; + /* Set M1 = 4 by noting (A - B) mod 16 = 4 is + * equivalent to A = (B + 4) mod 16 and that we can + * control A */ + unsigned B = swap ? A1 : B1; + unsigned A = (B + 4) & 0xF; + pairs[*pcrel_idx] |= ((uint64_t)A) << 60; - /* Swapped if swap set, identity if swap not set */ - *pcrel_idx = i; - } else { - /* Compute M1 value if we don't swap */ - unsigned M1 = (16 + A1 - B1) & 0xF; + /* Swapped if swap set, identity if swap not set */ + *pcrel_idx = i; + } else { + /* Compute M1 value if we don't swap */ + unsigned M1 = (16 + A1 - B1) & 0xF; - /* For M1 = 0 or M1 >= 8, the constants are unchanged, - * we have 0 < (A1 - B1) % 16 < 8, which implies (B1 - - * A1) % 16 >= 8, so swapping will let them be used - * unchanged */ - swap = (M1 != 0) && (M1 < 8); + /* For M1 = 0 or M1 >= 8, the constants are unchanged, + * we have 0 < (A1 - B1) % 16 < 8, which implies (B1 - + * A1) % 16 >= 8, so swapping will let them be used + * unchanged */ + swap = (M1 != 0) && (M1 < 8); - /* However, we can't swap the last constant, so we - * force M1 = 0 instead for this case */ - if (last && swap) { - pairs[i + 1] |= pairs[i] & (0xfull << 60); - swap = false; - } - } + /* However, we can't swap the last constant, so we + * force M1 = 0 instead for this case */ + if (last && swap) { + pairs[i + 1] |= pairs[i] & (0xfull << 60); + swap = false; + } + } - if (swap) { - assert(!last); - bi_swap_constants(consts, pairs, i); - } - } + if (swap) { + assert(!last); + bi_swap_constants(consts, pairs, i); + } + } - return constant_count; + return constant_count; } /* Schedule a single clause. If no instructions remain, return NULL. 
*/ static bi_clause * -bi_schedule_clause(bi_context *ctx, bi_block *block, struct bi_worklist st, uint64_t *live) +bi_schedule_clause(bi_context *ctx, bi_block *block, struct bi_worklist st, + uint64_t *live) { - struct bi_clause_state clause_state = { 0 }; - bi_clause *clause = rzalloc(ctx, bi_clause); - bi_tuple *tuple = NULL; + struct bi_clause_state clause_state = {0}; + bi_clause *clause = rzalloc(ctx, bi_clause); + bi_tuple *tuple = NULL; - const unsigned max_tuples = ARRAY_SIZE(clause->tuples); + const unsigned max_tuples = ARRAY_SIZE(clause->tuples); - /* TODO: Decide flow control better */ - clause->flow_control = BIFROST_FLOW_NBTB; + /* TODO: Decide flow control better */ + clause->flow_control = BIFROST_FLOW_NBTB; - /* The last clause can only write one instruction, so initialize that */ - struct bi_reg_state reg_state = {}; - bi_index prev_reads[5] = { bi_null() }; - unsigned nr_prev_reads = 0; + /* The last clause can only write one instruction, so initialize that */ + struct bi_reg_state reg_state = {}; + bi_index prev_reads[5] = {bi_null()}; + unsigned nr_prev_reads = 0; - /* We need to track future liveness. The main *live set tracks what is - * live at the current point int he program we are scheduling, but to - * determine temp eligibility, we instead want what will be live after - * the next tuple in the program. If you scheduled forwards, you'd need - * a crystall ball for this. Luckily we schedule backwards, so we just - * delay updates to the live_after_temp by an extra tuple. */ - uint64_t live_after_temp = *live; - uint64_t live_next_tuple = live_after_temp; + /* We need to track future liveness. The main *live set tracks what is + * live at the current point int he program we are scheduling, but to + * determine temp eligibility, we instead want what will be live after + * the next tuple in the program. If you scheduled forwards, you'd need + * a crystall ball for this. Luckily we schedule backwards, so we just + * delay updates to the live_after_temp by an extra tuple. 
*/ + uint64_t live_after_temp = *live; + uint64_t live_next_tuple = live_after_temp; - do { - struct bi_tuple_state tuple_state = { - .last = (clause->tuple_count == 0), - .reg = reg_state, - .nr_prev_reads = nr_prev_reads, - .prev = tuple, - .pcrel_idx = ~0, - }; + do { + struct bi_tuple_state tuple_state = { + .last = (clause->tuple_count == 0), + .reg = reg_state, + .nr_prev_reads = nr_prev_reads, + .prev = tuple, + .pcrel_idx = ~0, + }; - assert(nr_prev_reads < ARRAY_SIZE(prev_reads)); - memcpy(tuple_state.prev_reads, prev_reads, sizeof(prev_reads)); + assert(nr_prev_reads < ARRAY_SIZE(prev_reads)); + memcpy(tuple_state.prev_reads, prev_reads, sizeof(prev_reads)); - unsigned idx = max_tuples - clause->tuple_count - 1; + unsigned idx = max_tuples - clause->tuple_count - 1; - tuple = &clause->tuples[idx]; + tuple = &clause->tuples[idx]; - if (clause->message && bi_opcode_props[clause->message->op].sr_read && !bi_is_null(clause->message->src[0])) { - unsigned nr = bi_count_read_registers(clause->message, 0); - live_after_temp |= (BITFIELD64_MASK(nr) << clause->message->src[0].value); - } + if (clause->message && bi_opcode_props[clause->message->op].sr_read && + !bi_is_null(clause->message->src[0])) { + unsigned nr = bi_count_read_registers(clause->message, 0); + live_after_temp |= + (BITFIELD64_MASK(nr) << clause->message->src[0].value); + } - /* Since we schedule backwards, we schedule ADD first */ - tuple_state.add = bi_take_instr(ctx, st, &clause_state, &tuple_state, live_after_temp, false); - tuple->fma = bi_take_instr(ctx, st, &clause_state, &tuple_state, live_after_temp, true); - tuple->add = tuple_state.add; + /* Since we schedule backwards, we schedule ADD first */ + tuple_state.add = bi_take_instr(ctx, st, &clause_state, &tuple_state, + live_after_temp, false); + tuple->fma = bi_take_instr(ctx, st, &clause_state, &tuple_state, + live_after_temp, true); + tuple->add = tuple_state.add; - /* Update liveness from the new instructions */ - if (tuple->add) - *live = bi_postra_liveness_ins(*live, tuple->add); + /* Update liveness from the new instructions */ + if (tuple->add) + *live = bi_postra_liveness_ins(*live, tuple->add); - if (tuple->fma) - *live = bi_postra_liveness_ins(*live, tuple->fma); + if (tuple->fma) + *live = bi_postra_liveness_ins(*live, tuple->fma); - /* Rotate in the new per-tuple liveness */ - live_after_temp = live_next_tuple; - live_next_tuple = *live; + /* Rotate in the new per-tuple liveness */ + live_after_temp = live_next_tuple; + live_next_tuple = *live; - /* We may have a message, but only one per clause */ - if (tuple->add && bi_must_message(tuple->add)) { - assert(!clause_state.message); - clause_state.message = true; + /* We may have a message, but only one per clause */ + if (tuple->add && bi_must_message(tuple->add)) { + assert(!clause_state.message); + clause_state.message = true; - clause->message_type = - bi_message_type_for_instr(tuple->add); - clause->message = tuple->add; + clause->message_type = bi_message_type_for_instr(tuple->add); + clause->message = tuple->add; - /* We don't need to set dependencies for blend shaders - * because the BLEND instruction in the fragment - * shader should have already done the wait */ - if (!ctx->inputs->is_blend) { - switch (tuple->add->op) { - case BI_OPCODE_ATEST: - clause->dependencies |= (1 << BIFROST_SLOT_ELDEST_DEPTH); - break; - case BI_OPCODE_LD_TILE: - case BI_OPCODE_ST_TILE: - clause->dependencies |= (1 << BIFROST_SLOT_ELDEST_COLOUR); - break; - case BI_OPCODE_BLEND: - clause->dependencies |= (1 << 
BIFROST_SLOT_ELDEST_DEPTH); - clause->dependencies |= (1 << BIFROST_SLOT_ELDEST_COLOUR); - break; - default: - break; - } - } - } + /* We don't need to set dependencies for blend shaders + * because the BLEND instruction in the fragment + * shader should have already done the wait */ + if (!ctx->inputs->is_blend) { + switch (tuple->add->op) { + case BI_OPCODE_ATEST: + clause->dependencies |= (1 << BIFROST_SLOT_ELDEST_DEPTH); + break; + case BI_OPCODE_LD_TILE: + case BI_OPCODE_ST_TILE: + clause->dependencies |= (1 << BIFROST_SLOT_ELDEST_COLOUR); + break; + case BI_OPCODE_BLEND: + clause->dependencies |= (1 << BIFROST_SLOT_ELDEST_DEPTH); + clause->dependencies |= (1 << BIFROST_SLOT_ELDEST_COLOUR); + break; + default: + break; + } + } + } - clause_state.consts[idx] = bi_get_const_state(&tuple_state); + clause_state.consts[idx] = bi_get_const_state(&tuple_state); - /* Before merging constants, eliminate zeroes, otherwise the - * merging will fight over the #0 that never gets read (and is - * never marked as read by update_fau) */ - if (tuple->fma && bi_reads_zero(tuple->fma)) - bi_rewrite_zero(tuple->fma, true); + /* Before merging constants, eliminate zeroes, otherwise the + * merging will fight over the #0 that never gets read (and is + * never marked as read by update_fau) */ + if (tuple->fma && bi_reads_zero(tuple->fma)) + bi_rewrite_zero(tuple->fma, true); - /* Rewrite away FAU, constant write is deferred */ - if (!tuple_state.constant_count) { - tuple->fau_idx = tuple_state.fau; - bi_rewrite_fau_to_pass(tuple); - } + /* Rewrite away FAU, constant write is deferred */ + if (!tuple_state.constant_count) { + tuple->fau_idx = tuple_state.fau; + bi_rewrite_fau_to_pass(tuple); + } - /* Use passthrough register for cross-stage accesses. Since - * there are just FMA and ADD stages, that means we rewrite to - * passthrough the sources of the ADD that read from the - * destination of the FMA */ + /* Use passthrough register for cross-stage accesses. 
Since + * there are just FMA and ADD stages, that means we rewrite to + * passthrough the sources of the ADD that read from the + * destination of the FMA */ - if (tuple->fma && tuple->fma->nr_dests) { - bi_use_passthrough(tuple->add, tuple->fma->dest[0], - BIFROST_SRC_STAGE, false); - } + if (tuple->fma && tuple->fma->nr_dests) { + bi_use_passthrough(tuple->add, tuple->fma->dest[0], BIFROST_SRC_STAGE, + false); + } - /* Don't add an empty tuple, unless the worklist has nothing - * but a (pseudo)instruction failing to schedule due to a "not - * last instruction" constraint */ + /* Don't add an empty tuple, unless the worklist has nothing + * but a (pseudo)instruction failing to schedule due to a "not + * last instruction" constraint */ - int some_instruction = __bitset_ffs(st.worklist, BITSET_WORDS(st.count)); - bool not_last = (some_instruction > 0) && - bi_must_not_last(st.instructions[some_instruction - 1]); + int some_instruction = __bitset_ffs(st.worklist, BITSET_WORDS(st.count)); + bool not_last = (some_instruction > 0) && + bi_must_not_last(st.instructions[some_instruction - 1]); - bool insert_empty = tuple_state.last && not_last; + bool insert_empty = tuple_state.last && not_last; - if (!(tuple->fma || tuple->add || insert_empty)) - break; + if (!(tuple->fma || tuple->add || insert_empty)) + break; - clause->tuple_count++; + clause->tuple_count++; - /* Adding enough tuple might overflow constants */ - if (!bi_space_for_more_constants(&clause_state)) - break; + /* Adding enough tuple might overflow constants */ + if (!bi_space_for_more_constants(&clause_state)) + break; #ifndef NDEBUG - /* Don't schedule more than 1 tuple if debugging */ - if ((bifrost_debug & BIFROST_DBG_NOSCHED) && !insert_empty) - break; + /* Don't schedule more than 1 tuple if debugging */ + if ((bifrost_debug & BIFROST_DBG_NOSCHED) && !insert_empty) + break; #endif - /* Link through the register state */ - STATIC_ASSERT(sizeof(prev_reads) == sizeof(tuple_state.reg.reads)); - memcpy(prev_reads, tuple_state.reg.reads, sizeof(prev_reads)); - nr_prev_reads = tuple_state.reg.nr_reads; - clause_state.tuple_count++; - } while(clause->tuple_count < 8); + /* Link through the register state */ + STATIC_ASSERT(sizeof(prev_reads) == sizeof(tuple_state.reg.reads)); + memcpy(prev_reads, tuple_state.reg.reads, sizeof(prev_reads)); + nr_prev_reads = tuple_state.reg.nr_reads; + clause_state.tuple_count++; + } while (clause->tuple_count < 8); - /* Don't schedule an empty clause */ - if (!clause->tuple_count) - return NULL; + /* Don't schedule an empty clause */ + if (!clause->tuple_count) + return NULL; - /* Before merging, rewrite away any tuples that read only zero */ - for (unsigned i = max_tuples - clause->tuple_count; i < max_tuples; ++i) { - bi_tuple *tuple = &clause->tuples[i]; - struct bi_const_state *st = &clause_state.consts[i]; + /* Before merging, rewrite away any tuples that read only zero */ + for (unsigned i = max_tuples - clause->tuple_count; i < max_tuples; ++i) { + bi_tuple *tuple = &clause->tuples[i]; + struct bi_const_state *st = &clause_state.consts[i]; - if (st->constant_count == 0 || st->constants[0] || st->constants[1] || st->pcrel) - continue; + if (st->constant_count == 0 || st->constants[0] || st->constants[1] || + st->pcrel) + continue; - bi_foreach_instr_in_tuple(tuple, ins) - bi_rewrite_zero(ins, false); + bi_foreach_instr_in_tuple(tuple, ins) + bi_rewrite_zero(ins, false); - /* Constant has been demoted to FAU, so don't pack it separately */ - st->constant_count = 0; + /* Constant has been demoted 
to FAU, so don't pack it separately */ + st->constant_count = 0; - /* Default */ - assert(tuple->fau_idx == BIR_FAU_ZERO); - } + /* Default */ + assert(tuple->fau_idx == BIR_FAU_ZERO); + } - uint64_t constant_pairs[8] = { 0 }; - unsigned pcrel_idx = ~0; - unsigned constant_words = - bi_merge_constants(clause_state.consts, constant_pairs, &pcrel_idx); + uint64_t constant_pairs[8] = {0}; + unsigned pcrel_idx = ~0; + unsigned constant_words = + bi_merge_constants(clause_state.consts, constant_pairs, &pcrel_idx); - constant_words = bi_apply_constant_modifiers(clause_state.consts, - constant_pairs, &pcrel_idx, clause->tuple_count, - constant_words); + constant_words = bi_apply_constant_modifiers( + clause_state.consts, constant_pairs, &pcrel_idx, clause->tuple_count, + constant_words); - clause->pcrel_idx = pcrel_idx; + clause->pcrel_idx = pcrel_idx; - for (unsigned i = max_tuples - clause->tuple_count; i < max_tuples; ++i) { - bi_tuple *tuple = &clause->tuples[i]; + for (unsigned i = max_tuples - clause->tuple_count; i < max_tuples; ++i) { + bi_tuple *tuple = &clause->tuples[i]; - /* If no constants, leave FAU as it is, possibly defaulting to 0 */ - if (clause_state.consts[i].constant_count == 0) - continue; + /* If no constants, leave FAU as it is, possibly defaulting to 0 */ + if (clause_state.consts[i].constant_count == 0) + continue; - /* FAU is already handled */ - assert(!tuple->fau_idx); + /* FAU is already handled */ + assert(!tuple->fau_idx); - unsigned word_idx = clause_state.consts[i].word_idx; - assert(word_idx <= 8); + unsigned word_idx = clause_state.consts[i].word_idx; + assert(word_idx <= 8); - /* We could try to merge regardless of bottom bits as well, but - * that's probably diminishing returns */ - uint64_t pair = constant_pairs[word_idx]; - unsigned lo = pair & 0xF; + /* We could try to merge regardless of bottom bits as well, but + * that's probably diminishing returns */ + uint64_t pair = constant_pairs[word_idx]; + unsigned lo = pair & 0xF; - tuple->fau_idx = bi_constant_field(word_idx) | lo; - bi_rewrite_constants_to_pass(tuple, pair, word_idx == pcrel_idx); - } + tuple->fau_idx = bi_constant_field(word_idx) | lo; + bi_rewrite_constants_to_pass(tuple, pair, word_idx == pcrel_idx); + } - clause->constant_count = constant_words; - memcpy(clause->constants, constant_pairs, sizeof(constant_pairs)); + clause->constant_count = constant_words; + memcpy(clause->constants, constant_pairs, sizeof(constant_pairs)); - /* Branches must be last, so this can be factored out */ - bi_instr *last = clause->tuples[max_tuples - 1].add; - clause->next_clause_prefetch = !last || (last->op != BI_OPCODE_JUMP); - clause->block = block; + /* Branches must be last, so this can be factored out */ + bi_instr *last = clause->tuples[max_tuples - 1].add; + clause->next_clause_prefetch = !last || (last->op != BI_OPCODE_JUMP); + clause->block = block; - clause->ftz = (clause_state.ftz == BI_FTZ_STATE_ENABLE); + clause->ftz = (clause_state.ftz == BI_FTZ_STATE_ENABLE); - /* We emit in reverse and emitted to the back of the tuples array, so - * move it up front for easy indexing */ - memmove(clause->tuples, - clause->tuples + (max_tuples - clause->tuple_count), - clause->tuple_count * sizeof(clause->tuples[0])); + /* We emit in reverse and emitted to the back of the tuples array, so + * move it up front for easy indexing */ + memmove(clause->tuples, clause->tuples + (max_tuples - clause->tuple_count), + clause->tuple_count * sizeof(clause->tuples[0])); - /* Use passthrough register for cross-tuple 
accesses. Note this is - * after the memmove, so this is forwards. Skip the first tuple since - * there is nothing before it to passthrough */ + /* Use passthrough register for cross-tuple accesses. Note this is + * after the memmove, so this is forwards. Skip the first tuple since + * there is nothing before it to passthrough */ - for (unsigned t = 1; t < clause->tuple_count; ++t) - bi_rewrite_passthrough(clause->tuples[t - 1], clause->tuples[t]); + for (unsigned t = 1; t < clause->tuple_count; ++t) + bi_rewrite_passthrough(clause->tuples[t - 1], clause->tuples[t]); - return clause; + return clause; } static void bi_schedule_block(bi_context *ctx, bi_block *block) { - list_inithead(&block->clauses); + list_inithead(&block->clauses); - /* Copy list to dynamic array */ - struct bi_worklist st = bi_initialize_worklist(block, - bifrost_debug & BIFROST_DBG_INORDER, - ctx->inputs->is_blend); + /* Copy list to dynamic array */ + struct bi_worklist st = bi_initialize_worklist( + block, bifrost_debug & BIFROST_DBG_INORDER, ctx->inputs->is_blend); - if (!st.count) { - bi_free_worklist(st); - return; - } + if (!st.count) { + bi_free_worklist(st); + return; + } - /* We need to track liveness during scheduling in order to determine whether we can use temporary (passthrough) registers */ - uint64_t live = block->reg_live_out; + /* We need to track liveness during scheduling in order to determine whether + * we can use temporary (passthrough) registers */ + uint64_t live = block->reg_live_out; - /* Schedule as many clauses as needed to fill the block */ - bi_clause *u = NULL; - while((u = bi_schedule_clause(ctx, block, st, &live))) - list_add(&u->link, &block->clauses); + /* Schedule as many clauses as needed to fill the block */ + bi_clause *u = NULL; + while ((u = bi_schedule_clause(ctx, block, st, &live))) + list_add(&u->link, &block->clauses); - /* Back-to-back bit affects only the last clause of a block, - * the rest are implicitly true */ - if (!list_is_empty(&block->clauses)) { - bi_clause *last_clause = list_last_entry(&block->clauses, bi_clause, link); - if (bi_reconverge_branches(block)) - last_clause->flow_control = BIFROST_FLOW_NBTB_UNCONDITIONAL; - } + /* Back-to-back bit affects only the last clause of a block, + * the rest are implicitly true */ + if (!list_is_empty(&block->clauses)) { + bi_clause *last_clause = + list_last_entry(&block->clauses, bi_clause, link); + if (bi_reconverge_branches(block)) + last_clause->flow_control = BIFROST_FLOW_NBTB_UNCONDITIONAL; + } - /* Reorder instructions to match the new schedule. First remove - * existing instructions and then recreate the list */ + /* Reorder instructions to match the new schedule. 
First remove + * existing instructions and then recreate the list */ - bi_foreach_instr_in_block_safe(block, ins) { - list_del(&ins->link); - } + bi_foreach_instr_in_block_safe(block, ins) { + list_del(&ins->link); + } - bi_foreach_clause_in_block(block, clause) { - for (unsigned i = 0; i < clause->tuple_count; ++i) { - bi_foreach_instr_in_tuple(&clause->tuples[i], ins) { - list_addtail(&ins->link, &block->instructions); - } - } - } + bi_foreach_clause_in_block(block, clause) { + for (unsigned i = 0; i < clause->tuple_count; ++i) { + bi_foreach_instr_in_tuple(&clause->tuples[i], ins) { + list_addtail(&ins->link, &block->instructions); + } + } + } - block->scheduled = true; + block->scheduled = true; #ifndef NDEBUG - unsigned i; - bool incomplete = false; + unsigned i; + bool incomplete = false; - BITSET_FOREACH_SET(i, st.worklist, st.count) { - bi_print_instr(st.instructions[i], stderr); - incomplete = true; - } + BITSET_FOREACH_SET(i, st.worklist, st.count) { + bi_print_instr(st.instructions[i], stderr); + incomplete = true; + } - if (incomplete) - unreachable("The above instructions failed to schedule."); + if (incomplete) + unreachable("The above instructions failed to schedule."); #endif - bi_free_worklist(st); + bi_free_worklist(st); } static bool -bi_check_fau_src(bi_instr *ins, unsigned s, uint32_t *constants, unsigned *cwords, bi_index *fau) +bi_check_fau_src(bi_instr *ins, unsigned s, uint32_t *constants, + unsigned *cwords, bi_index *fau) { - assert(s < ins->nr_srcs); - bi_index src = ins->src[s]; + assert(s < ins->nr_srcs); + bi_index src = ins->src[s]; - /* Staging registers can't have FAU accesses */ - if (bi_is_staging_src(ins, s)) - return (src.type != BI_INDEX_CONSTANT) && (src.type != BI_INDEX_FAU); + /* Staging registers can't have FAU accesses */ + if (bi_is_staging_src(ins, s)) + return (src.type != BI_INDEX_CONSTANT) && (src.type != BI_INDEX_FAU); - if (src.type == BI_INDEX_CONSTANT) { - /* Allow fast zero */ - if (src.value == 0 && bi_opcode_props[ins->op].fma && bi_reads_zero(ins)) - return true; + if (src.type == BI_INDEX_CONSTANT) { + /* Allow fast zero */ + if (src.value == 0 && bi_opcode_props[ins->op].fma && bi_reads_zero(ins)) + return true; - if (!bi_is_null(*fau)) - return false; + if (!bi_is_null(*fau)) + return false; - /* Else, try to inline a constant */ - for (unsigned i = 0; i < *cwords; ++i) { - if (src.value == constants[i]) - return true; - } + /* Else, try to inline a constant */ + for (unsigned i = 0; i < *cwords; ++i) { + if (src.value == constants[i]) + return true; + } - if (*cwords >= 2) - return false; + if (*cwords >= 2) + return false; - constants[(*cwords)++] = src.value; - } else if (src.type == BI_INDEX_FAU) { - if (*cwords != 0) - return false; + constants[(*cwords)++] = src.value; + } else if (src.type == BI_INDEX_FAU) { + if (*cwords != 0) + return false; - /* Can only read from one pair of FAU words */ - if (!bi_is_null(*fau) && (src.value != fau->value)) - return false; + /* Can only read from one pair of FAU words */ + if (!bi_is_null(*fau) && (src.value != fau->value)) + return false; - /* If there is a target, we'll need a PC-relative constant */ - if (ins->branch_target) - return false; + /* If there is a target, we'll need a PC-relative constant */ + if (ins->branch_target) + return false; - *fau = src; - } + *fau = src; + } - return true; + return true; } void bi_lower_fau(bi_context *ctx) { - bi_foreach_instr_global_safe(ctx, ins) { - bi_builder b = bi_init_builder(ctx, bi_before_instr(ins)); + 
bi_foreach_instr_global_safe(ctx, ins) { + bi_builder b = bi_init_builder(ctx, bi_before_instr(ins)); - uint32_t constants[2]; - unsigned cwords = 0; - bi_index fau = bi_null(); + uint32_t constants[2]; + unsigned cwords = 0; + bi_index fau = bi_null(); - /* ATEST must have the ATEST datum encoded, not any other - * uniform. See to it this is the case. */ - if (ins->op == BI_OPCODE_ATEST) - fau = ins->src[2]; + /* ATEST must have the ATEST datum encoded, not any other + * uniform. See to it this is the case. */ + if (ins->op == BI_OPCODE_ATEST) + fau = ins->src[2]; - /* Dual texturing requires the texture operation descriptor - * encoded as an immediate so we can fix up. - */ - if (ins->op == BI_OPCODE_TEXC_DUAL) { - assert(ins->src[3].type == BI_INDEX_CONSTANT); - constants[cwords++] = ins->src[3].value; - } + /* Dual texturing requires the texture operation descriptor + * encoded as an immediate so we can fix up. + */ + if (ins->op == BI_OPCODE_TEXC_DUAL) { + assert(ins->src[3].type == BI_INDEX_CONSTANT); + constants[cwords++] = ins->src[3].value; + } - /* Phis get split up into moves so are unrestricted */ - if (ins->op == BI_OPCODE_PHI) - continue; + /* Phis get split up into moves so are unrestricted */ + if (ins->op == BI_OPCODE_PHI) + continue; - bi_foreach_src(ins, s) { - if (bi_check_fau_src(ins, s, constants, &cwords, &fau)) continue; + bi_foreach_src(ins, s) { + if (bi_check_fau_src(ins, s, constants, &cwords, &fau)) + continue; - bi_index copy = bi_mov_i32(&b, ins->src[s]); - bi_replace_src(ins, s, copy); - } - } + bi_index copy = bi_mov_i32(&b, ins->src[s]); + bi_replace_src(ins, s, copy); + } + } } /* Only v7 allows specifying a dependency on the tilebuffer for the first @@ -2072,50 +2085,53 @@ bi_lower_fau(bi_context *ctx) static void bi_add_nop_for_atest(bi_context *ctx) { - /* Only needed on v6 */ - if (ctx->arch >= 7) - return; + /* Only needed on v6 */ + if (ctx->arch >= 7) + return; - if (list_is_empty(&ctx->blocks)) - return; + if (list_is_empty(&ctx->blocks)) + return; - /* Fetch the first clause of the shader */ - bi_block *block = list_first_entry(&ctx->blocks, bi_block, link); - bi_clause *clause = bi_next_clause(ctx, block, NULL); + /* Fetch the first clause of the shader */ + bi_block *block = list_first_entry(&ctx->blocks, bi_block, link); + bi_clause *clause = bi_next_clause(ctx, block, NULL); - if (!clause || !(clause->dependencies & ((1 << BIFROST_SLOT_ELDEST_DEPTH) | - (1 << BIFROST_SLOT_ELDEST_COLOUR)))) - return; + if (!clause || !(clause->dependencies & ((1 << BIFROST_SLOT_ELDEST_DEPTH) | + (1 << BIFROST_SLOT_ELDEST_COLOUR)))) + return; - /* Add a NOP so we can wait for the dependencies required by the first - * clause */ + /* Add a NOP so we can wait for the dependencies required by the first + * clause */ - bi_instr *I = rzalloc(ctx, bi_instr); - I->op = BI_OPCODE_NOP; + bi_instr *I = rzalloc(ctx, bi_instr); + I->op = BI_OPCODE_NOP; - bi_clause *new_clause = ralloc(ctx, bi_clause); - *new_clause = (bi_clause) { - .flow_control = BIFROST_FLOW_NBTB, - .next_clause_prefetch = true, - .block = clause->block, + bi_clause *new_clause = ralloc(ctx, bi_clause); + *new_clause = (bi_clause){ + .flow_control = BIFROST_FLOW_NBTB, + .next_clause_prefetch = true, + .block = clause->block, - .tuple_count = 1, - .tuples[0] = { .fma = I, }, - }; + .tuple_count = 1, + .tuples[0] = + { + .fma = I, + }, + }; - list_add(&new_clause->link, &clause->block->clauses); + list_add(&new_clause->link, &clause->block->clauses); } void bi_schedule(bi_context *ctx) { - /* Fed into 
both scheduling and DCE */ - bi_postra_liveness(ctx); + /* Fed into both scheduling and DCE */ + bi_postra_liveness(ctx); - bi_foreach_block(ctx, block) { - bi_schedule_block(ctx, block); - } + bi_foreach_block(ctx, block) { + bi_schedule_block(ctx, block); + } - bi_opt_dce_post_ra(ctx); - bi_add_nop_for_atest(ctx); + bi_opt_dce_post_ra(ctx); + bi_add_nop_for_atest(ctx); } diff --git a/src/panfrost/bifrost/bi_scoreboard.c b/src/panfrost/bifrost/bi_scoreboard.c index 04aa07b0c1f..735bcf4a677 100644 --- a/src/panfrost/bifrost/bi_scoreboard.c +++ b/src/panfrost/bifrost/bi_scoreboard.c @@ -54,9 +54,9 @@ */ #define BI_NUM_GENERAL_SLOTS 6 -#define BI_NUM_SLOTS 8 -#define BI_NUM_REGISTERS 64 -#define BI_SLOT_SERIAL 0 /* arbitrary */ +#define BI_NUM_SLOTS 8 +#define BI_NUM_REGISTERS 64 +#define BI_SLOT_SERIAL 0 /* arbitrary */ /* * Due to the crude scoreboarding we do, we need to serialize varying loads and @@ -65,26 +65,26 @@ static bool bi_should_serialize(bi_instr *I) { - /* For debug, serialize everything to disable scoreboard opts */ - if (bifrost_debug & BIFROST_DBG_NOSB) - return true; + /* For debug, serialize everything to disable scoreboard opts */ + if (bifrost_debug & BIFROST_DBG_NOSB) + return true; - /* Although nominally on the attribute unit, image loads have the same - * coherency requirements as general memory loads. Serialize them for - * now until we can do something more clever. - */ - if (I->op == BI_OPCODE_LD_ATTR_TEX) - return true; + /* Although nominally on the attribute unit, image loads have the same + * coherency requirements as general memory loads. Serialize them for + * now until we can do something more clever. + */ + if (I->op == BI_OPCODE_LD_ATTR_TEX) + return true; - switch (bi_opcode_props[I->op].message) { - case BIFROST_MESSAGE_VARYING: - case BIFROST_MESSAGE_LOAD: - case BIFROST_MESSAGE_STORE: - case BIFROST_MESSAGE_ATOMIC: - return true; - default: - return false; - } + switch (bi_opcode_props[I->op].message) { + case BIFROST_MESSAGE_VARYING: + case BIFROST_MESSAGE_LOAD: + case BIFROST_MESSAGE_STORE: + case BIFROST_MESSAGE_ATOMIC: + return true; + default: + return false; + } } /* Given a scoreboard model, choose a slot for a clause wrapping a given @@ -93,76 +93,77 @@ bi_should_serialize(bi_instr *I) static unsigned bi_choose_scoreboard_slot(bi_instr *message) { - /* ATEST, ZS_EMIT must be issued with slot #0 */ - if (message->op == BI_OPCODE_ATEST || message->op == BI_OPCODE_ZS_EMIT) - return 0; + /* ATEST, ZS_EMIT must be issued with slot #0 */ + if (message->op == BI_OPCODE_ATEST || message->op == BI_OPCODE_ZS_EMIT) + return 0; - /* BARRIER must be issued with slot #7 */ - if (message->op == BI_OPCODE_BARRIER) - return 7; + /* BARRIER must be issued with slot #7 */ + if (message->op == BI_OPCODE_BARRIER) + return 7; - /* For now, make serialization is easy */ - if (bi_should_serialize(message)) - return BI_SLOT_SERIAL; + /* For now, make serialization is easy */ + if (bi_should_serialize(message)) + return BI_SLOT_SERIAL; - return 0; + return 0; } static uint64_t bi_read_mask(bi_instr *I, bool staging_only) { - uint64_t mask = 0; + uint64_t mask = 0; - if (staging_only && !bi_opcode_props[I->op].sr_read) - return mask; + if (staging_only && !bi_opcode_props[I->op].sr_read) + return mask; - bi_foreach_src(I, s) { - if (I->src[s].type == BI_INDEX_REGISTER) { - unsigned reg = I->src[s].value; - unsigned count = bi_count_read_registers(I, s); + bi_foreach_src(I, s) { + if (I->src[s].type == BI_INDEX_REGISTER) { + unsigned reg = I->src[s].value; + unsigned 
count = bi_count_read_registers(I, s); - mask |= (BITFIELD64_MASK(count) << reg); - } + mask |= (BITFIELD64_MASK(count) << reg); + } - if (staging_only) - break; - } + if (staging_only) + break; + } - return mask; + return mask; } static uint64_t bi_write_mask(bi_instr *I) { - uint64_t mask = 0; + uint64_t mask = 0; - bi_foreach_dest(I, d) { - if (bi_is_null(I->dest[d])) continue; + bi_foreach_dest(I, d) { + if (bi_is_null(I->dest[d])) + continue; - assert(I->dest[d].type == BI_INDEX_REGISTER); + assert(I->dest[d].type == BI_INDEX_REGISTER); - unsigned reg = I->dest[d].value; - unsigned count = bi_count_write_registers(I, d); + unsigned reg = I->dest[d].value; + unsigned count = bi_count_write_registers(I, d); - mask |= (BITFIELD64_MASK(count) << reg); - } + mask |= (BITFIELD64_MASK(count) << reg); + } - /* Instructions like AXCHG.i32 unconditionally both read and write - * staging registers. Even if we discard the result, the write still - * happens logically and needs to be included in our calculations. - * Obscurely, ATOM_CX is sr_write but can ignore the staging register in - * certain circumstances; this does not require consideration. - */ - if (bi_opcode_props[I->op].sr_write && I->nr_dests && I->nr_srcs && - bi_is_null(I->dest[0]) && !bi_is_null(I->src[0])) { + /* Instructions like AXCHG.i32 unconditionally both read and write + * staging registers. Even if we discard the result, the write still + * happens logically and needs to be included in our calculations. + * Obscurely, ATOM_CX is sr_write but can ignore the staging register in + * certain circumstances; this does not require consideration. + */ + if (bi_opcode_props[I->op].sr_write && I->nr_dests && I->nr_srcs && + bi_is_null(I->dest[0]) && !bi_is_null(I->src[0])) { - unsigned reg = I->src[0].value; - unsigned count = bi_count_write_registers(I, 0); + unsigned reg = I->src[0].value; + unsigned count = bi_count_write_registers(I, 0); - mask |= (BITFIELD64_MASK(count) << reg); - } + mask |= (BITFIELD64_MASK(count) << reg); + } - return mask; + return mask; } /* Update the scoreboard model to assign an instruction to a given slot */ @@ -170,140 +171,143 @@ bi_write_mask(bi_instr *I) static void bi_push_clause(struct bi_scoreboard_state *st, bi_clause *clause) { - bi_instr *I = clause->message; - unsigned slot = clause->scoreboard_id; + bi_instr *I = clause->message; + unsigned slot = clause->scoreboard_id; - if (!I) - return; + if (!I) + return; - st->read[slot] |= bi_read_mask(I, true); + st->read[slot] |= bi_read_mask(I, true); - if (bi_opcode_props[I->op].sr_write) - st->write[slot] |= bi_write_mask(I); + if (bi_opcode_props[I->op].sr_write) + st->write[slot] |= bi_write_mask(I); } /* Adds a dependency on each slot writing any specified register */ static void -bi_depend_on_writers(bi_clause *clause, struct bi_scoreboard_state *st, uint64_t regmask) +bi_depend_on_writers(bi_clause *clause, struct bi_scoreboard_state *st, + uint64_t regmask) { - for (unsigned slot = 0; slot < ARRAY_SIZE(st->write); ++slot) { - if (!(st->write[slot] & regmask)) - continue; + for (unsigned slot = 0; slot < ARRAY_SIZE(st->write); ++slot) { + if (!(st->write[slot] & regmask)) + continue; - st->write[slot] = 0; - st->read[slot] = 0; + st->write[slot] = 0; + st->read[slot] = 0; - clause->dependencies |= BITFIELD_BIT(slot); - } + clause->dependencies |= BITFIELD_BIT(slot); + } } static void -bi_set_staging_barrier(bi_clause *clause, struct bi_scoreboard_state *st, uint64_t regmask) +bi_set_staging_barrier(bi_clause *clause, struct 
bi_scoreboard_state *st, + uint64_t regmask) { - for (unsigned slot = 0; slot < ARRAY_SIZE(st->read); ++slot) { - if (!(st->read[slot] & regmask)) - continue; + for (unsigned slot = 0; slot < ARRAY_SIZE(st->read); ++slot) { + if (!(st->read[slot] & regmask)) + continue; - st->read[slot] = 0; - clause->staging_barrier = true; - } + st->read[slot] = 0; + clause->staging_barrier = true; + } } /* Sets the dependencies for a given clause, updating the model */ static void -bi_set_dependencies(bi_block *block, bi_clause *clause, struct bi_scoreboard_state *st) +bi_set_dependencies(bi_block *block, bi_clause *clause, + struct bi_scoreboard_state *st) { - bi_foreach_instr_in_clause(block, clause, I) { - uint64_t read = bi_read_mask(I, false); - uint64_t written = bi_write_mask(I); + bi_foreach_instr_in_clause(block, clause, I) { + uint64_t read = bi_read_mask(I, false); + uint64_t written = bi_write_mask(I); - /* Read-after-write; write-after-write */ - bi_depend_on_writers(clause, st, read | written); + /* Read-after-write; write-after-write */ + bi_depend_on_writers(clause, st, read | written); - /* Write-after-read */ - bi_set_staging_barrier(clause, st, written); - } + /* Write-after-read */ + bi_set_staging_barrier(clause, st, written); + } - /* LD_VAR instructions must be serialized per-quad. Just always depend - * on any LD_VAR instructions. This isn't optimal, but doing better - * requires divergence-aware data flow analysis. - * - * Similarly, memory loads/stores need to be synchronized. For now, - * force them to be serialized. This is not optimal. - */ - if (clause->message && bi_should_serialize(clause->message)) - clause->dependencies |= BITFIELD_BIT(BI_SLOT_SERIAL); + /* LD_VAR instructions must be serialized per-quad. Just always depend + * on any LD_VAR instructions. This isn't optimal, but doing better + * requires divergence-aware data flow analysis. + * + * Similarly, memory loads/stores need to be synchronized. For now, + * force them to be serialized. This is not optimal. + */ + if (clause->message && bi_should_serialize(clause->message)) + clause->dependencies |= BITFIELD_BIT(BI_SLOT_SERIAL); - /* Barriers must wait on all slots to flush existing work. It might be - * possible to skip this with more information about the barrier. For - * now, be conservative. - */ - if (clause->message && clause->message->op == BI_OPCODE_BARRIER) - clause->dependencies |= BITFIELD_MASK(BI_NUM_GENERAL_SLOTS); + /* Barriers must wait on all slots to flush existing work. It might be + * possible to skip this with more information about the barrier. For + * now, be conservative. 
+ */ + if (clause->message && clause->message->op == BI_OPCODE_BARRIER) + clause->dependencies |= BITFIELD_MASK(BI_NUM_GENERAL_SLOTS); } static bool scoreboard_block_update(bi_block *blk) { - bool progress = false; + bool progress = false; - /* pending_in[s] = sum { p in pred[s] } ( pending_out[p] ) */ - bi_foreach_predecessor(blk, pred) { - for (unsigned i = 0; i < BI_NUM_SLOTS; ++i) { - blk->scoreboard_in.read[i] |= (*pred)->scoreboard_out.read[i]; - blk->scoreboard_in.write[i] |= (*pred)->scoreboard_out.write[i]; - } - } + /* pending_in[s] = sum { p in pred[s] } ( pending_out[p] ) */ + bi_foreach_predecessor(blk, pred) { + for (unsigned i = 0; i < BI_NUM_SLOTS; ++i) { + blk->scoreboard_in.read[i] |= (*pred)->scoreboard_out.read[i]; + blk->scoreboard_in.write[i] |= (*pred)->scoreboard_out.write[i]; + } + } - struct bi_scoreboard_state state = blk->scoreboard_in; + struct bi_scoreboard_state state = blk->scoreboard_in; - /* Assign locally */ + /* Assign locally */ - bi_foreach_clause_in_block(blk, clause) { - bi_set_dependencies(blk, clause, &state); - bi_push_clause(&state, clause); - } + bi_foreach_clause_in_block(blk, clause) { + bi_set_dependencies(blk, clause, &state); + bi_push_clause(&state, clause); + } - /* To figure out progress, diff scoreboard_out */ + /* To figure out progress, diff scoreboard_out */ - for (unsigned i = 0; i < BI_NUM_SLOTS; ++i) - progress |= !!memcmp(&state, &blk->scoreboard_out, sizeof(state)); + for (unsigned i = 0; i < BI_NUM_SLOTS; ++i) + progress |= !!memcmp(&state, &blk->scoreboard_out, sizeof(state)); - blk->scoreboard_out = state; + blk->scoreboard_out = state; - return progress; + return progress; } void bi_assign_scoreboard(bi_context *ctx) { - u_worklist worklist; - bi_worklist_init(ctx, &worklist); + u_worklist worklist; + bi_worklist_init(ctx, &worklist); - /* First, assign slots. */ - bi_foreach_block(ctx, block) { - bi_foreach_clause_in_block(block, clause) { - if (clause->message) { - unsigned slot = bi_choose_scoreboard_slot(clause->message); - clause->scoreboard_id = slot; - } - } + /* First, assign slots. 
*/ + bi_foreach_block(ctx, block) { + bi_foreach_clause_in_block(block, clause) { + if (clause->message) { + unsigned slot = bi_choose_scoreboard_slot(clause->message); + clause->scoreboard_id = slot; + } + } - bi_worklist_push_tail(&worklist, block); - } + bi_worklist_push_tail(&worklist, block); + } - /* Next, perform forward data flow analysis to calculate dependencies */ - while (!u_worklist_is_empty(&worklist)) { - /* Pop from the front for forward analysis */ - bi_block *blk = bi_worklist_pop_head(&worklist); + /* Next, perform forward data flow analysis to calculate dependencies */ + while (!u_worklist_is_empty(&worklist)) { + /* Pop from the front for forward analysis */ + bi_block *blk = bi_worklist_pop_head(&worklist); - if (scoreboard_block_update(blk)) { - bi_foreach_successor(blk, succ) - bi_worklist_push_tail(&worklist, succ); - } - } + if (scoreboard_block_update(blk)) { + bi_foreach_successor(blk, succ) + bi_worklist_push_tail(&worklist, succ); + } + } - u_worklist_fini(&worklist); + u_worklist_fini(&worklist); } diff --git a/src/panfrost/bifrost/bi_test.h b/src/panfrost/bifrost/bi_test.h index 4b54944c0c4..00247c10e25 100644 --- a/src/panfrost/bifrost/bi_test.h +++ b/src/panfrost/bifrost/bi_test.h @@ -27,38 +27,38 @@ #ifndef __BI_TEST_H #define __BI_TEST_H -#include #include +#include #include "compiler.h" /* Helper to generate a bi_builder suitable for creating test instructions */ static inline bi_block * bit_block(bi_context *ctx) { - bi_block *blk = rzalloc(ctx, bi_block); + bi_block *blk = rzalloc(ctx, bi_block); - util_dynarray_init(&blk->predecessors, blk); - list_addtail(&blk->link, &ctx->blocks); - list_inithead(&blk->instructions); + util_dynarray_init(&blk->predecessors, blk); + list_addtail(&blk->link, &ctx->blocks); + list_inithead(&blk->instructions); - blk->index = ctx->num_blocks++; + blk->index = ctx->num_blocks++; - return blk; + return blk; } static inline bi_builder * bit_builder(void *memctx) { - bi_context *ctx = rzalloc(memctx, bi_context); - list_inithead(&ctx->blocks); - ctx->inputs = rzalloc(memctx, struct panfrost_compile_inputs); + bi_context *ctx = rzalloc(memctx, bi_context); + list_inithead(&ctx->blocks); + ctx->inputs = rzalloc(memctx, struct panfrost_compile_inputs); - bi_block *blk = bit_block(ctx); + bi_block *blk = bit_block(ctx); - bi_builder *b = rzalloc(memctx, bi_builder); - b->shader = ctx; - b->cursor = bi_after_block(blk); - return b; + bi_builder *b = rzalloc(memctx, bi_builder); + b->shader = ctx; + b->cursor = bi_after_block(blk); + return b; } /* Helper to compare for logical equality of instructions. 
Need to skip over @@ -69,14 +69,15 @@ bit_instr_equal(bi_instr *A, bi_instr *B) { size_t skip = sizeof(struct list_head) + 2 * sizeof(bi_index *); - if (memcmp((uint8_t *) A + skip, (uint8_t *) B + skip, sizeof(bi_instr) - skip)) - return false; + if (memcmp((uint8_t *)A + skip, (uint8_t *)B + skip, + sizeof(bi_instr) - skip)) + return false; if (memcmp(A->dest, B->dest, sizeof(bi_index) * A->nr_dests)) - return false; + return false; if (memcmp(A->src, B->src, sizeof(bi_index) * A->nr_srcs)) - return false; + return false; return true; } @@ -87,8 +88,9 @@ bit_block_equal(bi_block *A, bi_block *B) if (list_length(&A->instructions) != list_length(&B->instructions)) return false; - list_pair_for_each_entry(bi_instr, insA, insB, - &A->instructions, &B->instructions, link) { + list_pair_for_each_entry(bi_instr, insA, insB, &A->instructions, + &B->instructions, link) + { if (!bit_instr_equal(insA, insB)) return false; } @@ -102,8 +104,9 @@ bit_shader_equal(bi_context *A, bi_context *B) if (list_length(&A->blocks) != list_length(&B->blocks)) return false; - list_pair_for_each_entry(bi_block, blockA, blockB, - &A->blocks, &B->blocks, link) { + list_pair_for_each_entry(bi_block, blockA, blockB, &A->blocks, &B->blocks, + link) + { if (!bit_block_equal(blockA, blockB)) return false; } @@ -111,30 +114,31 @@ bit_shader_equal(bi_context *A, bi_context *B) return true; } -#define ASSERT_SHADER_EQUAL(A, B) \ - if (!bit_shader_equal(A, B)) { \ - ADD_FAILURE(); \ - fprintf(stderr, "Pass produced unexpected results"); \ - fprintf(stderr, " Actual:\n"); \ - bi_print_shader(A, stderr); \ - fprintf(stderr, " Expected:\n"); \ - bi_print_shader(B, stderr); \ - fprintf(stderr, "\n"); \ - } \ +#define ASSERT_SHADER_EQUAL(A, B) \ + if (!bit_shader_equal(A, B)) { \ + ADD_FAILURE(); \ + fprintf(stderr, "Pass produced unexpected results"); \ + fprintf(stderr, " Actual:\n"); \ + bi_print_shader(A, stderr); \ + fprintf(stderr, " Expected:\n"); \ + bi_print_shader(B, stderr); \ + fprintf(stderr, "\n"); \ + } -#define INSTRUCTION_CASE(instr, expected, pass) do { \ - bi_builder *A = bit_builder(mem_ctx); \ - bi_builder *B = bit_builder(mem_ctx); \ - { \ - bi_builder *b = A; \ - instr; \ - } \ - { \ - bi_builder *b = B; \ - expected; \ - } \ - pass(A->shader); \ - ASSERT_SHADER_EQUAL(A->shader, B->shader); \ -} while(0) +#define INSTRUCTION_CASE(instr, expected, pass) \ + do { \ + bi_builder *A = bit_builder(mem_ctx); \ + bi_builder *B = bit_builder(mem_ctx); \ + { \ + bi_builder *b = A; \ + instr; \ + } \ + { \ + bi_builder *b = B; \ + expected; \ + } \ + pass(A->shader); \ + ASSERT_SHADER_EQUAL(A->shader, B->shader); \ + } while (0) #endif diff --git a/src/panfrost/bifrost/bi_validate.c b/src/panfrost/bifrost/bi_validate.c index 807aff18531..05b9cfa0347 100644 --- a/src/panfrost/bifrost/bi_validate.c +++ b/src/panfrost/bifrost/bi_validate.c @@ -21,8 +21,8 @@ * SOFTWARE. 
*/ -#include "compiler.h" #include "util/u_memory.h" +#include "compiler.h" /* Validatation doesn't make sense in release builds */ #ifndef NDEBUG @@ -35,21 +35,21 @@ bool bi_validate_initialization(bi_context *ctx) { - bool success = true; + bool success = true; - /* Calculate the live set */ - bi_block *entry = bi_entry_block(ctx); - bi_compute_liveness_ssa(ctx); + /* Calculate the live set */ + bi_block *entry = bi_entry_block(ctx); + bi_compute_liveness_ssa(ctx); - /* Validate that the live set is indeed empty */ - for (unsigned i = 0; i < ctx->ssa_alloc; ++i) { - if (BITSET_TEST(entry->ssa_live_in, i)) { - fprintf(stderr, "%u\n", i); - success = false; - } - } + /* Validate that the live set is indeed empty */ + for (unsigned i = 0; i < ctx->ssa_alloc; ++i) { + if (BITSET_TEST(entry->ssa_live_in, i)) { + fprintf(stderr, "%u\n", i); + success = false; + } + } - return success; + return success; } /* @@ -60,47 +60,46 @@ bi_validate_initialization(bi_context *ctx) static bool bi_validate_preload(bi_context *ctx) { - bool start = true; - uint64_t preloaded = 0; + bool start = true; + uint64_t preloaded = 0; - bi_foreach_block(ctx, block) { - bi_foreach_instr_in_block(block, I) { - /* No instruction should have a register destination */ - bi_foreach_dest(I, d) { - if (I->dest[d].type == BI_INDEX_REGISTER) - return false; - } + bi_foreach_block(ctx, block) { + bi_foreach_instr_in_block(block, I) { + /* No instruction should have a register destination */ + bi_foreach_dest(I, d) { + if (I->dest[d].type == BI_INDEX_REGISTER) + return false; + } - /* Preloads are register moves at the start */ - bool is_preload = - start && I->op == BI_OPCODE_MOV_I32 && - I->src[0].type == BI_INDEX_REGISTER; + /* Preloads are register moves at the start */ + bool is_preload = start && I->op == BI_OPCODE_MOV_I32 && + I->src[0].type == BI_INDEX_REGISTER; - /* After the first nonpreload, we're done preloading */ - start &= is_preload; + /* After the first nonpreload, we're done preloading */ + start &= is_preload; - /* Only preloads may have a register source */ - bi_foreach_src(I, s) { - if (I->src[s].type == BI_INDEX_REGISTER && !is_preload) - return false; - } + /* Only preloads may have a register source */ + bi_foreach_src(I, s) { + if (I->src[s].type == BI_INDEX_REGISTER && !is_preload) + return false; + } - /* Check uniqueness */ - if (is_preload) { - unsigned r = I->src[0].value; + /* Check uniqueness */ + if (is_preload) { + unsigned r = I->src[0].value; - if (preloaded & BITFIELD64_BIT(r)) - return false; + if (preloaded & BITFIELD64_BIT(r)) + return false; - preloaded |= BITFIELD64_BIT(r); - } - } + preloaded |= BITFIELD64_BIT(r); + } + } - /* Only the first block may preload */ - start = false; - } + /* Only the first block may preload */ + start = false; + } - return true; + return true; } /* @@ -111,38 +110,37 @@ bi_validate_preload(bi_context *ctx) static bool bi_validate_width(bi_context *ctx) { - bool succ = true; - uint8_t *width = calloc(ctx->ssa_alloc, sizeof(uint8_t)); + bool succ = true; + uint8_t *width = calloc(ctx->ssa_alloc, sizeof(uint8_t)); - bi_foreach_instr_global(ctx, I) { - bi_foreach_dest(I, d) { - assert(bi_is_ssa(I->dest[d])); + bi_foreach_instr_global(ctx, I) { + bi_foreach_dest(I, d) { + assert(bi_is_ssa(I->dest[d])); - unsigned v = I->dest[d].value; - assert(width[v] == 0 && "broken SSA"); + unsigned v = I->dest[d].value; + assert(width[v] == 0 && "broken SSA"); - width[v] = bi_count_write_registers(I, d); - } - } + width[v] = bi_count_write_registers(I, d); + } + } - 
bi_foreach_instr_global(ctx, I) { - bi_foreach_ssa_src(I, s) { - unsigned v = I->src[s].value; - unsigned n = bi_count_read_registers(I, s); + bi_foreach_instr_global(ctx, I) { + bi_foreach_ssa_src(I, s) { + unsigned v = I->src[s].value; + unsigned n = bi_count_read_registers(I, s); - if (width[v] != n) { - succ = false; - fprintf(stderr, - "source %u, expected width %u, got width %u\n", - s, n, width[v]); - bi_print_instr(I, stderr); - fprintf(stderr, "\n"); - } - } - } + if (width[v] != n) { + succ = false; + fprintf(stderr, "source %u, expected width %u, got width %u\n", s, + n, width[v]); + bi_print_instr(I, stderr); + fprintf(stderr, "\n"); + } + } + } - free(width); - return succ; + free(width); + return succ; } /* @@ -151,20 +149,20 @@ bi_validate_width(bi_context *ctx) static bool bi_validate_dest(bi_context *ctx) { - bool succ = true; + bool succ = true; - bi_foreach_instr_global(ctx, I) { - bi_foreach_dest(I, d) { - if (bi_is_null(I->dest[d])) { - succ = false; - fprintf(stderr, "expected dest %u", d); - bi_print_instr(I, stderr); - fprintf(stderr, "\n"); - } - } - } + bi_foreach_instr_global(ctx, I) { + bi_foreach_dest(I, d) { + if (bi_is_null(I->dest[d])) { + succ = false; + fprintf(stderr, "expected dest %u", d); + bi_print_instr(I, stderr); + fprintf(stderr, "\n"); + } + } + } - return succ; + return succ; } /* @@ -173,57 +171,57 @@ bi_validate_dest(bi_context *ctx) static bool bi_validate_phi_ordering(bi_context *ctx) { - bi_foreach_block(ctx, block) { - bool start = true; + bi_foreach_block(ctx, block) { + bool start = true; - bi_foreach_instr_in_block(block, I) { - if (start) - start = I->op == BI_OPCODE_PHI; - else if (I->op == BI_OPCODE_PHI) - return false; - } - } + bi_foreach_instr_in_block(block, I) { + if (start) + start = I->op == BI_OPCODE_PHI; + else if (I->op == BI_OPCODE_PHI) + return false; + } + } - return true; + return true; } void bi_validate(bi_context *ctx, const char *after) { - bool fail = false; + bool fail = false; - if (bifrost_debug & BIFROST_DBG_NOVALIDATE) - return; + if (bifrost_debug & BIFROST_DBG_NOVALIDATE) + return; - if (!bi_validate_initialization(ctx)) { - fprintf(stderr, "Uninitialized data read after %s\n", after); - fail = true; - } + if (!bi_validate_initialization(ctx)) { + fprintf(stderr, "Uninitialized data read after %s\n", after); + fail = true; + } - if (!bi_validate_preload(ctx)) { - fprintf(stderr, "Unexpected preload after %s\n", after); - fail = true; - } + if (!bi_validate_preload(ctx)) { + fprintf(stderr, "Unexpected preload after %s\n", after); + fail = true; + } - if (!bi_validate_width(ctx)) { - fprintf(stderr, "Unexpected vector with after %s\n", after); - fail = true; - } + if (!bi_validate_width(ctx)) { + fprintf(stderr, "Unexpected vector with after %s\n", after); + fail = true; + } - if (!bi_validate_dest(ctx)) { - fprintf(stderr, "Unexpected source/dest after %s\n", after); - fail = true; - } + if (!bi_validate_dest(ctx)) { + fprintf(stderr, "Unexpected source/dest after %s\n", after); + fail = true; + } - if (!bi_validate_phi_ordering(ctx)) { - fprintf(stderr, "Unexpected phi ordering after %s\n", after); - fail = true; - } + if (!bi_validate_phi_ordering(ctx)) { + fprintf(stderr, "Unexpected phi ordering after %s\n", after); + fail = true; + } - if (fail) { - bi_print_shader(ctx, stderr); - exit(1); - } + if (fail) { + bi_print_shader(ctx, stderr); + exit(1); + } } #endif /* NDEBUG */ diff --git a/src/panfrost/bifrost/bifrost.h b/src/panfrost/bifrost/bifrost.h index b5a9b7e49ab..2fa43f368b1 100644 --- 
a/src/panfrost/bifrost/bifrost.h +++ b/src/panfrost/bifrost/bifrost.h @@ -26,63 +26,63 @@ #ifndef __bifrost_h__ #define __bifrost_h__ -#include -#include -#include #include +#include +#include +#include #ifdef __cplusplus extern "C" { #endif -#define BIFROST_DBG_MSGS 0x0001 -#define BIFROST_DBG_SHADERS 0x0002 -#define BIFROST_DBG_SHADERDB 0x0004 -#define BIFROST_DBG_VERBOSE 0x0008 -#define BIFROST_DBG_INTERNAL 0x0010 -#define BIFROST_DBG_NOSCHED 0x0020 -#define BIFROST_DBG_INORDER 0x0040 -#define BIFROST_DBG_NOVALIDATE 0x0080 -#define BIFROST_DBG_NOOPT 0x0100 -#define BIFROST_DBG_NOIDVS 0x0200 -#define BIFROST_DBG_NOSB 0x0400 -#define BIFROST_DBG_NOPRELOAD 0x0800 -#define BIFROST_DBG_SPILL 0x1000 -#define BIFROST_DBG_NOPSCHED 0x2000 +#define BIFROST_DBG_MSGS 0x0001 +#define BIFROST_DBG_SHADERS 0x0002 +#define BIFROST_DBG_SHADERDB 0x0004 +#define BIFROST_DBG_VERBOSE 0x0008 +#define BIFROST_DBG_INTERNAL 0x0010 +#define BIFROST_DBG_NOSCHED 0x0020 +#define BIFROST_DBG_INORDER 0x0040 +#define BIFROST_DBG_NOVALIDATE 0x0080 +#define BIFROST_DBG_NOOPT 0x0100 +#define BIFROST_DBG_NOIDVS 0x0200 +#define BIFROST_DBG_NOSB 0x0400 +#define BIFROST_DBG_NOPRELOAD 0x0800 +#define BIFROST_DBG_SPILL 0x1000 +#define BIFROST_DBG_NOPSCHED 0x2000 extern int bifrost_debug; enum bifrost_message_type { - BIFROST_MESSAGE_NONE = 0, - BIFROST_MESSAGE_VARYING = 1, - BIFROST_MESSAGE_ATTRIBUTE = 2, - BIFROST_MESSAGE_TEX = 3, - BIFROST_MESSAGE_VARTEX = 4, - BIFROST_MESSAGE_LOAD = 5, - BIFROST_MESSAGE_STORE = 6, - BIFROST_MESSAGE_ATOMIC = 7, - BIFROST_MESSAGE_BARRIER = 8, - BIFROST_MESSAGE_BLEND = 9, - BIFROST_MESSAGE_TILE = 10, - /* type 11 reserved */ - BIFROST_MESSAGE_Z_STENCIL = 12, - BIFROST_MESSAGE_ATEST = 13, - BIFROST_MESSAGE_JOB = 14, - BIFROST_MESSAGE_64BIT = 15 + BIFROST_MESSAGE_NONE = 0, + BIFROST_MESSAGE_VARYING = 1, + BIFROST_MESSAGE_ATTRIBUTE = 2, + BIFROST_MESSAGE_TEX = 3, + BIFROST_MESSAGE_VARTEX = 4, + BIFROST_MESSAGE_LOAD = 5, + BIFROST_MESSAGE_STORE = 6, + BIFROST_MESSAGE_ATOMIC = 7, + BIFROST_MESSAGE_BARRIER = 8, + BIFROST_MESSAGE_BLEND = 9, + BIFROST_MESSAGE_TILE = 10, + /* type 11 reserved */ + BIFROST_MESSAGE_Z_STENCIL = 12, + BIFROST_MESSAGE_ATEST = 13, + BIFROST_MESSAGE_JOB = 14, + BIFROST_MESSAGE_64BIT = 15 }; enum bifrost_ftz { - BIFROST_FTZ_DISABLE = 0, - BIFROST_FTZ_DX11 = 1, - BIFROST_FTZ_ALWAYS = 2, - BIFROST_FTZ_ABRUPT = 3 + BIFROST_FTZ_DISABLE = 0, + BIFROST_FTZ_DX11 = 1, + BIFROST_FTZ_ALWAYS = 2, + BIFROST_FTZ_ABRUPT = 3 }; enum bifrost_exceptions { - BIFROST_EXCEPTIONS_ENABLED = 0, - BIFROST_EXCEPTIONS_DISABLED = 1, - BIFROST_EXCEPTIONS_PRECISE_DIVISION = 2, - BIFROST_EXCEPTIONS_PRECISE_SQRT = 3, + BIFROST_EXCEPTIONS_ENABLED = 0, + BIFROST_EXCEPTIONS_DISABLED = 1, + BIFROST_EXCEPTIONS_PRECISE_DIVISION = 2, + BIFROST_EXCEPTIONS_PRECISE_SQRT = 3, }; /* Describes clause flow control, with respect to control flow and branch @@ -102,182 +102,182 @@ enum bifrost_exceptions { */ enum bifrost_flow { - /* End-of-shader */ - BIFROST_FLOW_END = 0, + /* End-of-shader */ + BIFROST_FLOW_END = 0, - /* Non back-to-back, PC-encoded reconvergence */ - BIFROST_FLOW_NBTB_PC = 1, + /* Non back-to-back, PC-encoded reconvergence */ + BIFROST_FLOW_NBTB_PC = 1, - /* Non back-to-back, unconditional reconvergence */ - BIFROST_FLOW_NBTB_UNCONDITIONAL = 2, + /* Non back-to-back, unconditional reconvergence */ + BIFROST_FLOW_NBTB_UNCONDITIONAL = 2, - /* Non back-to-back, no reconvergence */ - BIFROST_FLOW_NBTB = 3, + /* Non back-to-back, no reconvergence */ + BIFROST_FLOW_NBTB = 3, - /* Back-to-back, unconditional 
reconvergence */ - BIFROST_FLOW_BTB_UNCONDITIONAL = 4, + /* Back-to-back, unconditional reconvergence */ + BIFROST_FLOW_BTB_UNCONDITIONAL = 4, - /* Back-to-back, no reconvergence */ - BIFROST_FLOW_BTB_NONE = 5, + /* Back-to-back, no reconvergence */ + BIFROST_FLOW_BTB_NONE = 5, - /* Write elision, unconditional reconvergence */ - BIFROST_FLOW_WE_UNCONDITIONAL = 6, + /* Write elision, unconditional reconvergence */ + BIFROST_FLOW_WE_UNCONDITIONAL = 6, - /* Write elision, no reconvergence */ - BIFROST_FLOW_WE = 7, + /* Write elision, no reconvergence */ + BIFROST_FLOW_WE = 7, }; enum bifrost_slot { - /* 0-5 are general purpose */ - BIFROST_SLOT_ELDEST_DEPTH = 6, - BIFROST_SLOT_ELDEST_COLOUR = 7, + /* 0-5 are general purpose */ + BIFROST_SLOT_ELDEST_DEPTH = 6, + BIFROST_SLOT_ELDEST_COLOUR = 7, }; struct bifrost_header { - /* Reserved */ - unsigned zero1 : 5; + /* Reserved */ + unsigned zero1 : 5; - /* Flush-to-zero mode, leave zero for GL */ - enum bifrost_ftz flush_to_zero : 2; + /* Flush-to-zero mode, leave zero for GL */ + enum bifrost_ftz flush_to_zero : 2; - /* Convert any infinite result of any floating-point operation to the - * biggest representable number */ - unsigned suppress_inf: 1; + /* Convert any infinite result of any floating-point operation to the + * biggest representable number */ + unsigned suppress_inf : 1; - /* Convert NaN to +0.0 */ - unsigned suppress_nan : 1; + /* Convert NaN to +0.0 */ + unsigned suppress_nan : 1; - /* Floating-point excception handling mode */ - enum bifrost_exceptions float_exceptions : 2; + /* Floating-point excception handling mode */ + enum bifrost_exceptions float_exceptions : 2; - /* Enum describing the flow control, which matters for handling - * divergence and reconvergence efficiently */ - enum bifrost_flow flow_control : 3; + /* Enum describing the flow control, which matters for handling + * divergence and reconvergence efficiently */ + enum bifrost_flow flow_control : 3; - /* Reserved */ - unsigned zero2 : 1; + /* Reserved */ + unsigned zero2 : 1; - /* Terminate discarded threads, rather than continuing execution. Set - * for fragment shaders for standard GL behaviour of DISCARD. Also in a - * fragment shader, this disables helper invocations, so cannot be used - * in a shader that requires derivatives or texture LOD computation */ - unsigned terminate_discarded_threads : 1; + /* Terminate discarded threads, rather than continuing execution. Set + * for fragment shaders for standard GL behaviour of DISCARD. Also in a + * fragment shader, this disables helper invocations, so cannot be used + * in a shader that requires derivatives or texture LOD computation */ + unsigned terminate_discarded_threads : 1; - /* If set, the hardware may prefetch the next clause. If false, the - * hardware may not. Clear for unconditional branches. */ - unsigned next_clause_prefetch : 1; + /* If set, the hardware may prefetch the next clause. If false, the + * hardware may not. Clear for unconditional branches. */ + unsigned next_clause_prefetch : 1; - /* If set, a barrier will be inserted after the clause waiting for all - * message passing instructions to read their staging registers, such - * that it is safe for the next clause to write them. */ - unsigned staging_barrier: 1; - unsigned staging_register : 6; + /* If set, a barrier will be inserted after the clause waiting for all + * message passing instructions to read their staging registers, such + * that it is safe for the next clause to write them. 
*/ + unsigned staging_barrier : 1; + unsigned staging_register : 6; - /* Slots to wait on and slot to be used for message passing - * instructions respectively */ - unsigned dependency_wait : 8; - unsigned dependency_slot : 3; + /* Slots to wait on and slot to be used for message passing + * instructions respectively */ + unsigned dependency_wait : 8; + unsigned dependency_slot : 3; - enum bifrost_message_type message_type : 5; - enum bifrost_message_type next_message_type : 5; + enum bifrost_message_type message_type : 5; + enum bifrost_message_type next_message_type : 5; } __attribute__((packed)); enum bifrost_packed_src { - BIFROST_SRC_PORT0 = 0, - BIFROST_SRC_PORT1 = 1, - BIFROST_SRC_PORT2 = 2, - BIFROST_SRC_STAGE = 3, - BIFROST_SRC_FAU_LO = 4, - BIFROST_SRC_FAU_HI = 5, - BIFROST_SRC_PASS_FMA = 6, - BIFROST_SRC_PASS_ADD = 7, + BIFROST_SRC_PORT0 = 0, + BIFROST_SRC_PORT1 = 1, + BIFROST_SRC_PORT2 = 2, + BIFROST_SRC_STAGE = 3, + BIFROST_SRC_FAU_LO = 4, + BIFROST_SRC_FAU_HI = 5, + BIFROST_SRC_PASS_FMA = 6, + BIFROST_SRC_PASS_ADD = 7, }; struct bifrost_fma_inst { - unsigned src0 : 3; - unsigned op : 20; + unsigned src0 : 3; + unsigned op : 20; } __attribute__((packed)); struct bifrost_add_inst { - unsigned src0 : 3; - unsigned op : 17; + unsigned src0 : 3; + unsigned op : 17; } __attribute__((packed)); enum branch_bit_size { - BR_SIZE_32 = 0, - BR_SIZE_16XX = 1, - BR_SIZE_16YY = 2, - // For the above combinations of bitsize and location, an extra bit is - // encoded via comparing the sources. The only possible source of ambiguity - // would be if the sources were the same, but then the branch condition - // would be always true or always false anyways, so we can ignore it. But - // this no longer works when comparing the y component to the x component, - // since it's valid to compare the y component of a source against its own - // x component. Instead, the extra bit is encoded via an extra bitsize. - BR_SIZE_16YX0 = 3, - BR_SIZE_16YX1 = 4, - BR_SIZE_32_AND_16X = 5, - BR_SIZE_32_AND_16Y = 6, - // Used for comparisons with zero and always-true, see below. I think this - // only works for integer comparisons. - BR_SIZE_ZERO = 7, + BR_SIZE_32 = 0, + BR_SIZE_16XX = 1, + BR_SIZE_16YY = 2, + // For the above combinations of bitsize and location, an extra bit is + // encoded via comparing the sources. The only possible source of ambiguity + // would be if the sources were the same, but then the branch condition + // would be always true or always false anyways, so we can ignore it. But + // this no longer works when comparing the y component to the x component, + // since it's valid to compare the y component of a source against its own + // x component. Instead, the extra bit is encoded via an extra bitsize. + BR_SIZE_16YX0 = 3, + BR_SIZE_16YX1 = 4, + BR_SIZE_32_AND_16X = 5, + BR_SIZE_32_AND_16Y = 6, + // Used for comparisons with zero and always-true, see below. I think this + // only works for integer comparisons. 
+ BR_SIZE_ZERO = 7, }; struct bifrost_regs { - unsigned fau_idx : 8; - unsigned reg3 : 6; - unsigned reg2 : 6; - unsigned reg0 : 5; - unsigned reg1 : 6; - unsigned ctrl : 4; + unsigned fau_idx : 8; + unsigned reg3 : 6; + unsigned reg2 : 6; + unsigned reg0 : 5; + unsigned reg1 : 6; + unsigned ctrl : 4; } __attribute__((packed)); -#define BIFROST_FMTC_CONSTANTS 0b0011 -#define BIFROST_FMTC_FINAL 0b0111 +#define BIFROST_FMTC_CONSTANTS 0b0011 +#define BIFROST_FMTC_FINAL 0b0111 struct bifrost_fmt_constant { - unsigned pos : 4; - unsigned tag : 4; - uint64_t imm_1 : 60; - uint64_t imm_2 : 60; + unsigned pos : 4; + unsigned tag : 4; + uint64_t imm_1 : 60; + uint64_t imm_2 : 60; } __attribute__((packed)); /* Clause formats, encoded in a table */ enum bi_clause_subword { - /* Literal 3-bit values */ - BI_CLAUSE_SUBWORD_LITERAL_0 = 0, - /* etc */ - BI_CLAUSE_SUBWORD_LITERAL_7 = 7, + /* Literal 3-bit values */ + BI_CLAUSE_SUBWORD_LITERAL_0 = 0, + /* etc */ + BI_CLAUSE_SUBWORD_LITERAL_7 = 7, - /* The value of the corresponding tuple in the corresponding bits */ - BI_CLAUSE_SUBWORD_TUPLE_0 = 8, - /* etc */ - BI_CLAUSE_SUBWORD_TUPLE_7 = 15, + /* The value of the corresponding tuple in the corresponding bits */ + BI_CLAUSE_SUBWORD_TUPLE_0 = 8, + /* etc */ + BI_CLAUSE_SUBWORD_TUPLE_7 = 15, - /* Clause header */ - BI_CLAUSE_SUBWORD_HEADER = 16, + /* Clause header */ + BI_CLAUSE_SUBWORD_HEADER = 16, - /* Leave zero, but semantically distinct from literal 0 */ - BI_CLAUSE_SUBWORD_RESERVED = 17, + /* Leave zero, but semantically distinct from literal 0 */ + BI_CLAUSE_SUBWORD_RESERVED = 17, - /* Embedded constant 0 */ - BI_CLAUSE_SUBWORD_CONSTANT = 18, + /* Embedded constant 0 */ + BI_CLAUSE_SUBWORD_CONSTANT = 18, - /* M bits controlling modifier for the constant */ - BI_CLAUSE_SUBWORD_M = 19, + /* M bits controlling modifier for the constant */ + BI_CLAUSE_SUBWORD_M = 19, - /* Z bit: 1 to begin encoding constants, 0 to terminate the clause */ - BI_CLAUSE_SUBWORD_Z = 20, + /* Z bit: 1 to begin encoding constants, 0 to terminate the clause */ + BI_CLAUSE_SUBWORD_Z = 20, - /* Upper 3-bits of a given tuple and zero extended */ - BI_CLAUSE_SUBWORD_UPPER_0 = 32, - /* etc */ - BI_CLAUSE_SUBWORD_UPPER_7 = BI_CLAUSE_SUBWORD_UPPER_0 + 7, + /* Upper 3-bits of a given tuple and zero extended */ + BI_CLAUSE_SUBWORD_UPPER_0 = 32, + /* etc */ + BI_CLAUSE_SUBWORD_UPPER_7 = BI_CLAUSE_SUBWORD_UPPER_0 + 7, - /* Upper 3-bits of two tuples, concatenated and zero-extended */ - BI_CLAUSE_SUBWORD_UPPER_23 = BI_CLAUSE_SUBWORD_UPPER_0 + 23, - BI_CLAUSE_SUBWORD_UPPER_56 = BI_CLAUSE_SUBWORD_UPPER_0 + 56, + /* Upper 3-bits of two tuples, concatenated and zero-extended */ + BI_CLAUSE_SUBWORD_UPPER_23 = BI_CLAUSE_SUBWORD_UPPER_0 + 23, + BI_CLAUSE_SUBWORD_UPPER_56 = BI_CLAUSE_SUBWORD_UPPER_0 + 56, }; #define L(x) ((enum bi_clause_subword)(BI_CLAUSE_SUBWORD_LITERAL_0 + x)) @@ -290,15 +290,15 @@ enum bi_clause_subword { #define R BI_CLAUSE_SUBWORD_RESERVED struct bi_clause_format { - unsigned format; /* format number */ - unsigned pos; /* index in the clause */ - enum bi_clause_subword tag_1; /* 2-bits */ - enum bi_clause_subword tag_2; /* 3-bits */ - enum bi_clause_subword tag_3; /* 3-bits */ - enum bi_clause_subword s0_s3; /* 60 bits */ - enum bi_clause_subword s4; /* 15 bits */ - enum bi_clause_subword s5_s6; /* 30 bits */ - enum bi_clause_subword s7; /* 15 bits */ + unsigned format; /* format number */ + unsigned pos; /* index in the clause */ + enum bi_clause_subword tag_1; /* 2-bits */ + enum bi_clause_subword tag_2; /* 3-bits */ + enum 
bi_clause_subword tag_3; /* 3-bits */ + enum bi_clause_subword s0_s3; /* 60 bits */ + enum bi_clause_subword s4; /* 15 bits */ + enum bi_clause_subword s5_s6; /* 30 bits */ + enum bi_clause_subword s7; /* 15 bits */ }; /* clang-format off */ @@ -341,46 +341,46 @@ static const struct bi_clause_format bi_clause_formats[] = { * set (and ignored) as a placeholder to differentiate from reserved. */ enum bifrost_reg_mode { - BIFROST_R_WL_FMA = 1, - BIFROST_R_WH_FMA = 2, - BIFROST_R_W_FMA = 3, - BIFROST_R_WL_ADD = 4, - BIFROST_R_WH_ADD = 5, - BIFROST_R_W_ADD = 6, - BIFROST_WL_WL_ADD = 7, - BIFROST_WL_WH_ADD = 8, - BIFROST_WL_W_ADD = 9, - BIFROST_WH_WL_ADD = 10, - BIFROST_WH_WH_ADD = 11, - BIFROST_WH_W_ADD = 12, - BIFROST_W_WL_ADD = 13, - BIFROST_W_WH_ADD = 14, - BIFROST_W_W_ADD = 15, - BIFROST_IDLE_1 = 16, - BIFROST_I_W_FMA = 17, - BIFROST_I_WL_FMA = 18, - BIFROST_I_WH_FMA = 19, - BIFROST_R_I = 20, - BIFROST_I_W_ADD = 21, - BIFROST_I_WL_ADD = 22, - BIFROST_I_WH_ADD = 23, - BIFROST_WL_WH_MIX = 24, - BIFROST_WH_WL_MIX = 26, - BIFROST_IDLE = 27, + BIFROST_R_WL_FMA = 1, + BIFROST_R_WH_FMA = 2, + BIFROST_R_W_FMA = 3, + BIFROST_R_WL_ADD = 4, + BIFROST_R_WH_ADD = 5, + BIFROST_R_W_ADD = 6, + BIFROST_WL_WL_ADD = 7, + BIFROST_WL_WH_ADD = 8, + BIFROST_WL_W_ADD = 9, + BIFROST_WH_WL_ADD = 10, + BIFROST_WH_WH_ADD = 11, + BIFROST_WH_W_ADD = 12, + BIFROST_W_WL_ADD = 13, + BIFROST_W_WH_ADD = 14, + BIFROST_W_W_ADD = 15, + BIFROST_IDLE_1 = 16, + BIFROST_I_W_FMA = 17, + BIFROST_I_WL_FMA = 18, + BIFROST_I_WH_FMA = 19, + BIFROST_R_I = 20, + BIFROST_I_W_ADD = 21, + BIFROST_I_WL_ADD = 22, + BIFROST_I_WH_ADD = 23, + BIFROST_WL_WH_MIX = 24, + BIFROST_WH_WL_MIX = 26, + BIFROST_IDLE = 27, }; enum bifrost_reg_op { - BIFROST_OP_IDLE = 0, - BIFROST_OP_READ = 1, - BIFROST_OP_WRITE = 2, - BIFROST_OP_WRITE_LO = 3, - BIFROST_OP_WRITE_HI = 4, + BIFROST_OP_IDLE = 0, + BIFROST_OP_READ = 1, + BIFROST_OP_WRITE = 2, + BIFROST_OP_WRITE_LO = 3, + BIFROST_OP_WRITE_HI = 4, }; struct bifrost_reg_ctrl_23 { - enum bifrost_reg_op slot2; - enum bifrost_reg_op slot3; - bool slot3_fma; + enum bifrost_reg_op slot2; + enum bifrost_reg_op slot3; + bool slot3_fma; }; /* clang-format off */ @@ -420,201 +420,201 @@ static const struct bifrost_reg_ctrl_23 bifrost_reg_ctrl_lut[32] = { * compiler and stored as a constant */ enum bifrost_texture_operation_mode { - /* Dual texturing */ - BIFROST_TEXTURE_OPERATION_DUAL = 1, + /* Dual texturing */ + BIFROST_TEXTURE_OPERATION_DUAL = 1, - /* Single texturing */ - BIFROST_TEXTURE_OPERATION_SINGLE = 3, + /* Single texturing */ + BIFROST_TEXTURE_OPERATION_SINGLE = 3, }; enum bifrost_index { - /* Both texture/sampler index immediate */ - BIFROST_INDEX_IMMEDIATE_SHARED = 0, + /* Both texture/sampler index immediate */ + BIFROST_INDEX_IMMEDIATE_SHARED = 0, - /* Sampler index immediate, texture index from staging */ - BIFROST_INDEX_IMMEDIATE_SAMPLER = 1, + /* Sampler index immediate, texture index from staging */ + BIFROST_INDEX_IMMEDIATE_SAMPLER = 1, - /* Texture index immediate, sampler index from staging */ - BIFROST_INDEX_IMMEDIATE_TEXTURE = 2, + /* Texture index immediate, sampler index from staging */ + BIFROST_INDEX_IMMEDIATE_TEXTURE = 2, - /* Both indices from (separate) staging registers */ - BIFROST_INDEX_REGISTER = 3, + /* Both indices from (separate) staging registers */ + BIFROST_INDEX_REGISTER = 3, }; enum bifrost_tex_op { - /* Given explicit derivatives, compute a gradient descriptor */ - BIFROST_TEX_OP_GRDESC_DER = 4, + /* Given explicit derivatives, compute a gradient descriptor */ + 
BIFROST_TEX_OP_GRDESC_DER = 4, - /* Given implicit derivatives (texture coordinates in a fragment - * shader), compute a gradient descriptor */ - BIFROST_TEX_OP_GRDESC = 5, + /* Given implicit derivatives (texture coordinates in a fragment + * shader), compute a gradient descriptor */ + BIFROST_TEX_OP_GRDESC = 5, - /* Fetch a texel. Takes a staging register with LOD level / face index - * packed 16:16 */ - BIFROST_TEX_OP_FETCH = 6, + /* Fetch a texel. Takes a staging register with LOD level / face index + * packed 16:16 */ + BIFROST_TEX_OP_FETCH = 6, - /* Filtered texture */ - BIFROST_TEX_OP_TEX = 7, + /* Filtered texture */ + BIFROST_TEX_OP_TEX = 7, }; enum bifrost_lod_mode { - /* Takes two staging registers forming a 64-bit gradient descriptor - * (computed by a previous GRDESC or GRDESC_DER operation) */ - BIFROST_LOD_MODE_GRDESC = 3, + /* Takes two staging registers forming a 64-bit gradient descriptor + * (computed by a previous GRDESC or GRDESC_DER operation) */ + BIFROST_LOD_MODE_GRDESC = 3, - /* Take a staging register with 8:8 fixed-point in bottom 16-bits - * specifying an explicit LOD */ - BIFROST_LOD_MODE_EXPLICIT = 4, + /* Take a staging register with 8:8 fixed-point in bottom 16-bits + * specifying an explicit LOD */ + BIFROST_LOD_MODE_EXPLICIT = 4, - /* Takes a staging register with bottom 16-bits as 8:8 fixed-point LOD - * bias and top 16-bit as 8:8 fixed-point lower bound (generally left - * zero), added and clamped to a computed LOD */ - BIFROST_LOD_MODE_BIAS = 5, + /* Takes a staging register with bottom 16-bits as 8:8 fixed-point LOD + * bias and top 16-bit as 8:8 fixed-point lower bound (generally left + * zero), added and clamped to a computed LOD */ + BIFROST_LOD_MODE_BIAS = 5, - /* Set LOD to zero */ - BIFROST_LOD_MODE_ZERO = 6, + /* Set LOD to zero */ + BIFROST_LOD_MODE_ZERO = 6, - /* Compute LOD */ - BIFROST_LOD_MODE_COMPUTE = 7, + /* Compute LOD */ + BIFROST_LOD_MODE_COMPUTE = 7, }; enum bifrost_texture_format { - /* 16-bit floating point, with optional clamping */ - BIFROST_TEXTURE_FORMAT_F16 = 0, - BIFROST_TEXTURE_FORMAT_F16_POS = 1, - BIFROST_TEXTURE_FORMAT_F16_PM1 = 2, - BIFROST_TEXTURE_FORMAT_F16_1 = 3, + /* 16-bit floating point, with optional clamping */ + BIFROST_TEXTURE_FORMAT_F16 = 0, + BIFROST_TEXTURE_FORMAT_F16_POS = 1, + BIFROST_TEXTURE_FORMAT_F16_PM1 = 2, + BIFROST_TEXTURE_FORMAT_F16_1 = 3, - /* 32-bit floating point, with optional clamping */ - BIFROST_TEXTURE_FORMAT_F32 = 4, - BIFROST_TEXTURE_FORMAT_F32_POS = 5, - BIFROST_TEXTURE_FORMAT_F32_PM1 = 6, - BIFROST_TEXTURE_FORMAT_F32_1 = 7, + /* 32-bit floating point, with optional clamping */ + BIFROST_TEXTURE_FORMAT_F32 = 4, + BIFROST_TEXTURE_FORMAT_F32_POS = 5, + BIFROST_TEXTURE_FORMAT_F32_PM1 = 6, + BIFROST_TEXTURE_FORMAT_F32_1 = 7, }; enum bifrost_texture_format_full { - /* Transclude bifrost_texture_format from above */ + /* Transclude bifrost_texture_format from above */ - /* Integers, unclamped */ - BIFROST_TEXTURE_FORMAT_U16 = 12, - BIFROST_TEXTURE_FORMAT_S16 = 13, - BIFROST_TEXTURE_FORMAT_U32 = 14, - BIFROST_TEXTURE_FORMAT_S32 = 15, + /* Integers, unclamped */ + BIFROST_TEXTURE_FORMAT_U16 = 12, + BIFROST_TEXTURE_FORMAT_S16 = 13, + BIFROST_TEXTURE_FORMAT_U32 = 14, + BIFROST_TEXTURE_FORMAT_S32 = 15, }; enum bifrost_texture_fetch { - /* Default texelFetch */ - BIFROST_TEXTURE_FETCH_TEXEL = 1, + /* Default texelFetch */ + BIFROST_TEXTURE_FETCH_TEXEL = 1, - /* Deprecated, fetches 4x U32 of a U8 x 4 texture. Do not use. 
*/ - BIFROST_TEXTURE_FETCH_GATHER4_RGBA = 3, + /* Deprecated, fetches 4x U32 of a U8 x 4 texture. Do not use. */ + BIFROST_TEXTURE_FETCH_GATHER4_RGBA = 3, - /* Gathers */ - BIFROST_TEXTURE_FETCH_GATHER4_R = 4, - BIFROST_TEXTURE_FETCH_GATHER4_G = 5, - BIFROST_TEXTURE_FETCH_GATHER4_B = 6, - BIFROST_TEXTURE_FETCH_GATHER4_A = 7 + /* Gathers */ + BIFROST_TEXTURE_FETCH_GATHER4_R = 4, + BIFROST_TEXTURE_FETCH_GATHER4_G = 5, + BIFROST_TEXTURE_FETCH_GATHER4_B = 6, + BIFROST_TEXTURE_FETCH_GATHER4_A = 7 }; struct bifrost_texture_operation { - /* If immediate_indices is set: - * - immediate sampler index - * - index used as texture index - * Otherwise: - * - bifrost_single_index in lower 2 bits - * - 0x3 in upper 2 bits (single-texturing) - */ - unsigned sampler_index_or_mode : 4; - unsigned index : 7; - bool immediate_indices : 1; - enum bifrost_tex_op op : 3; + /* If immediate_indices is set: + * - immediate sampler index + * - index used as texture index + * Otherwise: + * - bifrost_single_index in lower 2 bits + * - 0x3 in upper 2 bits (single-texturing) + */ + unsigned sampler_index_or_mode : 4; + unsigned index : 7; + bool immediate_indices : 1; + enum bifrost_tex_op op : 3; - /* If set for TEX/FETCH, loads texel offsets and multisample index from - * a staging register containing offset_x:offset_y:offset_z:ms_index - * packed 8:8:8:8. Offsets must be in [-31, +31]. If set for - * GRDESC(_DER), disable LOD bias. */ - bool offset_or_bias_disable : 1; + /* If set for TEX/FETCH, loads texel offsets and multisample index from + * a staging register containing offset_x:offset_y:offset_z:ms_index + * packed 8:8:8:8. Offsets must be in [-31, +31]. If set for + * GRDESC(_DER), disable LOD bias. */ + bool offset_or_bias_disable : 1; - /* If set for TEX/FETCH, loads fp32 shadow comparison value from a - * staging register. Implies fetch_component = gather4_r. If set for - * GRDESC(_DER), disables LOD clamping. */ - bool shadow_or_clamp_disable : 1; + /* If set for TEX/FETCH, loads fp32 shadow comparison value from a + * staging register. Implies fetch_component = gather4_r. If set for + * GRDESC(_DER), disables LOD clamping. */ + bool shadow_or_clamp_disable : 1; - /* If set, loads an uint32 array index from a staging register. */ - bool array : 1; + /* If set, loads an uint32 array index from a staging register. 
*/ + bool array : 1; - /* Texture dimension, or 0 for a cubemap */ - unsigned dimension : 2; + /* Texture dimension, or 0 for a cubemap */ + unsigned dimension : 2; - /* Method to compute LOD value or for a FETCH, the - * bifrost_texture_fetch component specification */ - enum bifrost_lod_mode lod_or_fetch : 3; + /* Method to compute LOD value or for a FETCH, the + * bifrost_texture_fetch component specification */ + enum bifrost_lod_mode lod_or_fetch : 3; - /* Reserved */ - unsigned zero : 1; + /* Reserved */ + unsigned zero : 1; - /* Register format for the result */ - enum bifrost_texture_format_full format : 4; + /* Register format for the result */ + enum bifrost_texture_format_full format : 4; - /* Write mask for the result */ - unsigned mask : 4; + /* Write mask for the result */ + unsigned mask : 4; } __attribute__((packed)); struct bifrost_dual_texture_operation { - unsigned primary_sampler_index : 2; - unsigned mode : 2; /* 0x1 for dual */ - unsigned primary_texture_index : 2; - unsigned secondary_sampler_index : 2; - unsigned secondary_texture_index : 2; + unsigned primary_sampler_index : 2; + unsigned mode : 2; /* 0x1 for dual */ + unsigned primary_texture_index : 2; + unsigned secondary_sampler_index : 2; + unsigned secondary_texture_index : 2; - /* Leave zero for dual texturing */ - unsigned reserved : 1; - unsigned index_mode_zero : 1; + /* Leave zero for dual texturing */ + unsigned reserved : 1; + unsigned index_mode_zero : 1; - /* Base staging register to write the secondary results to */ - unsigned secondary_register : 6; + /* Base staging register to write the secondary results to */ + unsigned secondary_register : 6; - /* Format/mask for each texture */ - enum bifrost_texture_format secondary_format : 3; - unsigned secondary_mask : 4; + /* Format/mask for each texture */ + enum bifrost_texture_format secondary_format : 3; + unsigned secondary_mask : 4; - enum bifrost_texture_format primary_format : 3; - unsigned primary_mask : 4; + enum bifrost_texture_format primary_format : 3; + unsigned primary_mask : 4; } __attribute__((packed)); static inline uint32_t bi_dual_tex_as_u32(struct bifrost_dual_texture_operation desc) { - uint32_t desc_u; - memcpy(&desc_u, &desc, sizeof(desc)); + uint32_t desc_u; + memcpy(&desc_u, &desc, sizeof(desc)); - return desc_u; + return desc_u; } -#define BIFROST_MEGA_SAMPLE 128 -#define BIFROST_ALL_SAMPLES 255 +#define BIFROST_MEGA_SAMPLE 128 +#define BIFROST_ALL_SAMPLES 255 #define BIFROST_CURRENT_PIXEL 255 struct bifrost_pixel_indices { - unsigned sample : 8; - unsigned rt : 8; - unsigned x : 8; - unsigned y : 8; + unsigned sample : 8; + unsigned rt : 8; + unsigned x : 8; + unsigned y : 8; } __attribute__((packed)); enum bi_constmod { - BI_CONSTMOD_NONE, - BI_CONSTMOD_PC_LO, - BI_CONSTMOD_PC_HI, - BI_CONSTMOD_PC_LO_HI + BI_CONSTMOD_NONE, + BI_CONSTMOD_PC_LO, + BI_CONSTMOD_PC_HI, + BI_CONSTMOD_PC_LO_HI }; struct bi_constants { - /* Raw constant values */ - uint64_t raw[6]; + /* Raw constant values */ + uint64_t raw[6]; - /* Associated modifier derived from M values */ - enum bi_constmod mods[6]; + /* Associated modifier derived from M values */ + enum bi_constmod mods[6]; }; /* FAU selectors for constants are out-of-order, construct the top bits @@ -623,12 +623,10 @@ struct bi_constants { static inline unsigned bi_constant_field(unsigned idx) { - const unsigned values[] = { - 4, 5, 6, 7, 2, 3 - }; + const unsigned values[] = {4, 5, 6, 7, 2, 3}; - assert(idx <= 5); - return values[idx] << 4; + assert(idx <= 5); + return values[idx] << 4; } 
#ifdef __cplusplus diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index 50f0cd37699..1aafda25483 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -26,20 +26,19 @@ */ #include "compiler/glsl/glsl_to_nir.h" -#include "compiler/nir_types.h" #include "compiler/nir/nir_builder.h" #include "compiler/nir/nir_schedule.h" +#include "compiler/nir_types.h" #include "util/u_debug.h" -#include "disassemble.h" -#include "valhall/va_compiler.h" #include "valhall/disassemble.h" -#include "bifrost_compile.h" -#include "compiler.h" #include "valhall/va_compiler.h" -#include "bi_quirks.h" #include "bi_builder.h" +#include "bi_quirks.h" +#include "bifrost_compile.h" #include "bifrost_nir.h" +#include "compiler.h" +#include "disassemble.h" /* clang-format off */ static const struct debug_named_value bifrost_debug_options[] = { @@ -61,7 +60,8 @@ static const struct debug_named_value bifrost_debug_options[] = { }; /* clang-format on */ -DEBUG_GET_ONCE_FLAGS_OPTION(bifrost_debug, "BIFROST_MESA_DEBUG", bifrost_debug_options, 0) +DEBUG_GET_ONCE_FLAGS_OPTION(bifrost_debug, "BIFROST_MESA_DEBUG", + bifrost_debug_options, 0) /* How many bytes are prefetched by the Bifrost shader core. From the final * clause of the shader, this range must be valid instructions or zero. */ @@ -69,35 +69,36 @@ DEBUG_GET_ONCE_FLAGS_OPTION(bifrost_debug, "BIFROST_MESA_DEBUG", bifrost_debug_o int bifrost_debug = 0; -#define DBG(fmt, ...) \ - do { if (bifrost_debug & BIFROST_DBG_MSGS) \ - fprintf(stderr, "%s:%d: "fmt, \ - __func__, __LINE__, ##__VA_ARGS__); } while (0) +#define DBG(fmt, ...) \ + do { \ + if (bifrost_debug & BIFROST_DBG_MSGS) \ + fprintf(stderr, "%s:%d: " fmt, __func__, __LINE__, ##__VA_ARGS__); \ + } while (0) static bi_block *emit_cf_list(bi_context *ctx, struct exec_list *list); static bi_index bi_preload(bi_builder *b, unsigned reg) { - if (bi_is_null(b->shader->preloaded[reg])) { - /* Insert at the beginning of the shader */ - bi_builder b_ = *b; - b_.cursor = bi_before_block(bi_start_block(&b->shader->blocks)); + if (bi_is_null(b->shader->preloaded[reg])) { + /* Insert at the beginning of the shader */ + bi_builder b_ = *b; + b_.cursor = bi_before_block(bi_start_block(&b->shader->blocks)); - /* Cache the result */ - b->shader->preloaded[reg] = bi_mov_i32(&b_, bi_register(reg)); - } + /* Cache the result */ + b->shader->preloaded[reg] = bi_mov_i32(&b_, bi_register(reg)); + } - return b->shader->preloaded[reg]; + return b->shader->preloaded[reg]; } static bi_index bi_coverage(bi_builder *b) { - if (bi_is_null(b->shader->coverage)) - b->shader->coverage = bi_preload(b, 60); + if (bi_is_null(b->shader->coverage)) + b->shader->coverage = bi_preload(b, 60); - return b->shader->coverage; + return b->shader->coverage; } /* @@ -108,44 +109,44 @@ bi_coverage(bi_builder *b) static inline bi_index bi_vertex_id(bi_builder *b) { - return bi_preload(b, (b->shader->arch >= 9) ? 60 : 61); + return bi_preload(b, (b->shader->arch >= 9) ? 60 : 61); } static inline bi_index bi_instance_id(bi_builder *b) { - return bi_preload(b, (b->shader->arch >= 9) ? 61 : 62); + return bi_preload(b, (b->shader->arch >= 9) ? 
61 : 62); } static void bi_emit_jump(bi_builder *b, nir_jump_instr *instr) { - bi_instr *branch = bi_jump(b, bi_zero()); + bi_instr *branch = bi_jump(b, bi_zero()); - switch (instr->type) { - case nir_jump_break: - branch->branch_target = b->shader->break_block; - break; - case nir_jump_continue: - branch->branch_target = b->shader->continue_block; - break; - default: - unreachable("Unhandled jump type"); - } + switch (instr->type) { + case nir_jump_break: + branch->branch_target = b->shader->break_block; + break; + case nir_jump_continue: + branch->branch_target = b->shader->continue_block; + break; + default: + unreachable("Unhandled jump type"); + } - bi_block_add_successor(b->shader->current_block, branch->branch_target); - b->shader->current_block->unconditional_jumps = true; + bi_block_add_successor(b->shader->current_block, branch->branch_target); + b->shader->current_block->unconditional_jumps = true; } /* Builds a 64-bit hash table key for an index */ static uint64_t bi_index_to_key(bi_index idx) { - static_assert(sizeof(idx) <= sizeof(uint64_t), "too much padding"); + static_assert(sizeof(idx) <= sizeof(uint64_t), "too much padding"); - uint64_t key = 0; - memcpy(&key, &idx, sizeof(idx)); - return key; + uint64_t key = 0; + memcpy(&key, &idx, sizeof(idx)); + return key; } /* @@ -156,32 +157,31 @@ bi_index_to_key(bi_index idx) static bi_index bi_extract(bi_builder *b, bi_index vec, unsigned channel) { - bi_index *components = - _mesa_hash_table_u64_search(b->shader->allocated_vec, - bi_index_to_key(vec)); + bi_index *components = _mesa_hash_table_u64_search(b->shader->allocated_vec, + bi_index_to_key(vec)); - /* No extract needed for scalars. - * - * This is a bit imprecise, but actual bugs (missing splits for vectors) - * should be caught by the following assertion. It is too difficult to - * ensure bi_extract is only called for real vectors. - */ - if (components == NULL && channel == 0) - return vec; + /* No extract needed for scalars. + * + * This is a bit imprecise, but actual bugs (missing splits for vectors) + * should be caught by the following assertion. It is too difficult to + * ensure bi_extract is only called for real vectors. 
+ */ + if (components == NULL && channel == 0) + return vec; - assert(components != NULL && "missing bi_cache_collect()"); - return components[channel]; + assert(components != NULL && "missing bi_cache_collect()"); + return components[channel]; } static void bi_cache_collect(bi_builder *b, bi_index dst, bi_index *s, unsigned n) { - /* Lifetime of a hash table entry has to be at least as long as the table */ - bi_index *channels = ralloc_array(b->shader, bi_index, n); - memcpy(channels, s, sizeof(bi_index) * n); + /* Lifetime of a hash table entry has to be at least as long as the table */ + bi_index *channels = ralloc_array(b->shader, bi_index, n); + memcpy(channels, s, sizeof(bi_index) * n); - _mesa_hash_table_u64_insert(b->shader->allocated_vec, - bi_index_to_key(dst), channels); + _mesa_hash_table_u64_insert(b->shader->allocated_vec, bi_index_to_key(dst), + channels); } /* @@ -193,28 +193,28 @@ bi_cache_collect(bi_builder *b, bi_index dst, bi_index *s, unsigned n) static void bi_emit_split_i32(bi_builder *b, bi_index dests[4], bi_index vec, unsigned n) { - /* Setup the destinations */ - for (unsigned i = 0; i < n; ++i) { - dests[i] = bi_temp(b->shader); - } + /* Setup the destinations */ + for (unsigned i = 0; i < n; ++i) { + dests[i] = bi_temp(b->shader); + } - /* Emit the split */ - if (n == 1) { - bi_mov_i32_to(b, dests[0], vec); - } else { - bi_instr *I = bi_split_i32_to(b, n, vec); + /* Emit the split */ + if (n == 1) { + bi_mov_i32_to(b, dests[0], vec); + } else { + bi_instr *I = bi_split_i32_to(b, n, vec); - bi_foreach_dest(I, j) - I->dest[j] = dests[j]; - } + bi_foreach_dest(I, j) + I->dest[j] = dests[j]; + } } static void bi_emit_cached_split_i32(bi_builder *b, bi_index vec, unsigned n) { - bi_index dests[4] = { bi_null(), bi_null(), bi_null(), bi_null() }; - bi_emit_split_i32(b, dests, vec, n); - bi_cache_collect(b, vec, dests, n); + bi_index dests[4] = {bi_null(), bi_null(), bi_null(), bi_null()}; + bi_emit_split_i32(b, dests, vec, n); + bi_cache_collect(b, vec, dests, n); } /* @@ -224,161 +224,164 @@ bi_emit_cached_split_i32(bi_builder *b, bi_index vec, unsigned n) static void bi_emit_cached_split(bi_builder *b, bi_index vec, unsigned bits) { - bi_emit_cached_split_i32(b, vec, DIV_ROUND_UP(bits, 32)); + bi_emit_cached_split_i32(b, vec, DIV_ROUND_UP(bits, 32)); } static void bi_split_dest(bi_builder *b, nir_dest dest) { - bi_emit_cached_split(b, bi_dest_index(&dest), - nir_dest_bit_size(dest) * - nir_dest_num_components(dest)); + bi_emit_cached_split( + b, bi_dest_index(&dest), + nir_dest_bit_size(dest) * nir_dest_num_components(dest)); } static bi_instr * bi_emit_collect_to(bi_builder *b, bi_index dst, bi_index *chan, unsigned n) { - /* Special case: COLLECT of a single value is a scalar move */ - if (n == 1) - return bi_mov_i32_to(b, dst, chan[0]); + /* Special case: COLLECT of a single value is a scalar move */ + if (n == 1) + return bi_mov_i32_to(b, dst, chan[0]); - bi_instr *I = bi_collect_i32_to(b, dst, n); + bi_instr *I = bi_collect_i32_to(b, dst, n); - bi_foreach_src(I, i) - I->src[i] = chan[i]; + bi_foreach_src(I, i) + I->src[i] = chan[i]; - bi_cache_collect(b, dst, chan, n); - return I; + bi_cache_collect(b, dst, chan, n); + return I; } static bi_instr * bi_collect_v2i32_to(bi_builder *b, bi_index dst, bi_index s0, bi_index s1) { - return bi_emit_collect_to(b, dst, (bi_index[]) { s0, s1 }, 2); + return bi_emit_collect_to(b, dst, (bi_index[]){s0, s1}, 2); } static bi_instr * -bi_collect_v3i32_to(bi_builder *b, bi_index dst, bi_index s0, bi_index s1, bi_index s2) 
+bi_collect_v3i32_to(bi_builder *b, bi_index dst, bi_index s0, bi_index s1, + bi_index s2) { - return bi_emit_collect_to(b, dst, (bi_index[]) { s0, s1, s2 }, 3); + return bi_emit_collect_to(b, dst, (bi_index[]){s0, s1, s2}, 3); } static bi_index bi_collect_v2i32(bi_builder *b, bi_index s0, bi_index s1) { - bi_index dst = bi_temp(b->shader); - bi_collect_v2i32_to(b, dst, s0, s1); - return dst; + bi_index dst = bi_temp(b->shader); + bi_collect_v2i32_to(b, dst, s0, s1); + return dst; } static bi_index bi_varying_src0_for_barycentric(bi_builder *b, nir_intrinsic_instr *intr) { - switch (intr->intrinsic) { - case nir_intrinsic_load_barycentric_centroid: - case nir_intrinsic_load_barycentric_sample: - return bi_preload(b, 61); + switch (intr->intrinsic) { + case nir_intrinsic_load_barycentric_centroid: + case nir_intrinsic_load_barycentric_sample: + return bi_preload(b, 61); - /* Need to put the sample ID in the top 16-bits */ - case nir_intrinsic_load_barycentric_at_sample: - return bi_mkvec_v2i16(b, bi_half(bi_dontcare(b), false), - bi_half(bi_src_index(&intr->src[0]), false)); + /* Need to put the sample ID in the top 16-bits */ + case nir_intrinsic_load_barycentric_at_sample: + return bi_mkvec_v2i16(b, bi_half(bi_dontcare(b), false), + bi_half(bi_src_index(&intr->src[0]), false)); - /* Interpret as 8:8 signed fixed point positions in pixels along X and - * Y axes respectively, relative to top-left of pixel. In NIR, (0, 0) - * is the center of the pixel so we first fixup and then convert. For - * fp16 input: - * - * f2i16(((x, y) + (0.5, 0.5)) * 2**8) = - * f2i16((256 * (x, y)) + (128, 128)) = - * V2F16_TO_V2S16(FMA.v2f16((x, y), #256, #128)) - * - * For fp32 input, that lacks enough precision for MSAA 16x, but the - * idea is the same. FIXME: still doesn't pass - */ - case nir_intrinsic_load_barycentric_at_offset: { - bi_index offset = bi_src_index(&intr->src[0]); - bi_index f16 = bi_null(); - unsigned sz = nir_src_bit_size(intr->src[0]); + /* Interpret as 8:8 signed fixed point positions in pixels along X and + * Y axes respectively, relative to top-left of pixel. In NIR, (0, 0) + * is the center of the pixel so we first fixup and then convert. For + * fp16 input: + * + * f2i16(((x, y) + (0.5, 0.5)) * 2**8) = + * f2i16((256 * (x, y)) + (128, 128)) = + * V2F16_TO_V2S16(FMA.v2f16((x, y), #256, #128)) + * + * For fp32 input, that lacks enough precision for MSAA 16x, but the + * idea is the same. FIXME: still doesn't pass + */ + case nir_intrinsic_load_barycentric_at_offset: { + bi_index offset = bi_src_index(&intr->src[0]); + bi_index f16 = bi_null(); + unsigned sz = nir_src_bit_size(intr->src[0]); - if (sz == 16) { - f16 = bi_fma_v2f16(b, offset, bi_imm_f16(256.0), - bi_imm_f16(128.0)); - } else { - assert(sz == 32); - bi_index f[2]; - for (unsigned i = 0; i < 2; ++i) { - f[i] = bi_fadd_rscale_f32(b, - bi_extract(b, offset, i), - bi_imm_f32(0.5), bi_imm_u32(8), - BI_SPECIAL_NONE); - } + if (sz == 16) { + f16 = bi_fma_v2f16(b, offset, bi_imm_f16(256.0), bi_imm_f16(128.0)); + } else { + assert(sz == 32); + bi_index f[2]; + for (unsigned i = 0; i < 2; ++i) { + f[i] = + bi_fadd_rscale_f32(b, bi_extract(b, offset, i), bi_imm_f32(0.5), + bi_imm_u32(8), BI_SPECIAL_NONE); + } - f16 = bi_v2f32_to_v2f16(b, f[0], f[1]); - } + f16 = bi_v2f32_to_v2f16(b, f[0], f[1]); + } - return bi_v2f16_to_v2s16(b, f16); - } + return bi_v2f16_to_v2s16(b, f16); + } - case nir_intrinsic_load_barycentric_pixel: - default: - return b->shader->arch >= 9 ? 
bi_preload(b, 61) : bi_dontcare(b); - } + case nir_intrinsic_load_barycentric_pixel: + default: + return b->shader->arch >= 9 ? bi_preload(b, 61) : bi_dontcare(b); + } } static enum bi_sample bi_interp_for_intrinsic(nir_intrinsic_op op) { - switch (op) { - case nir_intrinsic_load_barycentric_centroid: - return BI_SAMPLE_CENTROID; - case nir_intrinsic_load_barycentric_sample: - case nir_intrinsic_load_barycentric_at_sample: - return BI_SAMPLE_SAMPLE; - case nir_intrinsic_load_barycentric_at_offset: - return BI_SAMPLE_EXPLICIT; - case nir_intrinsic_load_barycentric_pixel: - default: - return BI_SAMPLE_CENTER; - } + switch (op) { + case nir_intrinsic_load_barycentric_centroid: + return BI_SAMPLE_CENTROID; + case nir_intrinsic_load_barycentric_sample: + case nir_intrinsic_load_barycentric_at_sample: + return BI_SAMPLE_SAMPLE; + case nir_intrinsic_load_barycentric_at_offset: + return BI_SAMPLE_EXPLICIT; + case nir_intrinsic_load_barycentric_pixel: + default: + return BI_SAMPLE_CENTER; + } } /* auto, 64-bit omitted */ static enum bi_register_format bi_reg_fmt_for_nir(nir_alu_type T) { - switch (T) { - case nir_type_float16: return BI_REGISTER_FORMAT_F16; - case nir_type_float32: return BI_REGISTER_FORMAT_F32; - case nir_type_int16: return BI_REGISTER_FORMAT_S16; - case nir_type_uint16: return BI_REGISTER_FORMAT_U16; - case nir_type_int32: return BI_REGISTER_FORMAT_S32; - case nir_type_uint32: return BI_REGISTER_FORMAT_U32; - default: unreachable("Invalid type for register format"); - } + switch (T) { + case nir_type_float16: + return BI_REGISTER_FORMAT_F16; + case nir_type_float32: + return BI_REGISTER_FORMAT_F32; + case nir_type_int16: + return BI_REGISTER_FORMAT_S16; + case nir_type_uint16: + return BI_REGISTER_FORMAT_U16; + case nir_type_int32: + return BI_REGISTER_FORMAT_S32; + case nir_type_uint32: + return BI_REGISTER_FORMAT_U32; + default: + unreachable("Invalid type for register format"); + } } /* Checks if the _IMM variant of an intrinsic can be used, returning in imm the * immediate to be used (which applies even if _IMM can't be used) */ static bool -bi_is_intr_immediate(nir_intrinsic_instr *instr, unsigned *immediate, unsigned max) +bi_is_intr_immediate(nir_intrinsic_instr *instr, unsigned *immediate, + unsigned max) { - nir_src *offset = nir_get_io_offset_src(instr); + nir_src *offset = nir_get_io_offset_src(instr); - if (!nir_src_is_const(*offset)) - return false; + if (!nir_src_is_const(*offset)) + return false; - *immediate = nir_intrinsic_base(instr) + nir_src_as_uint(*offset); - return (*immediate) < max; + *immediate = nir_intrinsic_base(instr) + nir_src_as_uint(*offset); + return (*immediate) < max; } -static void -bi_make_vec_to(bi_builder *b, bi_index final_dst, - bi_index *src, - unsigned *channel, - unsigned count, - unsigned bitsize); +static void bi_make_vec_to(bi_builder *b, bi_index final_dst, bi_index *src, + unsigned *channel, unsigned count, unsigned bitsize); /* Bifrost's load instructions lack a component offset despite operating in * terms of vec4 slots. 
Usually I/O vectorization avoids nonzero components, @@ -388,59 +391,59 @@ bi_make_vec_to(bi_builder *b, bi_index final_dst, static void bi_copy_component(bi_builder *b, nir_intrinsic_instr *instr, bi_index tmp) { - unsigned component = nir_intrinsic_component(instr); - unsigned nr = instr->num_components; - unsigned total = nr + component; - unsigned bitsize = nir_dest_bit_size(instr->dest); + unsigned component = nir_intrinsic_component(instr); + unsigned nr = instr->num_components; + unsigned total = nr + component; + unsigned bitsize = nir_dest_bit_size(instr->dest); - assert(total <= 4 && "should be vec4"); - bi_emit_cached_split(b, tmp, total * bitsize); + assert(total <= 4 && "should be vec4"); + bi_emit_cached_split(b, tmp, total * bitsize); - if (component == 0) - return; + if (component == 0) + return; - bi_index srcs[] = { tmp, tmp, tmp }; - unsigned channels[] = { component, component + 1, component + 2 }; + bi_index srcs[] = {tmp, tmp, tmp}; + unsigned channels[] = {component, component + 1, component + 2}; - bi_make_vec_to(b, bi_dest_index(&instr->dest), - srcs, channels, nr, nir_dest_bit_size(instr->dest)); + bi_make_vec_to(b, bi_dest_index(&instr->dest), srcs, channels, nr, + nir_dest_bit_size(instr->dest)); } static void bi_emit_load_attr(bi_builder *b, nir_intrinsic_instr *instr) { - nir_alu_type T = nir_intrinsic_dest_type(instr); - enum bi_register_format regfmt = bi_reg_fmt_for_nir(T); - nir_src *offset = nir_get_io_offset_src(instr); - unsigned component = nir_intrinsic_component(instr); - enum bi_vecsize vecsize = (instr->num_components + component - 1); - unsigned imm_index = 0; - unsigned base = nir_intrinsic_base(instr); - bool constant = nir_src_is_const(*offset); - bool immediate = bi_is_intr_immediate(instr, &imm_index, 16); - bi_index dest = (component == 0) ? bi_dest_index(&instr->dest) : bi_temp(b->shader); - bi_instr *I; + nir_alu_type T = nir_intrinsic_dest_type(instr); + enum bi_register_format regfmt = bi_reg_fmt_for_nir(T); + nir_src *offset = nir_get_io_offset_src(instr); + unsigned component = nir_intrinsic_component(instr); + enum bi_vecsize vecsize = (instr->num_components + component - 1); + unsigned imm_index = 0; + unsigned base = nir_intrinsic_base(instr); + bool constant = nir_src_is_const(*offset); + bool immediate = bi_is_intr_immediate(instr, &imm_index, 16); + bi_index dest = + (component == 0) ? 
bi_dest_index(&instr->dest) : bi_temp(b->shader); + bi_instr *I; - if (immediate) { - I = bi_ld_attr_imm_to(b, dest, bi_vertex_id(b), - bi_instance_id(b), regfmt, vecsize, - imm_index); - } else { - bi_index idx = bi_src_index(&instr->src[0]); + if (immediate) { + I = bi_ld_attr_imm_to(b, dest, bi_vertex_id(b), bi_instance_id(b), regfmt, + vecsize, imm_index); + } else { + bi_index idx = bi_src_index(&instr->src[0]); - if (constant) - idx = bi_imm_u32(imm_index); - else if (base != 0) - idx = bi_iadd_u32(b, idx, bi_imm_u32(base), false); + if (constant) + idx = bi_imm_u32(imm_index); + else if (base != 0) + idx = bi_iadd_u32(b, idx, bi_imm_u32(base), false); - I = bi_ld_attr_to(b, dest, bi_vertex_id(b), bi_instance_id(b), - idx, regfmt, vecsize); - } + I = bi_ld_attr_to(b, dest, bi_vertex_id(b), bi_instance_id(b), idx, + regfmt, vecsize); + } - if (b->shader->arch >= 9) - I->table = PAN_TABLE_ATTRIBUTE; + if (b->shader->arch >= 9) + I->table = PAN_TABLE_ATTRIBUTE; - bi_copy_component(b, instr, dest); + bi_copy_component(b, instr, dest); } /* @@ -452,17 +455,17 @@ bi_emit_load_attr(bi_builder *b, nir_intrinsic_instr *instr) static unsigned bi_varying_base_bytes(bi_context *ctx, nir_intrinsic_instr *intr) { - nir_io_semantics sem = nir_intrinsic_io_semantics(intr); - uint32_t mask = ctx->inputs->fixed_varying_mask; + nir_io_semantics sem = nir_intrinsic_io_semantics(intr); + uint32_t mask = ctx->inputs->fixed_varying_mask; - if (sem.location >= VARYING_SLOT_VAR0) { - unsigned nr_special = util_bitcount(mask); - unsigned general_index = (sem.location - VARYING_SLOT_VAR0); + if (sem.location >= VARYING_SLOT_VAR0) { + unsigned nr_special = util_bitcount(mask); + unsigned general_index = (sem.location - VARYING_SLOT_VAR0); - return 16 * (nr_special + general_index); - } else { - return 16 * (util_bitcount(mask & BITFIELD_MASK(sem.location))); - } + return 16 * (nr_special + general_index); + } else { + return 16 * (util_bitcount(mask & BITFIELD_MASK(sem.location))); + } } /* @@ -472,290 +475,275 @@ bi_varying_base_bytes(bi_context *ctx, nir_intrinsic_instr *intr) static unsigned bi_varying_offset(bi_context *ctx, nir_intrinsic_instr *intr) { - nir_src *src = nir_get_io_offset_src(intr); - assert(nir_src_is_const(*src) && "assumes immediate offset"); + nir_src *src = nir_get_io_offset_src(intr); + assert(nir_src_is_const(*src) && "assumes immediate offset"); - return bi_varying_base_bytes(ctx, intr) + (nir_src_as_uint(*src) * 16); + return bi_varying_base_bytes(ctx, intr) + (nir_src_as_uint(*src) * 16); } static void bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr) { - enum bi_sample sample = BI_SAMPLE_CENTER; - enum bi_update update = BI_UPDATE_STORE; - enum bi_register_format regfmt = BI_REGISTER_FORMAT_AUTO; - bool smooth = instr->intrinsic == nir_intrinsic_load_interpolated_input; - bi_index src0 = bi_null(); + enum bi_sample sample = BI_SAMPLE_CENTER; + enum bi_update update = BI_UPDATE_STORE; + enum bi_register_format regfmt = BI_REGISTER_FORMAT_AUTO; + bool smooth = instr->intrinsic == nir_intrinsic_load_interpolated_input; + bi_index src0 = bi_null(); - unsigned component = nir_intrinsic_component(instr); - enum bi_vecsize vecsize = (instr->num_components + component - 1); - bi_index dest = (component == 0) ? bi_dest_index(&instr->dest) : bi_temp(b->shader); + unsigned component = nir_intrinsic_component(instr); + enum bi_vecsize vecsize = (instr->num_components + component - 1); + bi_index dest = + (component == 0) ? 
bi_dest_index(&instr->dest) : bi_temp(b->shader); - unsigned sz = nir_dest_bit_size(instr->dest); + unsigned sz = nir_dest_bit_size(instr->dest); - if (smooth) { - nir_intrinsic_instr *parent = nir_src_as_intrinsic(instr->src[0]); - assert(parent); + if (smooth) { + nir_intrinsic_instr *parent = nir_src_as_intrinsic(instr->src[0]); + assert(parent); - sample = bi_interp_for_intrinsic(parent->intrinsic); - src0 = bi_varying_src0_for_barycentric(b, parent); + sample = bi_interp_for_intrinsic(parent->intrinsic); + src0 = bi_varying_src0_for_barycentric(b, parent); - assert(sz == 16 || sz == 32); - regfmt = (sz == 16) ? BI_REGISTER_FORMAT_F16 - : BI_REGISTER_FORMAT_F32; - } else { - assert(sz == 32); - regfmt = BI_REGISTER_FORMAT_U32; + assert(sz == 16 || sz == 32); + regfmt = (sz == 16) ? BI_REGISTER_FORMAT_F16 : BI_REGISTER_FORMAT_F32; + } else { + assert(sz == 32); + regfmt = BI_REGISTER_FORMAT_U32; - /* Valhall can't have bi_null() here, although the source is - * logically unused for flat varyings - */ - if (b->shader->arch >= 9) - src0 = bi_preload(b, 61); + /* Valhall can't have bi_null() here, although the source is + * logically unused for flat varyings + */ + if (b->shader->arch >= 9) + src0 = bi_preload(b, 61); - /* Gather info as we go */ - b->shader->info.bifrost->uses_flat_shading = true; - } + /* Gather info as we go */ + b->shader->info.bifrost->uses_flat_shading = true; + } - enum bi_source_format source_format = - smooth ? BI_SOURCE_FORMAT_F32 : BI_SOURCE_FORMAT_FLAT32; + enum bi_source_format source_format = + smooth ? BI_SOURCE_FORMAT_F32 : BI_SOURCE_FORMAT_FLAT32; - nir_src *offset = nir_get_io_offset_src(instr); - unsigned imm_index = 0; - bool immediate = bi_is_intr_immediate(instr, &imm_index, 20); - bi_instr *I = NULL; + nir_src *offset = nir_get_io_offset_src(instr); + unsigned imm_index = 0; + bool immediate = bi_is_intr_immediate(instr, &imm_index, 20); + bi_instr *I = NULL; - if (b->shader->malloc_idvs && immediate) { - /* Immediate index given in bytes. */ - bi_ld_var_buf_imm_to(b, sz, dest, src0, regfmt, - sample, source_format, update, vecsize, - bi_varying_offset(b->shader, instr)); - } else if (immediate && smooth) { - I = bi_ld_var_imm_to(b, dest, src0, regfmt, sample, update, - vecsize, imm_index); - } else if (immediate && !smooth) { - I = bi_ld_var_flat_imm_to(b, dest, BI_FUNCTION_NONE, regfmt, - vecsize, imm_index); - } else { - bi_index idx = bi_src_index(offset); - unsigned base = nir_intrinsic_base(instr); + if (b->shader->malloc_idvs && immediate) { + /* Immediate index given in bytes. */ + bi_ld_var_buf_imm_to(b, sz, dest, src0, regfmt, sample, source_format, + update, vecsize, + bi_varying_offset(b->shader, instr)); + } else if (immediate && smooth) { + I = bi_ld_var_imm_to(b, dest, src0, regfmt, sample, update, vecsize, + imm_index); + } else if (immediate && !smooth) { + I = bi_ld_var_flat_imm_to(b, dest, BI_FUNCTION_NONE, regfmt, vecsize, + imm_index); + } else { + bi_index idx = bi_src_index(offset); + unsigned base = nir_intrinsic_base(instr); - if (b->shader->malloc_idvs) { - /* Index needs to be in bytes, but NIR gives the index - * in slots. For now assume 16 bytes per element. - */ - bi_index idx_bytes = bi_lshift_or_i32(b, idx, bi_zero(), bi_imm_u8(4)); - unsigned vbase = bi_varying_base_bytes(b->shader, instr); + if (b->shader->malloc_idvs) { + /* Index needs to be in bytes, but NIR gives the index + * in slots. For now assume 16 bytes per element. 
+ */ + bi_index idx_bytes = bi_lshift_or_i32(b, idx, bi_zero(), bi_imm_u8(4)); + unsigned vbase = bi_varying_base_bytes(b->shader, instr); - if (vbase != 0) - idx_bytes = bi_iadd_u32(b, idx, bi_imm_u32(vbase), false); + if (vbase != 0) + idx_bytes = bi_iadd_u32(b, idx, bi_imm_u32(vbase), false); - bi_ld_var_buf_to(b, sz, dest, src0, idx_bytes, regfmt, - sample, source_format, update, - vecsize); - } else if (smooth) { - if (base != 0) - idx = bi_iadd_u32(b, idx, bi_imm_u32(base), false); + bi_ld_var_buf_to(b, sz, dest, src0, idx_bytes, regfmt, sample, + source_format, update, vecsize); + } else if (smooth) { + if (base != 0) + idx = bi_iadd_u32(b, idx, bi_imm_u32(base), false); - I = bi_ld_var_to(b, dest, src0, idx, regfmt, sample, - update, vecsize); - } else { - if (base != 0) - idx = bi_iadd_u32(b, idx, bi_imm_u32(base), false); + I = bi_ld_var_to(b, dest, src0, idx, regfmt, sample, update, vecsize); + } else { + if (base != 0) + idx = bi_iadd_u32(b, idx, bi_imm_u32(base), false); - I = bi_ld_var_flat_to(b, dest, idx, - BI_FUNCTION_NONE, regfmt, - vecsize); - } - } + I = bi_ld_var_flat_to(b, dest, idx, BI_FUNCTION_NONE, regfmt, vecsize); + } + } - /* Valhall usually uses machine-allocated IDVS. If this is disabled, use - * a simple Midgard-style ABI. - */ - if (b->shader->arch >= 9 && I != NULL) - I->table = PAN_TABLE_ATTRIBUTE; + /* Valhall usually uses machine-allocated IDVS. If this is disabled, use + * a simple Midgard-style ABI. + */ + if (b->shader->arch >= 9 && I != NULL) + I->table = PAN_TABLE_ATTRIBUTE; - bi_copy_component(b, instr, dest); + bi_copy_component(b, instr, dest); } static bi_index -bi_make_vec8_helper(bi_builder *b, bi_index *src, unsigned *channel, unsigned count) +bi_make_vec8_helper(bi_builder *b, bi_index *src, unsigned *channel, + unsigned count) { - assert(1 <= count && count <= 4); + assert(1 <= count && count <= 4); - bi_index bytes[4] = { - bi_imm_u8(0), - bi_imm_u8(0), - bi_imm_u8(0), - bi_imm_u8(0) - }; + bi_index bytes[4] = {bi_imm_u8(0), bi_imm_u8(0), bi_imm_u8(0), bi_imm_u8(0)}; - for (unsigned i = 0; i < count; ++i) { - unsigned chan = channel ? channel[i] : 0; + for (unsigned i = 0; i < count; ++i) { + unsigned chan = channel ? channel[i] : 0; - bytes[i] = bi_byte(bi_extract(b, src[i], chan >> 2), chan & 3); - } + bytes[i] = bi_byte(bi_extract(b, src[i], chan >> 2), chan & 3); + } - if (b->shader->arch >= 9) { - bi_index vec = bi_zero(); + if (b->shader->arch >= 9) { + bi_index vec = bi_zero(); - if (count >= 3) - vec = bi_mkvec_v2i8(b, bytes[2], bytes[3], vec); + if (count >= 3) + vec = bi_mkvec_v2i8(b, bytes[2], bytes[3], vec); - return bi_mkvec_v2i8(b, bytes[0], bytes[1], vec); - } else { - return bi_mkvec_v4i8(b, bytes[0], bytes[1], bytes[2], bytes[3]); - } + return bi_mkvec_v2i8(b, bytes[0], bytes[1], vec); + } else { + return bi_mkvec_v4i8(b, bytes[0], bytes[1], bytes[2], bytes[3]); + } } static bi_index -bi_make_vec16_helper(bi_builder *b, bi_index *src, unsigned *channel, unsigned count) +bi_make_vec16_helper(bi_builder *b, bi_index *src, unsigned *channel, + unsigned count) { - unsigned chan0 = channel ? channel[0] : 0; - bi_index w0 = bi_extract(b, src[0], chan0 >> 1); - bi_index h0 = bi_half(w0, chan0 & 1); + unsigned chan0 = channel ? 
channel[0] : 0; + bi_index w0 = bi_extract(b, src[0], chan0 >> 1); + bi_index h0 = bi_half(w0, chan0 & 1); - /* Zero extend */ - if (count == 1) - return bi_mkvec_v2i16(b, h0, bi_imm_u16(0)); + /* Zero extend */ + if (count == 1) + return bi_mkvec_v2i16(b, h0, bi_imm_u16(0)); - /* Else, create a vector */ - assert(count == 2); + /* Else, create a vector */ + assert(count == 2); - unsigned chan1 = channel ? channel[1] : 0; - bi_index w1 = bi_extract(b, src[1], chan1 >> 1); - bi_index h1 = bi_half(w1, chan1 & 1); + unsigned chan1 = channel ? channel[1] : 0; + bi_index w1 = bi_extract(b, src[1], chan1 >> 1); + bi_index h1 = bi_half(w1, chan1 & 1); - if (bi_is_word_equiv(w0, w1) && (chan0 & 1) == 0 && ((chan1 & 1) == 1)) - return bi_mov_i32(b, w0); - else if (bi_is_word_equiv(w0, w1)) - return bi_swz_v2i16(b, bi_swz_16(w0, chan0 & 1, chan1 & 1)); - else - return bi_mkvec_v2i16(b, h0, h1); + if (bi_is_word_equiv(w0, w1) && (chan0 & 1) == 0 && ((chan1 & 1) == 1)) + return bi_mov_i32(b, w0); + else if (bi_is_word_equiv(w0, w1)) + return bi_swz_v2i16(b, bi_swz_16(w0, chan0 & 1, chan1 & 1)); + else + return bi_mkvec_v2i16(b, h0, h1); } static void -bi_make_vec_to(bi_builder *b, bi_index dst, - bi_index *src, - unsigned *channel, - unsigned count, - unsigned bitsize) +bi_make_vec_to(bi_builder *b, bi_index dst, bi_index *src, unsigned *channel, + unsigned count, unsigned bitsize) { - assert(bitsize == 8 || bitsize == 16 || bitsize == 32); - unsigned shift = (bitsize == 32) ? 0 : (bitsize == 16) ? 1 : 2; - unsigned chan_per_word = 1 << shift; + assert(bitsize == 8 || bitsize == 16 || bitsize == 32); + unsigned shift = (bitsize == 32) ? 0 : (bitsize == 16) ? 1 : 2; + unsigned chan_per_word = 1 << shift; - assert(DIV_ROUND_UP(count * bitsize, 32) <= BI_MAX_SRCS && - "unnecessarily large vector should have been lowered"); + assert(DIV_ROUND_UP(count * bitsize, 32) <= BI_MAX_SRCS && + "unnecessarily large vector should have been lowered"); - bi_index srcs[BI_MAX_VEC]; + bi_index srcs[BI_MAX_VEC]; - for (unsigned i = 0; i < count; i += chan_per_word) { - unsigned rem = MIN2(count - i, chan_per_word); - unsigned *channel_offset = channel ? (channel + i) : NULL; + for (unsigned i = 0; i < count; i += chan_per_word) { + unsigned rem = MIN2(count - i, chan_per_word); + unsigned *channel_offset = channel ? (channel + i) : NULL; - if (bitsize == 32) - srcs[i] = bi_extract(b, src[i], channel_offset ? *channel_offset : 0); - else if (bitsize == 16) - srcs[i >> 1] = bi_make_vec16_helper(b, src + i, channel_offset, rem); - else - srcs[i >> 2] = bi_make_vec8_helper(b, src + i, channel_offset, rem); - } + if (bitsize == 32) + srcs[i] = bi_extract(b, src[i], channel_offset ? 
*channel_offset : 0); + else if (bitsize == 16) + srcs[i >> 1] = bi_make_vec16_helper(b, src + i, channel_offset, rem); + else + srcs[i >> 2] = bi_make_vec8_helper(b, src + i, channel_offset, rem); + } - bi_emit_collect_to(b, dst, srcs, DIV_ROUND_UP(count, chan_per_word)); + bi_emit_collect_to(b, dst, srcs, DIV_ROUND_UP(count, chan_per_word)); } static inline bi_instr * bi_load_ubo_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, - bi_index src1) + bi_index src1) { - bi_instr *I; + bi_instr *I; - if (b->shader->arch >= 9) { - I = bi_ld_buffer_to(b, bitsize, dest0, src0, src1); - I->seg = BI_SEG_UBO; - } else { - I = bi_load_to(b, bitsize, dest0, src0, src1, BI_SEG_UBO, 0); - } + if (b->shader->arch >= 9) { + I = bi_ld_buffer_to(b, bitsize, dest0, src0, src1); + I->seg = BI_SEG_UBO; + } else { + I = bi_load_to(b, bitsize, dest0, src0, src1, BI_SEG_UBO, 0); + } - bi_emit_cached_split(b, dest0, bitsize); - return I; + bi_emit_cached_split(b, dest0, bitsize); + return I; } static bi_instr * bi_load_sysval_to(bi_builder *b, bi_index dest, int sysval, - unsigned nr_components, unsigned offset) + unsigned nr_components, unsigned offset) { - unsigned sysval_ubo = b->shader->inputs->fixed_sysval_ubo >= 0 ? - b->shader->inputs->fixed_sysval_ubo : - b->shader->nir->info.num_ubos; - unsigned uniform = - pan_lookup_sysval(b->shader->sysval_to_id, - b->shader->info.sysvals, - sysval); - unsigned idx = (uniform * 16) + offset; + unsigned sysval_ubo = b->shader->inputs->fixed_sysval_ubo >= 0 + ? b->shader->inputs->fixed_sysval_ubo + : b->shader->nir->info.num_ubos; + unsigned uniform = pan_lookup_sysval(b->shader->sysval_to_id, + b->shader->info.sysvals, sysval); + unsigned idx = (uniform * 16) + offset; - return bi_load_ubo_to(b, nr_components * 32, dest, - bi_imm_u32(idx), bi_imm_u32(sysval_ubo)); + return bi_load_ubo_to(b, nr_components * 32, dest, bi_imm_u32(idx), + bi_imm_u32(sysval_ubo)); } static void bi_load_sysval_nir(bi_builder *b, nir_intrinsic_instr *intr, - unsigned nr_components, unsigned offset) + unsigned nr_components, unsigned offset) { - bi_load_sysval_to(b, bi_dest_index(&intr->dest), - panfrost_sysval_for_instr(&intr->instr, NULL), - nr_components, offset); + bi_load_sysval_to(b, bi_dest_index(&intr->dest), + panfrost_sysval_for_instr(&intr->instr, NULL), + nr_components, offset); } static bi_index -bi_load_sysval(bi_builder *b, int sysval, - unsigned nr_components, unsigned offset) +bi_load_sysval(bi_builder *b, int sysval, unsigned nr_components, + unsigned offset) { - bi_index tmp = bi_temp(b->shader); - bi_load_sysval_to(b, tmp, sysval, nr_components, offset); - return tmp; + bi_index tmp = bi_temp(b->shader); + bi_load_sysval_to(b, tmp, sysval, nr_components, offset); + return tmp; } static void bi_load_sample_id_to(bi_builder *b, bi_index dst) { - /* r61[16:23] contains the sampleID, mask it out. Upper bits - * seem to read garbage (despite being architecturally defined - * as zero), so use a 5-bit mask instead of 8-bits */ + /* r61[16:23] contains the sampleID, mask it out. 
Upper bits + * seem to read garbage (despite being architecturally defined + * as zero), so use a 5-bit mask instead of 8-bits */ - bi_rshift_and_i32_to(b, dst, bi_preload(b, 61), bi_imm_u32(0x1f), - bi_imm_u8(16), false); + bi_rshift_and_i32_to(b, dst, bi_preload(b, 61), bi_imm_u32(0x1f), + bi_imm_u8(16), false); } static bi_index bi_load_sample_id(bi_builder *b) { - bi_index sample_id = bi_temp(b->shader); - bi_load_sample_id_to(b, sample_id); - return sample_id; + bi_index sample_id = bi_temp(b->shader); + bi_load_sample_id_to(b, sample_id); + return sample_id; } static bi_index bi_pixel_indices(bi_builder *b, unsigned rt) { - /* We want to load the current pixel. */ - struct bifrost_pixel_indices pix = { - .y = BIFROST_CURRENT_PIXEL, - .rt = rt - }; + /* We want to load the current pixel. */ + struct bifrost_pixel_indices pix = {.y = BIFROST_CURRENT_PIXEL, .rt = rt}; - uint32_t indices_u32 = 0; - memcpy(&indices_u32, &pix, sizeof(indices_u32)); - bi_index indices = bi_imm_u32(indices_u32); + uint32_t indices_u32 = 0; + memcpy(&indices_u32, &pix, sizeof(indices_u32)); + bi_index indices = bi_imm_u32(indices_u32); - /* Sample index above is left as zero. For multisampling, we need to - * fill in the actual sample ID in the lower byte */ + /* Sample index above is left as zero. For multisampling, we need to + * fill in the actual sample ID in the lower byte */ - if (b->shader->inputs->blend.nr_samples > 1) - indices = bi_iadd_u32(b, indices, bi_load_sample_id(b), false); + if (b->shader->inputs->blend.nr_samples > 1) + indices = bi_iadd_u32(b, indices, bi_load_sample_id(b), false); - return indices; + return indices; } /* Source color is passed through r0-r3, or r4-r7 for the second source when @@ -764,68 +752,64 @@ bi_pixel_indices(bi_builder *b, unsigned rt) static void bi_emit_load_blend_input(bi_builder *b, nir_intrinsic_instr *instr) { - nir_io_semantics sem = nir_intrinsic_io_semantics(instr); - unsigned base = (sem.location == VARYING_SLOT_VAR0) ? 4 : 0; - unsigned size = nir_alu_type_get_type_size(nir_intrinsic_dest_type(instr)); - assert(size == 16 || size == 32); + nir_io_semantics sem = nir_intrinsic_io_semantics(instr); + unsigned base = (sem.location == VARYING_SLOT_VAR0) ? 4 : 0; + unsigned size = nir_alu_type_get_type_size(nir_intrinsic_dest_type(instr)); + assert(size == 16 || size == 32); - bi_index srcs[] = { - bi_preload(b, base + 0), bi_preload(b, base + 1), - bi_preload(b, base + 2), bi_preload(b, base + 3) - }; + bi_index srcs[] = {bi_preload(b, base + 0), bi_preload(b, base + 1), + bi_preload(b, base + 2), bi_preload(b, base + 3)}; - bi_emit_collect_to(b, bi_dest_index(&instr->dest), srcs, size == 32 ? 4 : 2); + bi_emit_collect_to(b, bi_dest_index(&instr->dest), srcs, size == 32 ? 4 : 2); } static void -bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, - bi_index rgba2, nir_alu_type T2, unsigned rt) +bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, bi_index rgba2, + nir_alu_type T2, unsigned rt) { - /* Reads 2 or 4 staging registers to cover the input */ - unsigned size = nir_alu_type_get_type_size(T); - unsigned size_2 = nir_alu_type_get_type_size(T2); - unsigned sr_count = (size <= 16) ? 2 : 4; - unsigned sr_count_2 = (size_2 <= 16) ? 
2 : 4; - const struct panfrost_compile_inputs *inputs = b->shader->inputs; - uint64_t blend_desc = inputs->blend.bifrost_blend_desc; - enum bi_register_format regfmt = bi_reg_fmt_for_nir(T); + /* Reads 2 or 4 staging registers to cover the input */ + unsigned size = nir_alu_type_get_type_size(T); + unsigned size_2 = nir_alu_type_get_type_size(T2); + unsigned sr_count = (size <= 16) ? 2 : 4; + unsigned sr_count_2 = (size_2 <= 16) ? 2 : 4; + const struct panfrost_compile_inputs *inputs = b->shader->inputs; + uint64_t blend_desc = inputs->blend.bifrost_blend_desc; + enum bi_register_format regfmt = bi_reg_fmt_for_nir(T); - /* Workaround for NIR-to-TGSI */ - if (b->shader->nir->info.fs.untyped_color_outputs) - regfmt = BI_REGISTER_FORMAT_AUTO; + /* Workaround for NIR-to-TGSI */ + if (b->shader->nir->info.fs.untyped_color_outputs) + regfmt = BI_REGISTER_FORMAT_AUTO; - if (inputs->is_blend && inputs->blend.nr_samples > 1) { - /* Conversion descriptor comes from the compile inputs, pixel - * indices derived at run time based on sample ID */ - bi_st_tile(b, rgba, bi_pixel_indices(b, rt), bi_coverage(b), - bi_imm_u32(blend_desc >> 32), - regfmt, BI_VECSIZE_V4); - } else if (b->shader->inputs->is_blend) { - uint64_t blend_desc = b->shader->inputs->blend.bifrost_blend_desc; + if (inputs->is_blend && inputs->blend.nr_samples > 1) { + /* Conversion descriptor comes from the compile inputs, pixel + * indices derived at run time based on sample ID */ + bi_st_tile(b, rgba, bi_pixel_indices(b, rt), bi_coverage(b), + bi_imm_u32(blend_desc >> 32), regfmt, BI_VECSIZE_V4); + } else if (b->shader->inputs->is_blend) { + uint64_t blend_desc = b->shader->inputs->blend.bifrost_blend_desc; - /* Blend descriptor comes from the compile inputs */ - /* Put the result in r0 */ + /* Blend descriptor comes from the compile inputs */ + /* Put the result in r0 */ - bi_blend_to(b, bi_temp(b->shader), rgba, bi_coverage(b), - bi_imm_u32(blend_desc), - bi_imm_u32(blend_desc >> 32), - bi_null(), regfmt, sr_count, 0); - } else { - /* Blend descriptor comes from the FAU RAM. By convention, the - * return address on Bifrost is stored in r48 and will be used - * by the blend shader to jump back to the fragment shader */ + bi_blend_to(b, bi_temp(b->shader), rgba, bi_coverage(b), + bi_imm_u32(blend_desc), bi_imm_u32(blend_desc >> 32), + bi_null(), regfmt, sr_count, 0); + } else { + /* Blend descriptor comes from the FAU RAM. 
By convention, the + * return address on Bifrost is stored in r48 and will be used + * by the blend shader to jump back to the fragment shader */ - bi_blend_to(b, bi_temp(b->shader), rgba, bi_coverage(b), - bi_fau(BIR_FAU_BLEND_0 + rt, false), - bi_fau(BIR_FAU_BLEND_0 + rt, true), - rgba2, regfmt, sr_count, sr_count_2); - } + bi_blend_to(b, bi_temp(b->shader), rgba, bi_coverage(b), + bi_fau(BIR_FAU_BLEND_0 + rt, false), + bi_fau(BIR_FAU_BLEND_0 + rt, true), rgba2, regfmt, sr_count, + sr_count_2); + } - assert(rt < 8); - b->shader->info.bifrost->blend[rt].type = T; + assert(rt < 8); + b->shader->info.bifrost->blend[rt].type = T; - if (T2) - b->shader->info.bifrost->blend_src1_type = T2; + if (T2) + b->shader->info.bifrost->blend_src1_type = T2; } /* Blend shaders do not need to run ATEST since they are dependent on a @@ -838,116 +822,115 @@ bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, static bool bi_skip_atest(bi_context *ctx, bool emit_zs) { - return (ctx->inputs->is_blit && !emit_zs) || ctx->inputs->is_blend; + return (ctx->inputs->is_blit && !emit_zs) || ctx->inputs->is_blend; } static void bi_emit_atest(bi_builder *b, bi_index alpha) { - b->shader->coverage = bi_atest(b, bi_coverage(b), alpha, - bi_fau(BIR_FAU_ATEST_PARAM, false)); - b->shader->emitted_atest = true; + b->shader->coverage = + bi_atest(b, bi_coverage(b), alpha, bi_fau(BIR_FAU_ATEST_PARAM, false)); + b->shader->emitted_atest = true; } static void bi_emit_fragment_out(bi_builder *b, nir_intrinsic_instr *instr) { - bool combined = instr->intrinsic == - nir_intrinsic_store_combined_output_pan; + bool combined = instr->intrinsic == nir_intrinsic_store_combined_output_pan; - unsigned writeout = combined ? nir_intrinsic_component(instr) : - PAN_WRITEOUT_C; + unsigned writeout = + combined ? nir_intrinsic_component(instr) : PAN_WRITEOUT_C; - bool emit_blend = writeout & (PAN_WRITEOUT_C); - bool emit_zs = writeout & (PAN_WRITEOUT_Z | PAN_WRITEOUT_S); + bool emit_blend = writeout & (PAN_WRITEOUT_C); + bool emit_zs = writeout & (PAN_WRITEOUT_Z | PAN_WRITEOUT_S); - unsigned loc = nir_intrinsic_io_semantics(instr).location; - bi_index src0 = bi_src_index(&instr->src[0]); + unsigned loc = nir_intrinsic_io_semantics(instr).location; + bi_index src0 = bi_src_index(&instr->src[0]); - /* By ISA convention, the coverage mask is stored in R60. The store - * itself will be handled by a subsequent ATEST instruction */ - if (loc == FRAG_RESULT_SAMPLE_MASK) { - bi_index orig = bi_coverage(b); - bi_index msaa = bi_load_sysval(b, PAN_SYSVAL_MULTISAMPLED, 1, 0); - bi_index new = bi_lshift_and_i32(b, orig, bi_extract(b, src0, 0), bi_imm_u8(0)); + /* By ISA convention, the coverage mask is stored in R60. The store + * itself will be handled by a subsequent ATEST instruction */ + if (loc == FRAG_RESULT_SAMPLE_MASK) { + bi_index orig = bi_coverage(b); + bi_index msaa = bi_load_sysval(b, PAN_SYSVAL_MULTISAMPLED, 1, 0); + bi_index new = + bi_lshift_and_i32(b, orig, bi_extract(b, src0, 0), bi_imm_u8(0)); - b->shader->coverage = - bi_mux_i32(b, orig, new, msaa, BI_MUX_INT_ZERO); - return; - } + b->shader->coverage = bi_mux_i32(b, orig, new, msaa, BI_MUX_INT_ZERO); + return; + } - /* Emit ATEST if we have to, note ATEST requires a floating-point alpha - * value, but render target #0 might not be floating point. However the - * alpha value is only used for alpha-to-coverage, a stage which is - * skipped for pure integer framebuffers, so the issue is moot. 
*/ + /* Emit ATEST if we have to, note ATEST requires a floating-point alpha + * value, but render target #0 might not be floating point. However the + * alpha value is only used for alpha-to-coverage, a stage which is + * skipped for pure integer framebuffers, so the issue is moot. */ - if (!b->shader->emitted_atest && !bi_skip_atest(b->shader, emit_zs)) { - nir_alu_type T = nir_intrinsic_src_type(instr); + if (!b->shader->emitted_atest && !bi_skip_atest(b->shader, emit_zs)) { + nir_alu_type T = nir_intrinsic_src_type(instr); - bi_index rgba = bi_src_index(&instr->src[0]); - bi_index alpha = - (T == nir_type_float16) ? bi_half(bi_extract(b, rgba, 1), true) : - (T == nir_type_float32) ? bi_extract(b, rgba, 3) : - bi_dontcare(b); + bi_index rgba = bi_src_index(&instr->src[0]); + bi_index alpha = (T == nir_type_float16) + ? bi_half(bi_extract(b, rgba, 1), true) + : (T == nir_type_float32) ? bi_extract(b, rgba, 3) + : bi_dontcare(b); - /* Don't read out-of-bounds */ - if (nir_src_num_components(instr->src[0]) < 4) - alpha = bi_imm_f32(1.0); + /* Don't read out-of-bounds */ + if (nir_src_num_components(instr->src[0]) < 4) + alpha = bi_imm_f32(1.0); - bi_emit_atest(b, alpha); - } + bi_emit_atest(b, alpha); + } - if (emit_zs) { - bi_index z = bi_dontcare(b), s = bi_dontcare(b); + if (emit_zs) { + bi_index z = bi_dontcare(b), s = bi_dontcare(b); - if (writeout & PAN_WRITEOUT_Z) - z = bi_src_index(&instr->src[2]); + if (writeout & PAN_WRITEOUT_Z) + z = bi_src_index(&instr->src[2]); - if (writeout & PAN_WRITEOUT_S) - s = bi_src_index(&instr->src[3]); + if (writeout & PAN_WRITEOUT_S) + s = bi_src_index(&instr->src[3]); - b->shader->coverage = bi_zs_emit(b, z, s, bi_coverage(b), - writeout & PAN_WRITEOUT_S, - writeout & PAN_WRITEOUT_Z); - } + b->shader->coverage = + bi_zs_emit(b, z, s, bi_coverage(b), writeout & PAN_WRITEOUT_S, + writeout & PAN_WRITEOUT_Z); + } - if (emit_blend) { - unsigned rt = loc ? (loc - FRAG_RESULT_DATA0) : 0; - bool dual = (writeout & PAN_WRITEOUT_2); - bi_index color = bi_src_index(&instr->src[0]); - bi_index color2 = dual ? bi_src_index(&instr->src[4]) : bi_null(); - nir_alu_type T2 = dual ? nir_intrinsic_dest_type(instr) : 0; + if (emit_blend) { + unsigned rt = loc ? (loc - FRAG_RESULT_DATA0) : 0; + bool dual = (writeout & PAN_WRITEOUT_2); + bi_index color = bi_src_index(&instr->src[0]); + bi_index color2 = dual ? bi_src_index(&instr->src[4]) : bi_null(); + nir_alu_type T2 = dual ? 
nir_intrinsic_dest_type(instr) : 0; - /* Explicit copy since BLEND inputs are precoloured to R0-R3, - * TODO: maybe schedule around this or implement in RA as a - * spill */ - bool has_mrt = (b->shader->nir->info.outputs_written >> FRAG_RESULT_DATA1); + /* Explicit copy since BLEND inputs are precoloured to R0-R3, + * TODO: maybe schedule around this or implement in RA as a + * spill */ + bool has_mrt = + (b->shader->nir->info.outputs_written >> FRAG_RESULT_DATA1); - if (has_mrt) { - bi_index srcs[4] = { color, color, color, color }; - unsigned channels[4] = { 0, 1, 2, 3 }; - color = bi_temp(b->shader); - bi_make_vec_to(b, color, srcs, channels, - nir_src_num_components(instr->src[0]), - nir_alu_type_get_type_size(nir_intrinsic_src_type(instr))); - } + if (has_mrt) { + bi_index srcs[4] = {color, color, color, color}; + unsigned channels[4] = {0, 1, 2, 3}; + color = bi_temp(b->shader); + bi_make_vec_to( + b, color, srcs, channels, nir_src_num_components(instr->src[0]), + nir_alu_type_get_type_size(nir_intrinsic_src_type(instr))); + } - bi_emit_blend_op(b, color, nir_intrinsic_src_type(instr), - color2, T2, rt); - } + bi_emit_blend_op(b, color, nir_intrinsic_src_type(instr), color2, T2, rt); + } - if (b->shader->inputs->is_blend) { - /* Jump back to the fragment shader, return address is stored - * in r48 (see above). On Valhall, only jump if the address is - * nonzero. The check is free there and it implements the "jump - * to 0 terminates the blend shader" that's automatic on - * Bifrost. - */ - if (b->shader->arch >= 8) - bi_branchzi(b, bi_preload(b, 48), bi_preload(b, 48), BI_CMPF_NE); - else - bi_jump(b, bi_preload(b, 48)); - } + if (b->shader->inputs->is_blend) { + /* Jump back to the fragment shader, return address is stored + * in r48 (see above). On Valhall, only jump if the address is + * nonzero. The check is free there and it implements the "jump + * to 0 terminates the blend shader" that's automatic on + * Bifrost. 
+ */ + if (b->shader->arch >= 8) + bi_branchzi(b, bi_preload(b, 48), bi_preload(b, 48), BI_CMPF_NE); + else + bi_jump(b, bi_preload(b, 48)); + } } /** @@ -958,315 +941,311 @@ bi_emit_fragment_out(bi_builder *b, nir_intrinsic_instr *instr) static bool bi_should_remove_store(nir_intrinsic_instr *intr, enum bi_idvs_mode idvs) { - nir_io_semantics sem = nir_intrinsic_io_semantics(intr); + nir_io_semantics sem = nir_intrinsic_io_semantics(intr); - switch (sem.location) { - case VARYING_SLOT_POS: - case VARYING_SLOT_PSIZ: - return idvs == BI_IDVS_VARYING; - default: - return idvs == BI_IDVS_POSITION; - } + switch (sem.location) { + case VARYING_SLOT_POS: + case VARYING_SLOT_PSIZ: + return idvs == BI_IDVS_VARYING; + default: + return idvs == BI_IDVS_POSITION; + } } static bool bifrost_nir_specialize_idvs(nir_builder *b, nir_instr *instr, void *data) { - enum bi_idvs_mode *idvs = data; + enum bi_idvs_mode *idvs = data; - if (instr->type != nir_instr_type_intrinsic) - return false; + if (instr->type != nir_instr_type_intrinsic) + return false; - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - if (intr->intrinsic != nir_intrinsic_store_output) - return false; + if (intr->intrinsic != nir_intrinsic_store_output) + return false; - if (bi_should_remove_store(intr, *idvs)) { - nir_instr_remove(instr); - return true; - } + if (bi_should_remove_store(intr, *idvs)) { + nir_instr_remove(instr); + return true; + } - return false; + return false; } static void bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr) { - /* In principle we can do better for 16-bit. At the moment we require - * 32-bit to permit the use of .auto, in order to force .u32 for flat - * varyings, to handle internal TGSI shaders that set flat in the VS - * but smooth in the FS */ + /* In principle we can do better for 16-bit. At the moment we require + * 32-bit to permit the use of .auto, in order to force .u32 for flat + * varyings, to handle internal TGSI shaders that set flat in the VS + * but smooth in the FS */ - ASSERTED nir_alu_type T = nir_intrinsic_src_type(instr); - ASSERTED unsigned T_size = nir_alu_type_get_type_size(T); - assert(T_size == 32 || (b->shader->arch >= 9 && T_size == 16)); - enum bi_register_format regfmt = BI_REGISTER_FORMAT_AUTO; + ASSERTED nir_alu_type T = nir_intrinsic_src_type(instr); + ASSERTED unsigned T_size = nir_alu_type_get_type_size(T); + assert(T_size == 32 || (b->shader->arch >= 9 && T_size == 16)); + enum bi_register_format regfmt = BI_REGISTER_FORMAT_AUTO; - unsigned imm_index = 0; - bool immediate = bi_is_intr_immediate(instr, &imm_index, 16); + unsigned imm_index = 0; + bool immediate = bi_is_intr_immediate(instr, &imm_index, 16); - /* Only look at the total components needed. In effect, we fill in all - * the intermediate "holes" in the write mask, since we can't mask off - * stores. Since nir_lower_io_to_temporaries ensures each varying is - * written at most once, anything that's masked out is undefined, so it - * doesn't matter what we write there. So we may as well do the - * simplest thing possible. */ - unsigned nr = util_last_bit(nir_intrinsic_write_mask(instr)); - assert(nr > 0 && nr <= nir_intrinsic_src_components(instr, 0)); + /* Only look at the total components needed. In effect, we fill in all + * the intermediate "holes" in the write mask, since we can't mask off + * stores. 
Since nir_lower_io_to_temporaries ensures each varying is + * written at most once, anything that's masked out is undefined, so it + * doesn't matter what we write there. So we may as well do the + * simplest thing possible. */ + unsigned nr = util_last_bit(nir_intrinsic_write_mask(instr)); + assert(nr > 0 && nr <= nir_intrinsic_src_components(instr, 0)); - bi_index data = bi_src_index(&instr->src[0]); + bi_index data = bi_src_index(&instr->src[0]); - /* To keep the vector dimensions consistent, we need to drop some - * components. This should be coalesced. - * - * TODO: This is ugly and maybe inefficient. Would we rather - * introduce a TRIM.i32 pseudoinstruction? - */ - if (nr < nir_intrinsic_src_components(instr, 0)) { - assert(T_size == 32 && "todo: 16-bit trim"); + /* To keep the vector dimensions consistent, we need to drop some + * components. This should be coalesced. + * + * TODO: This is ugly and maybe inefficient. Would we rather + * introduce a TRIM.i32 pseudoinstruction? + */ + if (nr < nir_intrinsic_src_components(instr, 0)) { + assert(T_size == 32 && "todo: 16-bit trim"); - bi_index chans[4] = { bi_null(), bi_null(), bi_null(), bi_null() }; - unsigned src_comps = nir_intrinsic_src_components(instr, 0); + bi_index chans[4] = {bi_null(), bi_null(), bi_null(), bi_null()}; + unsigned src_comps = nir_intrinsic_src_components(instr, 0); - bi_emit_split_i32(b, chans, data, src_comps); + bi_emit_split_i32(b, chans, data, src_comps); - bi_index tmp = bi_temp(b->shader); - bi_instr *collect = bi_collect_i32_to(b, tmp, nr); + bi_index tmp = bi_temp(b->shader); + bi_instr *collect = bi_collect_i32_to(b, tmp, nr); - bi_foreach_src(collect, w) - collect->src[w] = chans[w]; + bi_foreach_src(collect, w) + collect->src[w] = chans[w]; - data = tmp; - } + data = tmp; + } - bool psiz = (nir_intrinsic_io_semantics(instr).location == VARYING_SLOT_PSIZ); + bool psiz = + (nir_intrinsic_io_semantics(instr).location == VARYING_SLOT_PSIZ); - bi_index a[4] = { bi_null() }; + bi_index a[4] = {bi_null()}; - if (b->shader->arch <= 8 && b->shader->idvs == BI_IDVS_POSITION) { - /* Bifrost position shaders have a fast path */ - assert(T == nir_type_float16 || T == nir_type_float32); - unsigned regfmt = (T == nir_type_float16) ? 0 : 1; - unsigned identity = (b->shader->arch == 6) ? 0x688 : 0; - unsigned snap4 = 0x5E; - uint32_t format = identity | (snap4 << 12) | (regfmt << 24); + if (b->shader->arch <= 8 && b->shader->idvs == BI_IDVS_POSITION) { + /* Bifrost position shaders have a fast path */ + assert(T == nir_type_float16 || T == nir_type_float32); + unsigned regfmt = (T == nir_type_float16) ? 0 : 1; + unsigned identity = (b->shader->arch == 6) ? 
0x688 : 0; + unsigned snap4 = 0x5E; + uint32_t format = identity | (snap4 << 12) | (regfmt << 24); - bi_st_cvt(b, data, bi_preload(b, 58), bi_preload(b, 59), - bi_imm_u32(format), regfmt, nr - 1); - } else if (b->shader->arch >= 9 && b->shader->idvs != BI_IDVS_NONE) { - bi_index index = bi_preload(b, 59); + bi_st_cvt(b, data, bi_preload(b, 58), bi_preload(b, 59), + bi_imm_u32(format), regfmt, nr - 1); + } else if (b->shader->arch >= 9 && b->shader->idvs != BI_IDVS_NONE) { + bi_index index = bi_preload(b, 59); - if (psiz) { - assert(T_size == 16 && "should've been lowered"); - index = bi_iadd_imm_i32(b, index, 4); - } + if (psiz) { + assert(T_size == 16 && "should've been lowered"); + index = bi_iadd_imm_i32(b, index, 4); + } - bi_index address = bi_lea_buf_imm(b, index); - bi_emit_split_i32(b, a, address, 2); + bi_index address = bi_lea_buf_imm(b, index); + bi_emit_split_i32(b, a, address, 2); - bool varying = (b->shader->idvs == BI_IDVS_VARYING); + bool varying = (b->shader->idvs == BI_IDVS_VARYING); - bi_store(b, nr * nir_src_bit_size(instr->src[0]), - data, a[0], a[1], - varying ? BI_SEG_VARY : BI_SEG_POS, - varying ? bi_varying_offset(b->shader, instr) : 0); - } else if (immediate) { - bi_index address = bi_lea_attr_imm(b, - bi_vertex_id(b), bi_instance_id(b), - regfmt, imm_index); - bi_emit_split_i32(b, a, address, 3); + bi_store(b, nr * nir_src_bit_size(instr->src[0]), data, a[0], a[1], + varying ? BI_SEG_VARY : BI_SEG_POS, + varying ? bi_varying_offset(b->shader, instr) : 0); + } else if (immediate) { + bi_index address = bi_lea_attr_imm(b, bi_vertex_id(b), bi_instance_id(b), + regfmt, imm_index); + bi_emit_split_i32(b, a, address, 3); - bi_st_cvt(b, data, a[0], a[1], a[2], regfmt, nr - 1); - } else { - bi_index idx = - bi_iadd_u32(b, - bi_src_index(nir_get_io_offset_src(instr)), - bi_imm_u32(nir_intrinsic_base(instr)), - false); - bi_index address = bi_lea_attr(b, - bi_vertex_id(b), bi_instance_id(b), - idx, regfmt); - bi_emit_split_i32(b, a, address, 3); + bi_st_cvt(b, data, a[0], a[1], a[2], regfmt, nr - 1); + } else { + bi_index idx = bi_iadd_u32(b, bi_src_index(nir_get_io_offset_src(instr)), + bi_imm_u32(nir_intrinsic_base(instr)), false); + bi_index address = + bi_lea_attr(b, bi_vertex_id(b), bi_instance_id(b), idx, regfmt); + bi_emit_split_i32(b, a, address, 3); - bi_st_cvt(b, data, a[0], a[1], a[2], regfmt, nr - 1); - } + bi_st_cvt(b, data, a[0], a[1], a[2], regfmt, nr - 1); + } } static void bi_emit_load_ubo(bi_builder *b, nir_intrinsic_instr *instr) { - nir_src *offset = nir_get_io_offset_src(instr); + nir_src *offset = nir_get_io_offset_src(instr); - bool offset_is_const = nir_src_is_const(*offset); - bi_index dyn_offset = bi_src_index(offset); - uint32_t const_offset = offset_is_const ? nir_src_as_uint(*offset) : 0; + bool offset_is_const = nir_src_is_const(*offset); + bi_index dyn_offset = bi_src_index(offset); + uint32_t const_offset = offset_is_const ? nir_src_as_uint(*offset) : 0; - bi_load_ubo_to(b, instr->num_components * nir_dest_bit_size(instr->dest), - bi_dest_index(&instr->dest), offset_is_const ? - bi_imm_u32(const_offset) : dyn_offset, - bi_src_index(&instr->src[0])); + bi_load_ubo_to(b, instr->num_components * nir_dest_bit_size(instr->dest), + bi_dest_index(&instr->dest), + offset_is_const ? 
bi_imm_u32(const_offset) : dyn_offset, + bi_src_index(&instr->src[0])); } static void bi_emit_load_push_constant(bi_builder *b, nir_intrinsic_instr *instr) { - assert(b->shader->inputs->no_ubo_to_push && "can't mix push constant forms"); + assert(b->shader->inputs->no_ubo_to_push && "can't mix push constant forms"); - nir_src *offset = &instr->src[0]; - assert(nir_src_is_const(*offset) && "no indirect push constants"); - uint32_t base = nir_intrinsic_base(instr) + nir_src_as_uint(*offset); - assert((base & 3) == 0 && "unaligned push constants"); + nir_src *offset = &instr->src[0]; + assert(nir_src_is_const(*offset) && "no indirect push constants"); + uint32_t base = nir_intrinsic_base(instr) + nir_src_as_uint(*offset); + assert((base & 3) == 0 && "unaligned push constants"); - unsigned bits = nir_dest_bit_size(instr->dest) * - nir_dest_num_components(instr->dest); + unsigned bits = + nir_dest_bit_size(instr->dest) * nir_dest_num_components(instr->dest); - unsigned n = DIV_ROUND_UP(bits, 32); - assert(n <= 4); - bi_index channels[4] = { bi_null() }; + unsigned n = DIV_ROUND_UP(bits, 32); + assert(n <= 4); + bi_index channels[4] = {bi_null()}; - for (unsigned i = 0; i < n; ++i) { - unsigned word = (base >> 2) + i; + for (unsigned i = 0; i < n; ++i) { + unsigned word = (base >> 2) + i; - channels[i] = bi_fau(BIR_FAU_UNIFORM | (word >> 1), word & 1); - } + channels[i] = bi_fau(BIR_FAU_UNIFORM | (word >> 1), word & 1); + } - bi_emit_collect_to(b, bi_dest_index(&instr->dest), channels, n); + bi_emit_collect_to(b, bi_dest_index(&instr->dest), channels, n); } static bi_index bi_addr_high(bi_builder *b, nir_src *src) { - return (nir_src_bit_size(*src) == 64) ? - bi_extract(b, bi_src_index(src), 1) : bi_zero(); + return (nir_src_bit_size(*src) == 64) ? bi_extract(b, bi_src_index(src), 1) + : bi_zero(); } static void -bi_handle_segment(bi_builder *b, bi_index *addr_lo, bi_index *addr_hi, enum bi_seg seg, int16_t *offset) +bi_handle_segment(bi_builder *b, bi_index *addr_lo, bi_index *addr_hi, + enum bi_seg seg, int16_t *offset) { - /* Not needed on Bifrost or for global accesses */ - if (b->shader->arch < 9 || seg == BI_SEG_NONE) - return; + /* Not needed on Bifrost or for global accesses */ + if (b->shader->arch < 9 || seg == BI_SEG_NONE) + return; - /* There is no segment modifier on Valhall. Instead, we need to - * emit the arithmetic ourselves. We do have an offset - * available, which saves an instruction for constant offsets. - */ - bool wls = (seg == BI_SEG_WLS); - assert(wls || (seg == BI_SEG_TL)); + /* There is no segment modifier on Valhall. Instead, we need to + * emit the arithmetic ourselves. We do have an offset + * available, which saves an instruction for constant offsets. + */ + bool wls = (seg == BI_SEG_WLS); + assert(wls || (seg == BI_SEG_TL)); - enum bir_fau fau = wls ? BIR_FAU_WLS_PTR : BIR_FAU_TLS_PTR; + enum bir_fau fau = wls ? 
BIR_FAU_WLS_PTR : BIR_FAU_TLS_PTR; - bi_index base_lo = bi_fau(fau, false); + bi_index base_lo = bi_fau(fau, false); - if (offset && addr_lo->type == BI_INDEX_CONSTANT && addr_lo->value == (int16_t) addr_lo->value) { - *offset = addr_lo->value; - *addr_lo = base_lo; - } else { - *addr_lo = bi_iadd_u32(b, base_lo, *addr_lo, false); - } + if (offset && addr_lo->type == BI_INDEX_CONSTANT && + addr_lo->value == (int16_t)addr_lo->value) { + *offset = addr_lo->value; + *addr_lo = base_lo; + } else { + *addr_lo = bi_iadd_u32(b, base_lo, *addr_lo, false); + } - /* Do not allow overflow for WLS or TLS */ - *addr_hi = bi_fau(fau, true); + /* Do not allow overflow for WLS or TLS */ + *addr_hi = bi_fau(fau, true); } static void bi_emit_load(bi_builder *b, nir_intrinsic_instr *instr, enum bi_seg seg) { - int16_t offset = 0; - unsigned bits = instr->num_components * nir_dest_bit_size(instr->dest); - bi_index dest = bi_dest_index(&instr->dest); - bi_index addr_lo = bi_extract(b, bi_src_index(&instr->src[0]), 0); - bi_index addr_hi = bi_addr_high(b, &instr->src[0]); + int16_t offset = 0; + unsigned bits = instr->num_components * nir_dest_bit_size(instr->dest); + bi_index dest = bi_dest_index(&instr->dest); + bi_index addr_lo = bi_extract(b, bi_src_index(&instr->src[0]), 0); + bi_index addr_hi = bi_addr_high(b, &instr->src[0]); - bi_handle_segment(b, &addr_lo, &addr_hi, seg, &offset); + bi_handle_segment(b, &addr_lo, &addr_hi, seg, &offset); - bi_load_to(b, bits, dest, addr_lo, addr_hi, seg, offset); - bi_emit_cached_split(b, dest, bits); + bi_load_to(b, bits, dest, addr_lo, addr_hi, seg, offset); + bi_emit_cached_split(b, dest, bits); } static void bi_emit_store(bi_builder *b, nir_intrinsic_instr *instr, enum bi_seg seg) { - /* Require contiguous masks, gauranteed by nir_lower_wrmasks */ - assert(nir_intrinsic_write_mask(instr) == - BITFIELD_MASK(instr->num_components)); + /* Require contiguous masks, gauranteed by nir_lower_wrmasks */ + assert(nir_intrinsic_write_mask(instr) == + BITFIELD_MASK(instr->num_components)); - int16_t offset = 0; - bi_index addr_lo = bi_extract(b, bi_src_index(&instr->src[1]), 0); - bi_index addr_hi = bi_addr_high(b, &instr->src[1]); + int16_t offset = 0; + bi_index addr_lo = bi_extract(b, bi_src_index(&instr->src[1]), 0); + bi_index addr_hi = bi_addr_high(b, &instr->src[1]); - bi_handle_segment(b, &addr_lo, &addr_hi, seg, &offset); + bi_handle_segment(b, &addr_lo, &addr_hi, seg, &offset); - bi_store(b, instr->num_components * nir_src_bit_size(instr->src[0]), - bi_src_index(&instr->src[0]), - addr_lo, addr_hi, seg, offset); + bi_store(b, instr->num_components * nir_src_bit_size(instr->src[0]), + bi_src_index(&instr->src[0]), addr_lo, addr_hi, seg, offset); } /* Exchanges the staging register with memory */ static void -bi_emit_axchg_to(bi_builder *b, bi_index dst, bi_index addr, nir_src *arg, enum bi_seg seg) +bi_emit_axchg_to(bi_builder *b, bi_index dst, bi_index addr, nir_src *arg, + enum bi_seg seg) { - assert(seg == BI_SEG_NONE || seg == BI_SEG_WLS); + assert(seg == BI_SEG_NONE || seg == BI_SEG_WLS); - unsigned sz = nir_src_bit_size(*arg); - assert(sz == 32 || sz == 64); + unsigned sz = nir_src_bit_size(*arg); + assert(sz == 32 || sz == 64); - bi_index data = bi_src_index(arg); + bi_index data = bi_src_index(arg); - bi_index addr_hi = (seg == BI_SEG_WLS) ? bi_zero() : bi_extract(b, addr, 1); + bi_index addr_hi = (seg == BI_SEG_WLS) ? 
bi_zero() : bi_extract(b, addr, 1); - if (b->shader->arch >= 9) - bi_handle_segment(b, &addr, &addr_hi, seg, NULL); - else if (seg == BI_SEG_WLS) - addr_hi = bi_zero(); + if (b->shader->arch >= 9) + bi_handle_segment(b, &addr, &addr_hi, seg, NULL); + else if (seg == BI_SEG_WLS) + addr_hi = bi_zero(); - bi_axchg_to(b, sz, dst, data, bi_extract(b, addr, 0), addr_hi, seg); + bi_axchg_to(b, sz, dst, data, bi_extract(b, addr, 0), addr_hi, seg); } /* Exchanges the second staging register with memory if comparison with first * staging register passes */ static void -bi_emit_acmpxchg_to(bi_builder *b, bi_index dst, bi_index addr, nir_src *arg_1, nir_src *arg_2, enum bi_seg seg) +bi_emit_acmpxchg_to(bi_builder *b, bi_index dst, bi_index addr, nir_src *arg_1, + nir_src *arg_2, enum bi_seg seg) { - assert(seg == BI_SEG_NONE || seg == BI_SEG_WLS); + assert(seg == BI_SEG_NONE || seg == BI_SEG_WLS); - /* hardware is swapped from NIR */ - bi_index src0 = bi_src_index(arg_2); - bi_index src1 = bi_src_index(arg_1); + /* hardware is swapped from NIR */ + bi_index src0 = bi_src_index(arg_2); + bi_index src1 = bi_src_index(arg_1); - unsigned sz = nir_src_bit_size(*arg_1); - assert(sz == 32 || sz == 64); + unsigned sz = nir_src_bit_size(*arg_1); + assert(sz == 32 || sz == 64); - bi_index data_words[] = { - bi_extract(b, src0, 0), - sz == 32 ? bi_extract(b, src1, 0) : bi_extract(b, src0, 1), + bi_index data_words[] = { + bi_extract(b, src0, 0), + sz == 32 ? bi_extract(b, src1, 0) : bi_extract(b, src0, 1), - /* 64-bit */ - bi_extract(b, src1, 0), - sz == 32 ? bi_extract(b, src1, 0) : bi_extract(b, src1, 1), - }; + /* 64-bit */ + bi_extract(b, src1, 0), + sz == 32 ? bi_extract(b, src1, 0) : bi_extract(b, src1, 1), + }; - bi_index in = bi_temp(b->shader); - bi_emit_collect_to(b, in, data_words, 2 * (sz / 32)); - bi_index addr_hi = (seg == BI_SEG_WLS) ? bi_zero() : bi_extract(b, addr, 1); + bi_index in = bi_temp(b->shader); + bi_emit_collect_to(b, in, data_words, 2 * (sz / 32)); + bi_index addr_hi = (seg == BI_SEG_WLS) ? bi_zero() : bi_extract(b, addr, 1); - if (b->shader->arch >= 9) - bi_handle_segment(b, &addr, &addr_hi, seg, NULL); - else if (seg == BI_SEG_WLS) - addr_hi = bi_zero(); + if (b->shader->arch >= 9) + bi_handle_segment(b, &addr, &addr_hi, seg, NULL); + else if (seg == BI_SEG_WLS) + addr_hi = bi_zero(); - bi_index out = bi_acmpxchg(b, sz, in, bi_extract(b, addr, 0), addr_hi, seg); - bi_emit_cached_split(b, out, sz); + bi_index out = bi_acmpxchg(b, sz, in, bi_extract(b, addr, 0), addr_hi, seg); + bi_emit_cached_split(b, out, sz); - bi_index inout_words[] = { - bi_extract(b, out, 0), - sz == 64 ? bi_extract(b, out, 1) : bi_null() - }; + bi_index inout_words[] = {bi_extract(b, out, 0), + sz == 64 ? 
bi_extract(b, out, 1) : bi_null()}; - bi_make_vec_to(b, dst, inout_words, NULL, sz / 32, 32); + bi_make_vec_to(b, dst, inout_words, NULL, sz / 32, 32); } /* Extracts an atomic opcode */ @@ -1274,50 +1253,50 @@ bi_emit_acmpxchg_to(bi_builder *b, bi_index dst, bi_index addr, nir_src *arg_1, static enum bi_atom_opc bi_atom_opc_for_nir(nir_intrinsic_op op) { - switch (op) { - case nir_intrinsic_global_atomic_add: - case nir_intrinsic_shared_atomic_add: - case nir_intrinsic_image_atomic_add: - return BI_ATOM_OPC_AADD; + switch (op) { + case nir_intrinsic_global_atomic_add: + case nir_intrinsic_shared_atomic_add: + case nir_intrinsic_image_atomic_add: + return BI_ATOM_OPC_AADD; - case nir_intrinsic_global_atomic_imin: - case nir_intrinsic_shared_atomic_imin: - case nir_intrinsic_image_atomic_imin: - return BI_ATOM_OPC_ASMIN; + case nir_intrinsic_global_atomic_imin: + case nir_intrinsic_shared_atomic_imin: + case nir_intrinsic_image_atomic_imin: + return BI_ATOM_OPC_ASMIN; - case nir_intrinsic_global_atomic_umin: - case nir_intrinsic_shared_atomic_umin: - case nir_intrinsic_image_atomic_umin: - return BI_ATOM_OPC_AUMIN; + case nir_intrinsic_global_atomic_umin: + case nir_intrinsic_shared_atomic_umin: + case nir_intrinsic_image_atomic_umin: + return BI_ATOM_OPC_AUMIN; - case nir_intrinsic_global_atomic_imax: - case nir_intrinsic_shared_atomic_imax: - case nir_intrinsic_image_atomic_imax: - return BI_ATOM_OPC_ASMAX; + case nir_intrinsic_global_atomic_imax: + case nir_intrinsic_shared_atomic_imax: + case nir_intrinsic_image_atomic_imax: + return BI_ATOM_OPC_ASMAX; - case nir_intrinsic_global_atomic_umax: - case nir_intrinsic_shared_atomic_umax: - case nir_intrinsic_image_atomic_umax: - return BI_ATOM_OPC_AUMAX; + case nir_intrinsic_global_atomic_umax: + case nir_intrinsic_shared_atomic_umax: + case nir_intrinsic_image_atomic_umax: + return BI_ATOM_OPC_AUMAX; - case nir_intrinsic_global_atomic_and: - case nir_intrinsic_shared_atomic_and: - case nir_intrinsic_image_atomic_and: - return BI_ATOM_OPC_AAND; + case nir_intrinsic_global_atomic_and: + case nir_intrinsic_shared_atomic_and: + case nir_intrinsic_image_atomic_and: + return BI_ATOM_OPC_AAND; - case nir_intrinsic_global_atomic_or: - case nir_intrinsic_shared_atomic_or: - case nir_intrinsic_image_atomic_or: - return BI_ATOM_OPC_AOR; + case nir_intrinsic_global_atomic_or: + case nir_intrinsic_shared_atomic_or: + case nir_intrinsic_image_atomic_or: + return BI_ATOM_OPC_AOR; - case nir_intrinsic_global_atomic_xor: - case nir_intrinsic_shared_atomic_xor: - case nir_intrinsic_image_atomic_xor: - return BI_ATOM_OPC_AXOR; + case nir_intrinsic_global_atomic_xor: + case nir_intrinsic_shared_atomic_xor: + case nir_intrinsic_image_atomic_xor: + return BI_ATOM_OPC_AXOR; - default: - unreachable("Unexpected computational atomic"); - } + default: + unreachable("Unexpected computational atomic"); + } } /* Optimized unary atomics are available with an implied #1 argument */ @@ -1325,30 +1304,30 @@ bi_atom_opc_for_nir(nir_intrinsic_op op) static bool bi_promote_atom_c1(enum bi_atom_opc op, bi_index arg, enum bi_atom_opc *out) { - /* Check we have a compatible constant */ - if (arg.type != BI_INDEX_CONSTANT) - return false; + /* Check we have a compatible constant */ + if (arg.type != BI_INDEX_CONSTANT) + return false; - if (!(arg.value == 1 || (arg.value == -1 && op == BI_ATOM_OPC_AADD))) - return false; + if (!(arg.value == 1 || (arg.value == -1 && op == BI_ATOM_OPC_AADD))) + return false; - /* Check for a compatible operation */ - switch (op) { - case 
BI_ATOM_OPC_AADD: - *out = (arg.value == 1) ? BI_ATOM_OPC_AINC : BI_ATOM_OPC_ADEC; - return true; - case BI_ATOM_OPC_ASMAX: - *out = BI_ATOM_OPC_ASMAX1; - return true; - case BI_ATOM_OPC_AUMAX: - *out = BI_ATOM_OPC_AUMAX1; - return true; - case BI_ATOM_OPC_AOR: - *out = BI_ATOM_OPC_AOR1; - return true; - default: - return false; - } + /* Check for a compatible operation */ + switch (op) { + case BI_ATOM_OPC_AADD: + *out = (arg.value == 1) ? BI_ATOM_OPC_AINC : BI_ATOM_OPC_ADEC; + return true; + case BI_ATOM_OPC_ASMAX: + *out = BI_ATOM_OPC_ASMAX1; + return true; + case BI_ATOM_OPC_AUMAX: + *out = BI_ATOM_OPC_AUMAX1; + return true; + case BI_ATOM_OPC_AOR: + *out = BI_ATOM_OPC_AOR1; + return true; + default: + return false; + } } /* @@ -1364,172 +1343,173 @@ static bi_index bi_emit_image_coord(bi_builder *b, bi_index coord, unsigned src_idx, unsigned coord_comps, bool is_array) { - assert(coord_comps > 0 && coord_comps <= 3); + assert(coord_comps > 0 && coord_comps <= 3); - if (src_idx == 0) { - if (coord_comps == 1 || (coord_comps == 2 && is_array)) - return bi_extract(b, coord, 0); - else - return bi_mkvec_v2i16(b, - bi_half(bi_extract(b, coord, 0), false), - bi_half(bi_extract(b, coord, 1), false)); - } else { - if (coord_comps == 3 && b->shader->arch >= 9) - return bi_mkvec_v2i16(b, bi_imm_u16(0), - bi_half(bi_extract(b, coord, 2), false)); - else if (coord_comps == 2 && is_array && b->shader->arch >= 9) - return bi_mkvec_v2i16(b, bi_imm_u16(0), - bi_half(bi_extract(b, coord, 1), false)); - else if (coord_comps == 3) - return bi_extract(b, coord, 2); - else if (coord_comps == 2 && is_array) - return bi_extract(b, coord, 1); - else - return bi_zero(); - } + if (src_idx == 0) { + if (coord_comps == 1 || (coord_comps == 2 && is_array)) + return bi_extract(b, coord, 0); + else + return bi_mkvec_v2i16(b, bi_half(bi_extract(b, coord, 0), false), + bi_half(bi_extract(b, coord, 1), false)); + } else { + if (coord_comps == 3 && b->shader->arch >= 9) + return bi_mkvec_v2i16(b, bi_imm_u16(0), + bi_half(bi_extract(b, coord, 2), false)); + else if (coord_comps == 2 && is_array && b->shader->arch >= 9) + return bi_mkvec_v2i16(b, bi_imm_u16(0), + bi_half(bi_extract(b, coord, 1), false)); + else if (coord_comps == 3) + return bi_extract(b, coord, 2); + else if (coord_comps == 2 && is_array) + return bi_extract(b, coord, 1); + else + return bi_zero(); + } } static bi_index bi_emit_image_index(bi_builder *b, nir_intrinsic_instr *instr) { - nir_src src = instr->src[0]; - bi_index index = bi_src_index(&src); - bi_context *ctx = b->shader; + nir_src src = instr->src[0]; + bi_index index = bi_src_index(&src); + bi_context *ctx = b->shader; - /* Images come after vertex attributes, so handle an explicit offset */ - unsigned offset = (ctx->stage == MESA_SHADER_VERTEX) ? - util_bitcount64(ctx->nir->info.inputs_read) : 0; + /* Images come after vertex attributes, so handle an explicit offset */ + unsigned offset = (ctx->stage == MESA_SHADER_VERTEX) + ? 
util_bitcount64(ctx->nir->info.inputs_read) + : 0; - if (offset == 0) - return index; - else if (nir_src_is_const(src)) - return bi_imm_u32(nir_src_as_uint(src) + offset); - else - return bi_iadd_u32(b, index, bi_imm_u32(offset), false); + if (offset == 0) + return index; + else if (nir_src_is_const(src)) + return bi_imm_u32(nir_src_as_uint(src) + offset); + else + return bi_iadd_u32(b, index, bi_imm_u32(offset), false); } static void bi_emit_image_load(bi_builder *b, nir_intrinsic_instr *instr) { - enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr); - unsigned coord_comps = nir_image_intrinsic_coord_components(instr); - bool array = nir_intrinsic_image_array(instr); - ASSERTED unsigned nr_dim = glsl_get_sampler_dim_coordinate_components(dim); + enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr); + unsigned coord_comps = nir_image_intrinsic_coord_components(instr); + bool array = nir_intrinsic_image_array(instr); + ASSERTED unsigned nr_dim = glsl_get_sampler_dim_coordinate_components(dim); - bi_index coords = bi_src_index(&instr->src[1]); - bi_index xy = bi_emit_image_coord(b, coords, 0, coord_comps, array); - bi_index zw = bi_emit_image_coord(b, coords, 1, coord_comps, array); - bi_index dest = bi_dest_index(&instr->dest); - enum bi_register_format regfmt = bi_reg_fmt_for_nir(nir_intrinsic_dest_type(instr)); - enum bi_vecsize vecsize = instr->num_components - 1; + bi_index coords = bi_src_index(&instr->src[1]); + bi_index xy = bi_emit_image_coord(b, coords, 0, coord_comps, array); + bi_index zw = bi_emit_image_coord(b, coords, 1, coord_comps, array); + bi_index dest = bi_dest_index(&instr->dest); + enum bi_register_format regfmt = + bi_reg_fmt_for_nir(nir_intrinsic_dest_type(instr)); + enum bi_vecsize vecsize = instr->num_components - 1; - /* TODO: MSAA */ - assert(nr_dim != GLSL_SAMPLER_DIM_MS && "MSAA'd images not supported"); + /* TODO: MSAA */ + assert(nr_dim != GLSL_SAMPLER_DIM_MS && "MSAA'd images not supported"); - if (b->shader->arch >= 9 && nir_src_is_const(instr->src[0])) { - bi_instr *I = bi_ld_tex_imm_to(b, dest, xy, zw, regfmt, vecsize, - nir_src_as_uint(instr->src[0])); + if (b->shader->arch >= 9 && nir_src_is_const(instr->src[0])) { + bi_instr *I = bi_ld_tex_imm_to(b, dest, xy, zw, regfmt, vecsize, + nir_src_as_uint(instr->src[0])); - I->table = PAN_TABLE_IMAGE; - } else if (b->shader->arch >= 9) { - unreachable("Indirect images on Valhall not yet supported"); - } else { - bi_ld_attr_tex_to(b, dest, xy, zw, - bi_emit_image_index(b, instr), regfmt, - vecsize); - } + I->table = PAN_TABLE_IMAGE; + } else if (b->shader->arch >= 9) { + unreachable("Indirect images on Valhall not yet supported"); + } else { + bi_ld_attr_tex_to(b, dest, xy, zw, bi_emit_image_index(b, instr), regfmt, + vecsize); + } - bi_split_dest(b, instr->dest); + bi_split_dest(b, instr->dest); } static bi_index bi_emit_lea_image(bi_builder *b, nir_intrinsic_instr *instr) { - enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr); - bool array = nir_intrinsic_image_array(instr); - ASSERTED unsigned nr_dim = glsl_get_sampler_dim_coordinate_components(dim); - unsigned coord_comps = nir_image_intrinsic_coord_components(instr); + enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr); + bool array = nir_intrinsic_image_array(instr); + ASSERTED unsigned nr_dim = glsl_get_sampler_dim_coordinate_components(dim); + unsigned coord_comps = nir_image_intrinsic_coord_components(instr); - /* TODO: MSAA */ - assert(nr_dim != GLSL_SAMPLER_DIM_MS && "MSAA'd images not supported"); + /* TODO: MSAA */ + 
assert(nr_dim != GLSL_SAMPLER_DIM_MS && "MSAA'd images not supported"); - enum bi_register_format type = (instr->intrinsic == nir_intrinsic_image_store) ? - bi_reg_fmt_for_nir(nir_intrinsic_src_type(instr)) : - BI_REGISTER_FORMAT_AUTO; + enum bi_register_format type = + (instr->intrinsic == nir_intrinsic_image_store) + ? bi_reg_fmt_for_nir(nir_intrinsic_src_type(instr)) + : BI_REGISTER_FORMAT_AUTO; - bi_index coords = bi_src_index(&instr->src[1]); - bi_index xy = bi_emit_image_coord(b, coords, 0, coord_comps, array); - bi_index zw = bi_emit_image_coord(b, coords, 1, coord_comps, array); - bi_index dest = bi_temp(b->shader); + bi_index coords = bi_src_index(&instr->src[1]); + bi_index xy = bi_emit_image_coord(b, coords, 0, coord_comps, array); + bi_index zw = bi_emit_image_coord(b, coords, 1, coord_comps, array); + bi_index dest = bi_temp(b->shader); - if (b->shader->arch >= 9 && nir_src_is_const(instr->src[0])) { - bi_instr *I = bi_lea_tex_imm_to(b, dest, xy, zw, false, - nir_src_as_uint(instr->src[0])); + if (b->shader->arch >= 9 && nir_src_is_const(instr->src[0])) { + bi_instr *I = bi_lea_tex_imm_to(b, dest, xy, zw, false, + nir_src_as_uint(instr->src[0])); - I->table = PAN_TABLE_IMAGE; - } else if (b->shader->arch >= 9) { - unreachable("Indirect images on Valhall not yet supported"); - } else { - bi_instr *I = bi_lea_attr_tex_to(b, dest, xy, zw, - bi_emit_image_index(b, instr), type); + I->table = PAN_TABLE_IMAGE; + } else if (b->shader->arch >= 9) { + unreachable("Indirect images on Valhall not yet supported"); + } else { + bi_instr *I = bi_lea_attr_tex_to(b, dest, xy, zw, + bi_emit_image_index(b, instr), type); - /* LEA_ATTR_TEX defaults to the secondary attribute table, but - * our ABI has all images in the primary attribute table - */ - I->table = BI_TABLE_ATTRIBUTE_1; - } + /* LEA_ATTR_TEX defaults to the secondary attribute table, but + * our ABI has all images in the primary attribute table + */ + I->table = BI_TABLE_ATTRIBUTE_1; + } - bi_emit_cached_split(b, dest, 3 * 32); - return dest; + bi_emit_cached_split(b, dest, 3 * 32); + return dest; } static void bi_emit_image_store(bi_builder *b, nir_intrinsic_instr *instr) { - bi_index a[4] = { bi_null() }; - bi_emit_split_i32(b, a, bi_emit_lea_image(b, instr), 3); + bi_index a[4] = {bi_null()}; + bi_emit_split_i32(b, a, bi_emit_lea_image(b, instr), 3); - /* Due to SPIR-V limitations, the source type is not fully reliable: it - * reports uint32 even for write_imagei. This causes an incorrect - * u32->s32->u32 roundtrip which incurs an unwanted clamping. Use auto32 - * instead, which will match per the OpenCL spec. Of course this does - * not work for 16-bit stores, but those are not available in OpenCL. - */ - nir_alu_type T = nir_intrinsic_src_type(instr); - assert(nir_alu_type_get_type_size(T) == 32); + /* Due to SPIR-V limitations, the source type is not fully reliable: it + * reports uint32 even for write_imagei. This causes an incorrect + * u32->s32->u32 roundtrip which incurs an unwanted clamping. Use auto32 + * instead, which will match per the OpenCL spec. Of course this does + * not work for 16-bit stores, but those are not available in OpenCL. 
+ */ + nir_alu_type T = nir_intrinsic_src_type(instr); + assert(nir_alu_type_get_type_size(T) == 32); - bi_st_cvt(b, bi_src_index(&instr->src[3]), a[0], a[1], a[2], - BI_REGISTER_FORMAT_AUTO, - instr->num_components - 1); + bi_st_cvt(b, bi_src_index(&instr->src[3]), a[0], a[1], a[2], + BI_REGISTER_FORMAT_AUTO, instr->num_components - 1); } static void -bi_emit_atomic_i32_to(bi_builder *b, bi_index dst, - bi_index addr, bi_index arg, nir_intrinsic_op intrinsic) +bi_emit_atomic_i32_to(bi_builder *b, bi_index dst, bi_index addr, bi_index arg, + nir_intrinsic_op intrinsic) { - enum bi_atom_opc opc = bi_atom_opc_for_nir(intrinsic); - enum bi_atom_opc post_opc = opc; - bool bifrost = b->shader->arch <= 8; + enum bi_atom_opc opc = bi_atom_opc_for_nir(intrinsic); + enum bi_atom_opc post_opc = opc; + bool bifrost = b->shader->arch <= 8; - /* ATOM_C.i32 takes a vector with {arg, coalesced}, ATOM_C1.i32 doesn't - * take any vector but can still output in RETURN mode */ - bi_index tmp_dest = bifrost ? bi_temp(b->shader) : dst; - unsigned sr_count = bifrost ? 2 : 1; + /* ATOM_C.i32 takes a vector with {arg, coalesced}, ATOM_C1.i32 doesn't + * take any vector but can still output in RETURN mode */ + bi_index tmp_dest = bifrost ? bi_temp(b->shader) : dst; + unsigned sr_count = bifrost ? 2 : 1; - /* Generate either ATOM or ATOM1 as required */ - if (bi_promote_atom_c1(opc, arg, &opc)) { - bi_atom1_return_i32_to(b, tmp_dest, bi_extract(b, addr, 0), - bi_extract(b, addr, 1), opc, sr_count); - } else { - bi_atom_return_i32_to(b, tmp_dest, arg, bi_extract(b, addr, 0), - bi_extract(b, addr, 1), opc, sr_count); - } + /* Generate either ATOM or ATOM1 as required */ + if (bi_promote_atom_c1(opc, arg, &opc)) { + bi_atom1_return_i32_to(b, tmp_dest, bi_extract(b, addr, 0), + bi_extract(b, addr, 1), opc, sr_count); + } else { + bi_atom_return_i32_to(b, tmp_dest, arg, bi_extract(b, addr, 0), + bi_extract(b, addr, 1), opc, sr_count); + } - if (bifrost) { - /* Post-process it */ - bi_emit_cached_split_i32(b, tmp_dest, 2); - bi_atom_post_i32_to(b, dst, bi_extract(b, tmp_dest, 0), bi_extract(b, tmp_dest, 1), post_opc); - } + if (bifrost) { + /* Post-process it */ + bi_emit_cached_split_i32(b, tmp_dest, 2); + bi_atom_post_i32_to(b, dst, bi_extract(b, tmp_dest, 0), + bi_extract(b, tmp_dest, 1), post_opc); + } } /* gl_FragCoord.xy = u16_to_f32(R59.xy) + 0.5 @@ -1540,475 +1520,474 @@ bi_emit_atomic_i32_to(bi_builder *b, bi_index dst, static void bi_emit_load_frag_coord(bi_builder *b, nir_intrinsic_instr *instr) { - bi_index src[4] = {}; + bi_index src[4] = {}; - for (unsigned i = 0; i < 2; ++i) { - src[i] = bi_fadd_f32(b, - bi_u16_to_f32(b, bi_half(bi_preload(b, 59), i)), - bi_imm_f32(0.5f)); - } + for (unsigned i = 0; i < 2; ++i) { + src[i] = bi_fadd_f32(b, bi_u16_to_f32(b, bi_half(bi_preload(b, 59), i)), + bi_imm_f32(0.5f)); + } - for (unsigned i = 0; i < 2; ++i) { - src[2 + i] = bi_ld_var_special(b, bi_zero(), - BI_REGISTER_FORMAT_F32, BI_SAMPLE_CENTER, - BI_UPDATE_CLOBBER, - (i == 0) ? BI_VARYING_NAME_FRAG_Z : - BI_VARYING_NAME_FRAG_W, - BI_VECSIZE_NONE); - } + for (unsigned i = 0; i < 2; ++i) { + src[2 + i] = bi_ld_var_special( + b, bi_zero(), BI_REGISTER_FORMAT_F32, BI_SAMPLE_CENTER, + BI_UPDATE_CLOBBER, + (i == 0) ? 
BI_VARYING_NAME_FRAG_Z : BI_VARYING_NAME_FRAG_W, + BI_VECSIZE_NONE); + } - bi_make_vec_to(b, bi_dest_index(&instr->dest), src, NULL, 4, 32); + bi_make_vec_to(b, bi_dest_index(&instr->dest), src, NULL, 4, 32); } static void bi_emit_ld_tile(bi_builder *b, nir_intrinsic_instr *instr) { - bi_index dest = bi_dest_index(&instr->dest); - nir_alu_type T = nir_intrinsic_dest_type(instr); - enum bi_register_format regfmt = bi_reg_fmt_for_nir(T); - unsigned rt = b->shader->inputs->blend.rt; - unsigned size = nir_dest_bit_size(instr->dest); - unsigned nr = instr->num_components; + bi_index dest = bi_dest_index(&instr->dest); + nir_alu_type T = nir_intrinsic_dest_type(instr); + enum bi_register_format regfmt = bi_reg_fmt_for_nir(T); + unsigned rt = b->shader->inputs->blend.rt; + unsigned size = nir_dest_bit_size(instr->dest); + unsigned nr = instr->num_components; - /* Get the render target */ - if (!b->shader->inputs->is_blend) { - nir_io_semantics sem = nir_intrinsic_io_semantics(instr); - unsigned loc = sem.location; - assert(loc >= FRAG_RESULT_DATA0); - rt = (loc - FRAG_RESULT_DATA0); - } + /* Get the render target */ + if (!b->shader->inputs->is_blend) { + nir_io_semantics sem = nir_intrinsic_io_semantics(instr); + unsigned loc = sem.location; + assert(loc >= FRAG_RESULT_DATA0); + rt = (loc - FRAG_RESULT_DATA0); + } - bi_index desc = b->shader->inputs->is_blend ? - bi_imm_u32(b->shader->inputs->blend.bifrost_blend_desc >> 32) : - b->shader->inputs->bifrost.static_rt_conv ? - bi_imm_u32(b->shader->inputs->bifrost.rt_conv[rt]) : - bi_load_sysval(b, PAN_SYSVAL(RT_CONVERSION, rt | (size << 4)), 1, 0); + bi_index desc = + b->shader->inputs->is_blend + ? bi_imm_u32(b->shader->inputs->blend.bifrost_blend_desc >> 32) + : b->shader->inputs->bifrost.static_rt_conv + ? bi_imm_u32(b->shader->inputs->bifrost.rt_conv[rt]) + : bi_load_sysval(b, PAN_SYSVAL(RT_CONVERSION, rt | (size << 4)), 1, 0); - bi_ld_tile_to(b, dest, bi_pixel_indices(b, rt), bi_coverage(b), desc, - regfmt, nr - 1); - bi_emit_cached_split(b, dest, size * nr); + bi_ld_tile_to(b, dest, bi_pixel_indices(b, rt), bi_coverage(b), desc, regfmt, + nr - 1); + bi_emit_cached_split(b, dest, size * nr); } static void bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr) { - bi_index dst = nir_intrinsic_infos[instr->intrinsic].has_dest ? - bi_dest_index(&instr->dest) : bi_null(); - gl_shader_stage stage = b->shader->stage; + bi_index dst = nir_intrinsic_infos[instr->intrinsic].has_dest + ? 
bi_dest_index(&instr->dest) + : bi_null(); + gl_shader_stage stage = b->shader->stage; - switch (instr->intrinsic) { - case nir_intrinsic_load_barycentric_pixel: - case nir_intrinsic_load_barycentric_centroid: - case nir_intrinsic_load_barycentric_sample: - case nir_intrinsic_load_barycentric_at_sample: - case nir_intrinsic_load_barycentric_at_offset: - /* handled later via load_vary */ - break; - case nir_intrinsic_load_interpolated_input: - case nir_intrinsic_load_input: - if (b->shader->inputs->is_blend) - bi_emit_load_blend_input(b, instr); - else if (stage == MESA_SHADER_FRAGMENT) - bi_emit_load_vary(b, instr); - else if (stage == MESA_SHADER_VERTEX) - bi_emit_load_attr(b, instr); - else - unreachable("Unsupported shader stage"); - break; + switch (instr->intrinsic) { + case nir_intrinsic_load_barycentric_pixel: + case nir_intrinsic_load_barycentric_centroid: + case nir_intrinsic_load_barycentric_sample: + case nir_intrinsic_load_barycentric_at_sample: + case nir_intrinsic_load_barycentric_at_offset: + /* handled later via load_vary */ + break; + case nir_intrinsic_load_interpolated_input: + case nir_intrinsic_load_input: + if (b->shader->inputs->is_blend) + bi_emit_load_blend_input(b, instr); + else if (stage == MESA_SHADER_FRAGMENT) + bi_emit_load_vary(b, instr); + else if (stage == MESA_SHADER_VERTEX) + bi_emit_load_attr(b, instr); + else + unreachable("Unsupported shader stage"); + break; - case nir_intrinsic_store_output: - if (stage == MESA_SHADER_FRAGMENT) - bi_emit_fragment_out(b, instr); - else if (stage == MESA_SHADER_VERTEX) - bi_emit_store_vary(b, instr); - else - unreachable("Unsupported shader stage"); - break; + case nir_intrinsic_store_output: + if (stage == MESA_SHADER_FRAGMENT) + bi_emit_fragment_out(b, instr); + else if (stage == MESA_SHADER_VERTEX) + bi_emit_store_vary(b, instr); + else + unreachable("Unsupported shader stage"); + break; - case nir_intrinsic_store_combined_output_pan: - assert(stage == MESA_SHADER_FRAGMENT); - bi_emit_fragment_out(b, instr); - break; + case nir_intrinsic_store_combined_output_pan: + assert(stage == MESA_SHADER_FRAGMENT); + bi_emit_fragment_out(b, instr); + break; - case nir_intrinsic_load_ubo: - bi_emit_load_ubo(b, instr); - break; + case nir_intrinsic_load_ubo: + bi_emit_load_ubo(b, instr); + break; - case nir_intrinsic_load_push_constant: - bi_emit_load_push_constant(b, instr); - break; + case nir_intrinsic_load_push_constant: + bi_emit_load_push_constant(b, instr); + break; - case nir_intrinsic_load_global: - case nir_intrinsic_load_global_constant: - bi_emit_load(b, instr, BI_SEG_NONE); - break; + case nir_intrinsic_load_global: + case nir_intrinsic_load_global_constant: + bi_emit_load(b, instr, BI_SEG_NONE); + break; - case nir_intrinsic_store_global: - bi_emit_store(b, instr, BI_SEG_NONE); - break; + case nir_intrinsic_store_global: + bi_emit_store(b, instr, BI_SEG_NONE); + break; - case nir_intrinsic_load_scratch: - bi_emit_load(b, instr, BI_SEG_TL); - break; + case nir_intrinsic_load_scratch: + bi_emit_load(b, instr, BI_SEG_TL); + break; - case nir_intrinsic_store_scratch: - bi_emit_store(b, instr, BI_SEG_TL); - break; + case nir_intrinsic_store_scratch: + bi_emit_store(b, instr, BI_SEG_TL); + break; - case nir_intrinsic_load_shared: - bi_emit_load(b, instr, BI_SEG_WLS); - break; + case nir_intrinsic_load_shared: + bi_emit_load(b, instr, BI_SEG_WLS); + break; - case nir_intrinsic_store_shared: - bi_emit_store(b, instr, BI_SEG_WLS); - break; + case nir_intrinsic_store_shared: + bi_emit_store(b, instr, BI_SEG_WLS); + break; - 
/* Blob doesn't seem to do anything for memory barriers, note +BARRIER - * is illegal in fragment shaders */ - case nir_intrinsic_memory_barrier: - case nir_intrinsic_memory_barrier_buffer: - case nir_intrinsic_memory_barrier_image: - case nir_intrinsic_memory_barrier_shared: - case nir_intrinsic_group_memory_barrier: - break; + /* Blob doesn't seem to do anything for memory barriers, note +BARRIER + * is illegal in fragment shaders */ + case nir_intrinsic_memory_barrier: + case nir_intrinsic_memory_barrier_buffer: + case nir_intrinsic_memory_barrier_image: + case nir_intrinsic_memory_barrier_shared: + case nir_intrinsic_group_memory_barrier: + break; - case nir_intrinsic_control_barrier: - assert(b->shader->stage != MESA_SHADER_FRAGMENT); - bi_barrier(b); - break; + case nir_intrinsic_control_barrier: + assert(b->shader->stage != MESA_SHADER_FRAGMENT); + bi_barrier(b); + break; - case nir_intrinsic_scoped_barrier: - assert(b->shader->stage != MESA_SHADER_FRAGMENT); - assert(nir_intrinsic_memory_scope(instr) > NIR_SCOPE_SUBGROUP && - "todo: subgroup barriers (different divergence rules)"); + case nir_intrinsic_scoped_barrier: + assert(b->shader->stage != MESA_SHADER_FRAGMENT); + assert(nir_intrinsic_memory_scope(instr) > NIR_SCOPE_SUBGROUP && + "todo: subgroup barriers (different divergence rules)"); - bi_barrier(b); - break; + bi_barrier(b); + break; - case nir_intrinsic_shared_atomic_add: - case nir_intrinsic_shared_atomic_imin: - case nir_intrinsic_shared_atomic_umin: - case nir_intrinsic_shared_atomic_imax: - case nir_intrinsic_shared_atomic_umax: - case nir_intrinsic_shared_atomic_and: - case nir_intrinsic_shared_atomic_or: - case nir_intrinsic_shared_atomic_xor: { - assert(nir_src_bit_size(instr->src[1]) == 32); + case nir_intrinsic_shared_atomic_add: + case nir_intrinsic_shared_atomic_imin: + case nir_intrinsic_shared_atomic_umin: + case nir_intrinsic_shared_atomic_imax: + case nir_intrinsic_shared_atomic_umax: + case nir_intrinsic_shared_atomic_and: + case nir_intrinsic_shared_atomic_or: + case nir_intrinsic_shared_atomic_xor: { + assert(nir_src_bit_size(instr->src[1]) == 32); - bi_index addr = bi_src_index(&instr->src[0]); - bi_index addr_hi; + bi_index addr = bi_src_index(&instr->src[0]); + bi_index addr_hi; - if (b->shader->arch >= 9) { - bi_handle_segment(b, &addr, &addr_hi, BI_SEG_WLS, NULL); - addr = bi_collect_v2i32(b, addr, addr_hi); - } else { - addr = bi_seg_add_i64(b, addr, bi_zero(), false, BI_SEG_WLS); - bi_emit_cached_split(b, addr, 64); - } + if (b->shader->arch >= 9) { + bi_handle_segment(b, &addr, &addr_hi, BI_SEG_WLS, NULL); + addr = bi_collect_v2i32(b, addr, addr_hi); + } else { + addr = bi_seg_add_i64(b, addr, bi_zero(), false, BI_SEG_WLS); + bi_emit_cached_split(b, addr, 64); + } - bi_emit_atomic_i32_to(b, dst, addr, bi_src_index(&instr->src[1]), - instr->intrinsic); - bi_split_dest(b, instr->dest); - break; - } + bi_emit_atomic_i32_to(b, dst, addr, bi_src_index(&instr->src[1]), + instr->intrinsic); + bi_split_dest(b, instr->dest); + break; + } - case nir_intrinsic_image_atomic_add: - case nir_intrinsic_image_atomic_imin: - case nir_intrinsic_image_atomic_umin: - case nir_intrinsic_image_atomic_imax: - case nir_intrinsic_image_atomic_umax: - case nir_intrinsic_image_atomic_and: - case nir_intrinsic_image_atomic_or: - case nir_intrinsic_image_atomic_xor: - assert(nir_src_bit_size(instr->src[3]) == 32); + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_image_atomic_imin: + case nir_intrinsic_image_atomic_umin: + case nir_intrinsic_image_atomic_imax: + 
case nir_intrinsic_image_atomic_umax: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_atomic_xor: + assert(nir_src_bit_size(instr->src[3]) == 32); - bi_emit_atomic_i32_to(b, dst, - bi_emit_lea_image(b, instr), - bi_src_index(&instr->src[3]), - instr->intrinsic); - bi_split_dest(b, instr->dest); - break; + bi_emit_atomic_i32_to(b, dst, bi_emit_lea_image(b, instr), + bi_src_index(&instr->src[3]), instr->intrinsic); + bi_split_dest(b, instr->dest); + break; - case nir_intrinsic_global_atomic_add: - case nir_intrinsic_global_atomic_imin: - case nir_intrinsic_global_atomic_umin: - case nir_intrinsic_global_atomic_imax: - case nir_intrinsic_global_atomic_umax: - case nir_intrinsic_global_atomic_and: - case nir_intrinsic_global_atomic_or: - case nir_intrinsic_global_atomic_xor: - assert(nir_src_bit_size(instr->src[1]) == 32); + case nir_intrinsic_global_atomic_add: + case nir_intrinsic_global_atomic_imin: + case nir_intrinsic_global_atomic_umin: + case nir_intrinsic_global_atomic_imax: + case nir_intrinsic_global_atomic_umax: + case nir_intrinsic_global_atomic_and: + case nir_intrinsic_global_atomic_or: + case nir_intrinsic_global_atomic_xor: + assert(nir_src_bit_size(instr->src[1]) == 32); - bi_emit_atomic_i32_to(b, dst, - bi_src_index(&instr->src[0]), - bi_src_index(&instr->src[1]), - instr->intrinsic); + bi_emit_atomic_i32_to(b, dst, bi_src_index(&instr->src[0]), + bi_src_index(&instr->src[1]), instr->intrinsic); - bi_split_dest(b, instr->dest); - break; + bi_split_dest(b, instr->dest); + break; - case nir_intrinsic_image_load: - bi_emit_image_load(b, instr); - break; + case nir_intrinsic_image_load: + bi_emit_image_load(b, instr); + break; - case nir_intrinsic_image_store: - bi_emit_image_store(b, instr); - break; + case nir_intrinsic_image_store: + bi_emit_image_store(b, instr); + break; - case nir_intrinsic_global_atomic_exchange: - bi_emit_axchg_to(b, dst, bi_src_index(&instr->src[0]), - &instr->src[1], BI_SEG_NONE); - bi_split_dest(b, instr->dest); - break; + case nir_intrinsic_global_atomic_exchange: + bi_emit_axchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1], + BI_SEG_NONE); + bi_split_dest(b, instr->dest); + break; - case nir_intrinsic_image_atomic_exchange: - bi_emit_axchg_to(b, dst, bi_emit_lea_image(b, instr), - &instr->src[3], BI_SEG_NONE); - bi_split_dest(b, instr->dest); - break; + case nir_intrinsic_image_atomic_exchange: + bi_emit_axchg_to(b, dst, bi_emit_lea_image(b, instr), &instr->src[3], + BI_SEG_NONE); + bi_split_dest(b, instr->dest); + break; - case nir_intrinsic_shared_atomic_exchange: - bi_emit_axchg_to(b, dst, bi_src_index(&instr->src[0]), - &instr->src[1], BI_SEG_WLS); - bi_split_dest(b, instr->dest); - break; + case nir_intrinsic_shared_atomic_exchange: + bi_emit_axchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1], + BI_SEG_WLS); + bi_split_dest(b, instr->dest); + break; - case nir_intrinsic_global_atomic_comp_swap: - bi_emit_acmpxchg_to(b, dst, bi_src_index(&instr->src[0]), - &instr->src[1], &instr->src[2], BI_SEG_NONE); - bi_split_dest(b, instr->dest); - break; + case nir_intrinsic_global_atomic_comp_swap: + bi_emit_acmpxchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1], + &instr->src[2], BI_SEG_NONE); + bi_split_dest(b, instr->dest); + break; - case nir_intrinsic_image_atomic_comp_swap: - bi_emit_acmpxchg_to(b, dst, bi_emit_lea_image(b, instr), - &instr->src[3], &instr->src[4], BI_SEG_NONE); - bi_split_dest(b, instr->dest); - break; + case nir_intrinsic_image_atomic_comp_swap: 
+ bi_emit_acmpxchg_to(b, dst, bi_emit_lea_image(b, instr), &instr->src[3], + &instr->src[4], BI_SEG_NONE); + bi_split_dest(b, instr->dest); + break; - case nir_intrinsic_shared_atomic_comp_swap: - bi_emit_acmpxchg_to(b, dst, bi_src_index(&instr->src[0]), - &instr->src[1], &instr->src[2], BI_SEG_WLS); - bi_split_dest(b, instr->dest); - break; + case nir_intrinsic_shared_atomic_comp_swap: + bi_emit_acmpxchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1], + &instr->src[2], BI_SEG_WLS); + bi_split_dest(b, instr->dest); + break; - case nir_intrinsic_load_frag_coord: - bi_emit_load_frag_coord(b, instr); - break; + case nir_intrinsic_load_frag_coord: + bi_emit_load_frag_coord(b, instr); + break; - case nir_intrinsic_load_output: - bi_emit_ld_tile(b, instr); - break; + case nir_intrinsic_load_output: + bi_emit_ld_tile(b, instr); + break; - case nir_intrinsic_discard_if: - bi_discard_b32(b, bi_src_index(&instr->src[0])); - break; + case nir_intrinsic_discard_if: + bi_discard_b32(b, bi_src_index(&instr->src[0])); + break; - case nir_intrinsic_discard: - bi_discard_f32(b, bi_zero(), bi_zero(), BI_CMPF_EQ); - break; + case nir_intrinsic_discard: + bi_discard_f32(b, bi_zero(), bi_zero(), BI_CMPF_EQ); + break; - case nir_intrinsic_load_ssbo_address: - case nir_intrinsic_load_xfb_address: - bi_load_sysval_nir(b, instr, 2, 0); - break; + case nir_intrinsic_load_ssbo_address: + case nir_intrinsic_load_xfb_address: + bi_load_sysval_nir(b, instr, 2, 0); + break; - case nir_intrinsic_load_work_dim: - case nir_intrinsic_load_num_vertices: - case nir_intrinsic_load_first_vertex: - case nir_intrinsic_load_draw_id: - bi_load_sysval_nir(b, instr, 1, 0); - break; + case nir_intrinsic_load_work_dim: + case nir_intrinsic_load_num_vertices: + case nir_intrinsic_load_first_vertex: + case nir_intrinsic_load_draw_id: + bi_load_sysval_nir(b, instr, 1, 0); + break; - case nir_intrinsic_load_base_vertex: - bi_load_sysval_nir(b, instr, 1, 4); - break; + case nir_intrinsic_load_base_vertex: + bi_load_sysval_nir(b, instr, 1, 4); + break; - case nir_intrinsic_load_base_instance: - case nir_intrinsic_get_ssbo_size: - bi_load_sysval_nir(b, instr, 1, 8); - break; + case nir_intrinsic_load_base_instance: + case nir_intrinsic_get_ssbo_size: + bi_load_sysval_nir(b, instr, 1, 8); + break; - case nir_intrinsic_load_viewport_scale: - case nir_intrinsic_load_viewport_offset: - case nir_intrinsic_load_num_workgroups: - case nir_intrinsic_load_workgroup_size: - bi_load_sysval_nir(b, instr, 3, 0); - break; + case nir_intrinsic_load_viewport_scale: + case nir_intrinsic_load_viewport_offset: + case nir_intrinsic_load_num_workgroups: + case nir_intrinsic_load_workgroup_size: + bi_load_sysval_nir(b, instr, 3, 0); + break; - case nir_intrinsic_image_size: - bi_load_sysval_nir(b, instr, - nir_dest_num_components(instr->dest), 0); - break; + case nir_intrinsic_image_size: + bi_load_sysval_nir(b, instr, nir_dest_num_components(instr->dest), 0); + break; - case nir_intrinsic_load_blend_const_color_rgba: - bi_load_sysval_nir(b, instr, - nir_dest_num_components(instr->dest), 0); - break; + case nir_intrinsic_load_blend_const_color_rgba: + bi_load_sysval_nir(b, instr, nir_dest_num_components(instr->dest), 0); + break; - case nir_intrinsic_load_sample_positions_pan: - bi_collect_v2i32_to(b, dst, - bi_fau(BIR_FAU_SAMPLE_POS_ARRAY, false), - bi_fau(BIR_FAU_SAMPLE_POS_ARRAY, true)); - break; + case nir_intrinsic_load_sample_positions_pan: + bi_collect_v2i32_to(b, dst, bi_fau(BIR_FAU_SAMPLE_POS_ARRAY, false), + bi_fau(BIR_FAU_SAMPLE_POS_ARRAY, 
true)); + break; - case nir_intrinsic_load_sample_mask_in: - /* r61[0:15] contains the coverage bitmap */ - bi_u16_to_u32_to(b, dst, bi_half(bi_preload(b, 61), false)); - break; + case nir_intrinsic_load_sample_mask_in: + /* r61[0:15] contains the coverage bitmap */ + bi_u16_to_u32_to(b, dst, bi_half(bi_preload(b, 61), false)); + break; - case nir_intrinsic_load_sample_id: - bi_load_sample_id_to(b, dst); - break; + case nir_intrinsic_load_sample_id: + bi_load_sample_id_to(b, dst); + break; - case nir_intrinsic_load_front_face: - /* r58 == 0 means primitive is front facing */ - bi_icmp_i32_to(b, dst, bi_preload(b, 58), bi_zero(), BI_CMPF_EQ, - BI_RESULT_TYPE_M1); - break; + case nir_intrinsic_load_front_face: + /* r58 == 0 means primitive is front facing */ + bi_icmp_i32_to(b, dst, bi_preload(b, 58), bi_zero(), BI_CMPF_EQ, + BI_RESULT_TYPE_M1); + break; - case nir_intrinsic_load_point_coord: - bi_ld_var_special_to(b, dst, bi_zero(), BI_REGISTER_FORMAT_F32, - BI_SAMPLE_CENTER, BI_UPDATE_CLOBBER, - BI_VARYING_NAME_POINT, BI_VECSIZE_V2); - bi_emit_cached_split_i32(b, dst, 2); - break; + case nir_intrinsic_load_point_coord: + bi_ld_var_special_to(b, dst, bi_zero(), BI_REGISTER_FORMAT_F32, + BI_SAMPLE_CENTER, BI_UPDATE_CLOBBER, + BI_VARYING_NAME_POINT, BI_VECSIZE_V2); + bi_emit_cached_split_i32(b, dst, 2); + break; - /* It appears vertex_id is zero-based with Bifrost geometry flows, but - * not with Valhall's memory-allocation IDVS geometry flow. Ostensibly - * we support the legacy geometry flow even on Valhall, so - * vertex_id_zero_based isn't a machine property for us. Don't set it, - * and lower here if needed. - */ - case nir_intrinsic_load_vertex_id: - if (b->shader->malloc_idvs) { - bi_mov_i32_to(b, dst, bi_vertex_id(b)); - } else { - bi_index first = bi_load_sysval(b, - PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS, - 1, 0); + /* It appears vertex_id is zero-based with Bifrost geometry flows, but + * not with Valhall's memory-allocation IDVS geometry flow. Ostensibly + * we support the legacy geometry flow even on Valhall, so + * vertex_id_zero_based isn't a machine property for us. Don't set it, + * and lower here if needed. 
+ */ + case nir_intrinsic_load_vertex_id: + if (b->shader->malloc_idvs) { + bi_mov_i32_to(b, dst, bi_vertex_id(b)); + } else { + bi_index first = + bi_load_sysval(b, PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS, 1, 0); - bi_iadd_u32_to(b, dst, bi_vertex_id(b), first, false); - } + bi_iadd_u32_to(b, dst, bi_vertex_id(b), first, false); + } - break; + break; - /* We only use in our transform feedback lowering */ - case nir_intrinsic_load_vertex_id_zero_base: - assert(b->shader->nir->info.has_transform_feedback_varyings); - bi_mov_i32_to(b, dst, bi_vertex_id(b)); - break; + /* We only use in our transform feedback lowering */ + case nir_intrinsic_load_vertex_id_zero_base: + assert(b->shader->nir->info.has_transform_feedback_varyings); + bi_mov_i32_to(b, dst, bi_vertex_id(b)); + break; - case nir_intrinsic_load_instance_id: - bi_mov_i32_to(b, dst, bi_instance_id(b)); - break; + case nir_intrinsic_load_instance_id: + bi_mov_i32_to(b, dst, bi_instance_id(b)); + break; - case nir_intrinsic_load_subgroup_invocation: - bi_mov_i32_to(b, dst, bi_fau(BIR_FAU_LANE_ID, false)); - break; + case nir_intrinsic_load_subgroup_invocation: + bi_mov_i32_to(b, dst, bi_fau(BIR_FAU_LANE_ID, false)); + break; - case nir_intrinsic_load_local_invocation_id: - bi_collect_v3i32_to(b, dst, - bi_u16_to_u32(b, bi_half(bi_preload(b, 55), 0)), - bi_u16_to_u32(b, bi_half(bi_preload(b, 55), 1)), - bi_u16_to_u32(b, bi_half(bi_preload(b, 56), 0))); - break; + case nir_intrinsic_load_local_invocation_id: + bi_collect_v3i32_to(b, dst, + bi_u16_to_u32(b, bi_half(bi_preload(b, 55), 0)), + bi_u16_to_u32(b, bi_half(bi_preload(b, 55), 1)), + bi_u16_to_u32(b, bi_half(bi_preload(b, 56), 0))); + break; - case nir_intrinsic_load_workgroup_id: - bi_collect_v3i32_to(b, dst, bi_preload(b, 57), bi_preload(b, 58), - bi_preload(b, 59)); - break; + case nir_intrinsic_load_workgroup_id: + bi_collect_v3i32_to(b, dst, bi_preload(b, 57), bi_preload(b, 58), + bi_preload(b, 59)); + break; - case nir_intrinsic_load_global_invocation_id: - case nir_intrinsic_load_global_invocation_id_zero_base: - bi_collect_v3i32_to(b, dst, bi_preload(b, 60), bi_preload(b, 61), - bi_preload(b, 62)); - break; + case nir_intrinsic_load_global_invocation_id: + case nir_intrinsic_load_global_invocation_id_zero_base: + bi_collect_v3i32_to(b, dst, bi_preload(b, 60), bi_preload(b, 61), + bi_preload(b, 62)); + break; - case nir_intrinsic_shader_clock: - bi_ld_gclk_u64_to(b, dst, BI_SOURCE_CYCLE_COUNTER); - bi_split_dest(b, instr->dest); - break; + case nir_intrinsic_shader_clock: + bi_ld_gclk_u64_to(b, dst, BI_SOURCE_CYCLE_COUNTER); + bi_split_dest(b, instr->dest); + break; - default: - fprintf(stderr, "Unhandled intrinsic %s\n", nir_intrinsic_infos[instr->intrinsic].name); - assert(0); - } + default: + fprintf(stderr, "Unhandled intrinsic %s\n", + nir_intrinsic_infos[instr->intrinsic].name); + assert(0); + } } static void bi_emit_load_const(bi_builder *b, nir_load_const_instr *instr) { - /* Make sure we've been lowered */ - assert(instr->def.num_components <= (32 / instr->def.bit_size)); + /* Make sure we've been lowered */ + assert(instr->def.num_components <= (32 / instr->def.bit_size)); - /* Accumulate all the channels of the constant, as if we did an - * implicit SEL over them */ - uint32_t acc = 0; + /* Accumulate all the channels of the constant, as if we did an + * implicit SEL over them */ + uint32_t acc = 0; - for (unsigned i = 0; i < instr->def.num_components; ++i) { - unsigned v = nir_const_value_as_uint(instr->value[i], instr->def.bit_size); - acc |= (v << (i * 
instr->def.bit_size)); - } + for (unsigned i = 0; i < instr->def.num_components; ++i) { + unsigned v = + nir_const_value_as_uint(instr->value[i], instr->def.bit_size); + acc |= (v << (i * instr->def.bit_size)); + } - bi_mov_i32_to(b, bi_get_index(instr->def.index), bi_imm_u32(acc)); + bi_mov_i32_to(b, bi_get_index(instr->def.index), bi_imm_u32(acc)); } static bi_index bi_alu_src_index(bi_builder *b, nir_alu_src src, unsigned comps) { - /* we don't lower modifiers until the backend */ - assert(!(src.negate || src.abs)); + /* we don't lower modifiers until the backend */ + assert(!(src.negate || src.abs)); - unsigned bitsize = nir_src_bit_size(src.src); + unsigned bitsize = nir_src_bit_size(src.src); - /* the bi_index carries the 32-bit (word) offset separate from the - * subword swizzle, first handle the offset */ + /* the bi_index carries the 32-bit (word) offset separate from the + * subword swizzle, first handle the offset */ - unsigned offset = 0; + unsigned offset = 0; - assert(bitsize == 8 || bitsize == 16 || bitsize == 32); - unsigned subword_shift = (bitsize == 32) ? 0 : (bitsize == 16) ? 1 : 2; + assert(bitsize == 8 || bitsize == 16 || bitsize == 32); + unsigned subword_shift = (bitsize == 32) ? 0 : (bitsize == 16) ? 1 : 2; - for (unsigned i = 0; i < comps; ++i) { - unsigned new_offset = (src.swizzle[i] >> subword_shift); + for (unsigned i = 0; i < comps; ++i) { + unsigned new_offset = (src.swizzle[i] >> subword_shift); - if (i > 0) - assert(offset == new_offset && "wrong vectorization"); + if (i > 0) + assert(offset == new_offset && "wrong vectorization"); - offset = new_offset; - } + offset = new_offset; + } - bi_index idx = bi_extract(b, bi_src_index(&src.src), offset); + bi_index idx = bi_extract(b, bi_src_index(&src.src), offset); - /* Compose the subword swizzle with existing (identity) swizzle */ - assert(idx.swizzle == BI_SWIZZLE_H01); + /* Compose the subword swizzle with existing (identity) swizzle */ + assert(idx.swizzle == BI_SWIZZLE_H01); - /* Bigger vectors should have been lowered */ - assert(comps <= (1 << subword_shift)); + /* Bigger vectors should have been lowered */ + assert(comps <= (1 << subword_shift)); - if (bitsize == 16) { - unsigned c0 = src.swizzle[0] & 1; - unsigned c1 = (comps > 1) ? src.swizzle[1] & 1 : c0; - idx.swizzle = BI_SWIZZLE_H00 + c1 + (c0 << 1); - } else if (bitsize == 8) { - /* 8-bit vectors not yet supported */ - assert(comps == 1 && "8-bit vectors not supported"); - idx.swizzle = BI_SWIZZLE_B0000 + (src.swizzle[0] & 3); - } + if (bitsize == 16) { + unsigned c0 = src.swizzle[0] & 1; + unsigned c1 = (comps > 1) ? 
src.swizzle[1] & 1 : c0; + idx.swizzle = BI_SWIZZLE_H00 + c1 + (c0 << 1); + } else if (bitsize == 8) { + /* 8-bit vectors not yet supported */ + assert(comps == 1 && "8-bit vectors not supported"); + idx.swizzle = BI_SWIZZLE_B0000 + (src.swizzle[0] & 3); + } - return idx; + return idx; } static enum bi_round bi_nir_round(nir_op op) { - switch (op) { - case nir_op_fround_even: return BI_ROUND_NONE; - case nir_op_ftrunc: return BI_ROUND_RTZ; - case nir_op_fceil: return BI_ROUND_RTP; - case nir_op_ffloor: return BI_ROUND_RTN; - default: unreachable("invalid nir round op"); - } + switch (op) { + case nir_op_fround_even: + return BI_ROUND_NONE; + case nir_op_ftrunc: + return BI_ROUND_RTZ; + case nir_op_fceil: + return BI_ROUND_RTP; + case nir_op_ffloor: + return BI_ROUND_RTN; + default: + unreachable("invalid nir round op"); + } } /* Convenience for lowered transcendentals */ @@ -2016,7 +1995,7 @@ bi_nir_round(nir_op op) static bi_index bi_fmul_f32(bi_builder *b, bi_index s0, bi_index s1) { - return bi_fma_f32(b, s0, s1, bi_imm_f32(-0.0f)); + return bi_fma_f32(b, s0, s1, bi_imm_f32(-0.0f)); } /* Approximate with FRCP_APPROX.f32 and apply a single iteration of @@ -2025,24 +2004,24 @@ bi_fmul_f32(bi_builder *b, bi_index s0, bi_index s1) static void bi_lower_frcp_32(bi_builder *b, bi_index dst, bi_index s0) { - bi_index x1 = bi_frcp_approx_f32(b, s0); - bi_index m = bi_frexpm_f32(b, s0, false, false); - bi_index e = bi_frexpe_f32(b, bi_neg(s0), false, false); - bi_index t1 = bi_fma_rscale_f32(b, m, bi_neg(x1), bi_imm_f32(1.0), - bi_zero(), BI_SPECIAL_N); - bi_fma_rscale_f32_to(b, dst, t1, x1, x1, e, BI_SPECIAL_NONE); + bi_index x1 = bi_frcp_approx_f32(b, s0); + bi_index m = bi_frexpm_f32(b, s0, false, false); + bi_index e = bi_frexpe_f32(b, bi_neg(s0), false, false); + bi_index t1 = bi_fma_rscale_f32(b, m, bi_neg(x1), bi_imm_f32(1.0), bi_zero(), + BI_SPECIAL_N); + bi_fma_rscale_f32_to(b, dst, t1, x1, x1, e, BI_SPECIAL_NONE); } static void bi_lower_frsq_32(bi_builder *b, bi_index dst, bi_index s0) { - bi_index x1 = bi_frsq_approx_f32(b, s0); - bi_index m = bi_frexpm_f32(b, s0, false, true); - bi_index e = bi_frexpe_f32(b, bi_neg(s0), false, true); - bi_index t1 = bi_fmul_f32(b, x1, x1); - bi_index t2 = bi_fma_rscale_f32(b, m, bi_neg(t1), bi_imm_f32(1.0), - bi_imm_u32(-1), BI_SPECIAL_N); - bi_fma_rscale_f32_to(b, dst, t2, x1, x1, e, BI_SPECIAL_N); + bi_index x1 = bi_frsq_approx_f32(b, s0); + bi_index m = bi_frexpm_f32(b, s0, false, true); + bi_index e = bi_frexpe_f32(b, bi_neg(s0), false, true); + bi_index t1 = bi_fmul_f32(b, x1, x1); + bi_index t2 = bi_fma_rscale_f32(b, m, bi_neg(t1), bi_imm_f32(1.0), + bi_imm_u32(-1), BI_SPECIAL_N); + bi_fma_rscale_f32_to(b, dst, t2, x1, x1, e, BI_SPECIAL_N); } /* More complex transcendentals, see @@ -2052,116 +2031,116 @@ bi_lower_frsq_32(bi_builder *b, bi_index dst, bi_index s0) static void bi_lower_fexp2_32(bi_builder *b, bi_index dst, bi_index s0) { - bi_index t1 = bi_temp(b->shader); - bi_instr *t1_instr = bi_fadd_f32_to(b, t1, s0, bi_imm_u32(0x49400000)); - t1_instr->clamp = BI_CLAMP_CLAMP_0_INF; + bi_index t1 = bi_temp(b->shader); + bi_instr *t1_instr = bi_fadd_f32_to(b, t1, s0, bi_imm_u32(0x49400000)); + t1_instr->clamp = BI_CLAMP_CLAMP_0_INF; - bi_index t2 = bi_fadd_f32(b, t1, bi_imm_u32(0xc9400000)); + bi_index t2 = bi_fadd_f32(b, t1, bi_imm_u32(0xc9400000)); - bi_instr *a2 = bi_fadd_f32_to(b, bi_temp(b->shader), s0, bi_neg(t2)); - a2->clamp = BI_CLAMP_CLAMP_M1_1; + bi_instr *a2 = bi_fadd_f32_to(b, bi_temp(b->shader), s0, bi_neg(t2)); + a2->clamp = 
BI_CLAMP_CLAMP_M1_1; - bi_index a1t = bi_fexp_table_u4(b, t1, BI_ADJ_NONE); - bi_index t3 = bi_isub_u32(b, t1, bi_imm_u32(0x49400000), false); - bi_index a1i = bi_arshift_i32(b, t3, bi_null(), bi_imm_u8(4)); - bi_index p1 = bi_fma_f32(b, a2->dest[0], bi_imm_u32(0x3d635635), - bi_imm_u32(0x3e75fffa)); - bi_index p2 = bi_fma_f32(b, p1, a2->dest[0], bi_imm_u32(0x3f317218)); - bi_index p3 = bi_fmul_f32(b, a2->dest[0], p2); - bi_instr *x = bi_fma_rscale_f32_to(b, bi_temp(b->shader), - p3, a1t, a1t, a1i, BI_SPECIAL_NONE); - x->clamp = BI_CLAMP_CLAMP_0_INF; + bi_index a1t = bi_fexp_table_u4(b, t1, BI_ADJ_NONE); + bi_index t3 = bi_isub_u32(b, t1, bi_imm_u32(0x49400000), false); + bi_index a1i = bi_arshift_i32(b, t3, bi_null(), bi_imm_u8(4)); + bi_index p1 = bi_fma_f32(b, a2->dest[0], bi_imm_u32(0x3d635635), + bi_imm_u32(0x3e75fffa)); + bi_index p2 = bi_fma_f32(b, p1, a2->dest[0], bi_imm_u32(0x3f317218)); + bi_index p3 = bi_fmul_f32(b, a2->dest[0], p2); + bi_instr *x = bi_fma_rscale_f32_to(b, bi_temp(b->shader), p3, a1t, a1t, a1i, + BI_SPECIAL_NONE); + x->clamp = BI_CLAMP_CLAMP_0_INF; - bi_instr *max = bi_fmax_f32_to(b, dst, x->dest[0], s0); - max->sem = BI_SEM_NAN_PROPAGATE; + bi_instr *max = bi_fmax_f32_to(b, dst, x->dest[0], s0); + max->sem = BI_SEM_NAN_PROPAGATE; } static void bi_fexp_32(bi_builder *b, bi_index dst, bi_index s0, bi_index log2_base) { - /* Scale by base, Multiply by 2*24 and convert to integer to get a 8:24 - * fixed-point input */ - bi_index scale = bi_fma_rscale_f32(b, s0, log2_base, bi_negzero(), - bi_imm_u32(24), BI_SPECIAL_NONE); - bi_instr *fixed_pt = bi_f32_to_s32_to(b, bi_temp(b->shader), scale); - fixed_pt->round = BI_ROUND_NONE; // XXX + /* Scale by base, Multiply by 2*24 and convert to integer to get a 8:24 + * fixed-point input */ + bi_index scale = bi_fma_rscale_f32(b, s0, log2_base, bi_negzero(), + bi_imm_u32(24), BI_SPECIAL_NONE); + bi_instr *fixed_pt = bi_f32_to_s32_to(b, bi_temp(b->shader), scale); + fixed_pt->round = BI_ROUND_NONE; // XXX - /* Compute the result for the fixed-point input, but pass along - * the floating-point scale for correct NaN propagation */ - bi_fexp_f32_to(b, dst, fixed_pt->dest[0], scale); + /* Compute the result for the fixed-point input, but pass along + * the floating-point scale for correct NaN propagation */ + bi_fexp_f32_to(b, dst, fixed_pt->dest[0], scale); } static void bi_lower_flog2_32(bi_builder *b, bi_index dst, bi_index s0) { - /* s0 = a1 * 2^e, with a1 in [0.75, 1.5) */ - bi_index a1 = bi_frexpm_f32(b, s0, true, false); - bi_index ei = bi_frexpe_f32(b, s0, true, false); - bi_index ef = bi_s32_to_f32(b, ei); + /* s0 = a1 * 2^e, with a1 in [0.75, 1.5) */ + bi_index a1 = bi_frexpm_f32(b, s0, true, false); + bi_index ei = bi_frexpe_f32(b, s0, true, false); + bi_index ef = bi_s32_to_f32(b, ei); - /* xt estimates -log(r1), a coarse approximation of log(a1) */ - bi_index r1 = bi_flog_table_f32(b, s0, BI_MODE_RED, BI_PRECISION_NONE); - bi_index xt = bi_flog_table_f32(b, s0, BI_MODE_BASE2, BI_PRECISION_NONE); + /* xt estimates -log(r1), a coarse approximation of log(a1) */ + bi_index r1 = bi_flog_table_f32(b, s0, BI_MODE_RED, BI_PRECISION_NONE); + bi_index xt = bi_flog_table_f32(b, s0, BI_MODE_BASE2, BI_PRECISION_NONE); - /* log(s0) = log(a1 * 2^e) = e + log(a1) = e + log(a1 * r1) - - * log(r1), so let x1 = e - log(r1) ~= e + xt and x2 = log(a1 * r1), - * and then log(s0) = x1 + x2 */ - bi_index x1 = bi_fadd_f32(b, ef, xt); + /* log(s0) = log(a1 * 2^e) = e + log(a1) = e + log(a1 * r1) - + * log(r1), so let x1 = e - log(r1) ~= e + xt 
and x2 = log(a1 * r1), + * and then log(s0) = x1 + x2 */ + bi_index x1 = bi_fadd_f32(b, ef, xt); - /* Since a1 * r1 is close to 1, x2 = log(a1 * r1) may be computed by - * polynomial approximation around 1. The series is expressed around - * 1, so set y = (a1 * r1) - 1.0 */ - bi_index y = bi_fma_f32(b, a1, r1, bi_imm_f32(-1.0)); + /* Since a1 * r1 is close to 1, x2 = log(a1 * r1) may be computed by + * polynomial approximation around 1. The series is expressed around + * 1, so set y = (a1 * r1) - 1.0 */ + bi_index y = bi_fma_f32(b, a1, r1, bi_imm_f32(-1.0)); - /* x2 = log_2(1 + y) = log_e(1 + y) * (1/log_e(2)), so approximate - * log_e(1 + y) by the Taylor series (lower precision than the blob): - * y - y^2/2 + O(y^3) = y(1 - y/2) + O(y^3) */ - bi_index loge = bi_fmul_f32(b, y, - bi_fma_f32(b, y, bi_imm_f32(-0.5), bi_imm_f32(1.0))); + /* x2 = log_2(1 + y) = log_e(1 + y) * (1/log_e(2)), so approximate + * log_e(1 + y) by the Taylor series (lower precision than the blob): + * y - y^2/2 + O(y^3) = y(1 - y/2) + O(y^3) */ + bi_index loge = + bi_fmul_f32(b, y, bi_fma_f32(b, y, bi_imm_f32(-0.5), bi_imm_f32(1.0))); - bi_index x2 = bi_fmul_f32(b, loge, bi_imm_f32(1.0 / logf(2.0))); + bi_index x2 = bi_fmul_f32(b, loge, bi_imm_f32(1.0 / logf(2.0))); - /* log(s0) = x1 + x2 */ - bi_fadd_f32_to(b, dst, x1, x2); + /* log(s0) = x1 + x2 */ + bi_fadd_f32_to(b, dst, x1, x2); } static void bi_flog2_32(bi_builder *b, bi_index dst, bi_index s0) { - bi_index frexp = bi_frexpe_f32(b, s0, true, false); - bi_index frexpi = bi_s32_to_f32(b, frexp); - bi_index add = bi_fadd_lscale_f32(b, bi_imm_f32(-1.0f), s0); - bi_fma_f32_to(b, dst, bi_flogd_f32(b, s0), add, frexpi); + bi_index frexp = bi_frexpe_f32(b, s0, true, false); + bi_index frexpi = bi_s32_to_f32(b, frexp); + bi_index add = bi_fadd_lscale_f32(b, bi_imm_f32(-1.0f), s0); + bi_fma_f32_to(b, dst, bi_flogd_f32(b, s0), add, frexpi); } static void bi_lower_fpow_32(bi_builder *b, bi_index dst, bi_index base, bi_index exp) { - bi_index log2_base = bi_null(); + bi_index log2_base = bi_null(); - if (base.type == BI_INDEX_CONSTANT) { - log2_base = bi_imm_f32(log2f(uif(base.value))); - } else { - log2_base = bi_temp(b->shader); - bi_lower_flog2_32(b, log2_base, base); - } + if (base.type == BI_INDEX_CONSTANT) { + log2_base = bi_imm_f32(log2f(uif(base.value))); + } else { + log2_base = bi_temp(b->shader); + bi_lower_flog2_32(b, log2_base, base); + } - return bi_lower_fexp2_32(b, dst, bi_fmul_f32(b, exp, log2_base)); + return bi_lower_fexp2_32(b, dst, bi_fmul_f32(b, exp, log2_base)); } static void bi_fpow_32(bi_builder *b, bi_index dst, bi_index base, bi_index exp) { - bi_index log2_base = bi_null(); + bi_index log2_base = bi_null(); - if (base.type == BI_INDEX_CONSTANT) { - log2_base = bi_imm_f32(log2f(uif(base.value))); - } else { - log2_base = bi_temp(b->shader); - bi_flog2_32(b, log2_base, base); - } + if (base.type == BI_INDEX_CONSTANT) { + log2_base = bi_imm_f32(log2f(uif(base.value))); + } else { + log2_base = bi_temp(b->shader); + bi_flog2_32(b, log2_base, base); + } - return bi_fexp_32(b, dst, exp, log2_base); + return bi_fexp_32(b, dst, exp, log2_base); } /* Bifrost has extremely coarse tables for approximating sin/cos, accessible as @@ -2181,34 +2160,32 @@ bi_fpow_32(bi_builder *b, bi_index dst, bi_index base, bi_index exp) static void bi_lower_fsincos_32(bi_builder *b, bi_index dst, bi_index s0, bool cos) { - /* bottom 6-bits of result times pi/32 approximately s0 mod 2pi */ - bi_index x_u6 = bi_fma_f32(b, s0, TWO_OVER_PI, SINCOS_BIAS); + /* bottom 6-bits of 
result times pi/32 approximately s0 mod 2pi */ + bi_index x_u6 = bi_fma_f32(b, s0, TWO_OVER_PI, SINCOS_BIAS); - /* Approximate domain error (small) */ - bi_index e = bi_fma_f32(b, bi_fadd_f32(b, x_u6, bi_neg(SINCOS_BIAS)), - MPI_OVER_TWO, s0); + /* Approximate domain error (small) */ + bi_index e = bi_fma_f32(b, bi_fadd_f32(b, x_u6, bi_neg(SINCOS_BIAS)), + MPI_OVER_TWO, s0); - /* Lookup sin(x), cos(x) */ - bi_index sinx = bi_fsin_table_u6(b, x_u6, false); - bi_index cosx = bi_fcos_table_u6(b, x_u6, false); + /* Lookup sin(x), cos(x) */ + bi_index sinx = bi_fsin_table_u6(b, x_u6, false); + bi_index cosx = bi_fcos_table_u6(b, x_u6, false); - /* e^2 / 2 */ - bi_index e2_over_2 = bi_fma_rscale_f32(b, e, e, bi_negzero(), - bi_imm_u32(-1), BI_SPECIAL_NONE); + /* e^2 / 2 */ + bi_index e2_over_2 = + bi_fma_rscale_f32(b, e, e, bi_negzero(), bi_imm_u32(-1), BI_SPECIAL_NONE); - /* (-e^2)/2 f''(x) */ - bi_index quadratic = bi_fma_f32(b, bi_neg(e2_over_2), - cos ? cosx : sinx, - bi_negzero()); + /* (-e^2)/2 f''(x) */ + bi_index quadratic = + bi_fma_f32(b, bi_neg(e2_over_2), cos ? cosx : sinx, bi_negzero()); - /* e f'(x) - (e^2/2) f''(x) */ - bi_instr *I = bi_fma_f32_to(b, bi_temp(b->shader), e, - cos ? bi_neg(sinx) : cosx, - quadratic); - I->clamp = BI_CLAMP_CLAMP_M1_1; + /* e f'(x) - (e^2/2) f''(x) */ + bi_instr *I = bi_fma_f32_to(b, bi_temp(b->shader), e, + cos ? bi_neg(sinx) : cosx, quadratic); + I->clamp = BI_CLAMP_CLAMP_M1_1; - /* f(x) + e f'(x) - (e^2/2) f''(x) */ - bi_fadd_f32_to(b, dst, I->dest[0], cos ? cosx : sinx); + /* f(x) + e f'(x) - (e^2/2) f''(x) */ + bi_fadd_f32_to(b, dst, I->dest[0], cos ? cosx : sinx); } /* @@ -2219,954 +2196,961 @@ bi_lower_fsincos_32(bi_builder *b, bi_index dst, bi_index s0, bool cos) static bi_index bi_clper_xor(bi_builder *b, bi_index s0, bi_index s1) { - if (!(b->shader->quirks & BIFROST_LIMITED_CLPER)) { - return bi_clper_i32(b, s0, s1, - BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_XOR, - BI_SUBGROUP_SUBGROUP4); - } + if (!(b->shader->quirks & BIFROST_LIMITED_CLPER)) { + return bi_clper_i32(b, s0, s1, BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_XOR, + BI_SUBGROUP_SUBGROUP4); + } - bi_index lane_id = bi_fau(BIR_FAU_LANE_ID, false); - bi_index lane = bi_lshift_xor_i32(b, lane_id, s1, bi_imm_u8(0)); - return bi_clper_old_i32(b, s0, lane); + bi_index lane_id = bi_fau(BIR_FAU_LANE_ID, false); + bi_index lane = bi_lshift_xor_i32(b, lane_id, s1, bi_imm_u8(0)); + return bi_clper_old_i32(b, s0, lane); } static enum bi_cmpf bi_translate_cmpf(nir_op op) { - switch (op) { - case nir_op_ieq8: - case nir_op_ieq16: - case nir_op_ieq32: - case nir_op_feq16: - case nir_op_feq32: - return BI_CMPF_EQ; + switch (op) { + case nir_op_ieq8: + case nir_op_ieq16: + case nir_op_ieq32: + case nir_op_feq16: + case nir_op_feq32: + return BI_CMPF_EQ; - case nir_op_ine8: - case nir_op_ine16: - case nir_op_ine32: - case nir_op_fneu16: - case nir_op_fneu32: - return BI_CMPF_NE; + case nir_op_ine8: + case nir_op_ine16: + case nir_op_ine32: + case nir_op_fneu16: + case nir_op_fneu32: + return BI_CMPF_NE; - case nir_op_ilt8: - case nir_op_ilt16: - case nir_op_ilt32: - case nir_op_flt16: - case nir_op_flt32: - case nir_op_ult8: - case nir_op_ult16: - case nir_op_ult32: - return BI_CMPF_LT; + case nir_op_ilt8: + case nir_op_ilt16: + case nir_op_ilt32: + case nir_op_flt16: + case nir_op_flt32: + case nir_op_ult8: + case nir_op_ult16: + case nir_op_ult32: + return BI_CMPF_LT; - case nir_op_ige8: - case nir_op_ige16: - case nir_op_ige32: - case nir_op_fge16: - case nir_op_fge32: - case nir_op_uge8: - case 
nir_op_uge16: - case nir_op_uge32: - return BI_CMPF_GE; + case nir_op_ige8: + case nir_op_ige16: + case nir_op_ige32: + case nir_op_fge16: + case nir_op_fge32: + case nir_op_uge8: + case nir_op_uge16: + case nir_op_uge32: + return BI_CMPF_GE; - default: - unreachable("invalid comparison"); - } + default: + unreachable("invalid comparison"); + } } static bool bi_nir_is_replicated(nir_alu_src *src) { - for (unsigned i = 1; i < nir_src_num_components(src->src); ++i) { - if (src->swizzle[0] == src->swizzle[i]) - return false; - } + for (unsigned i = 1; i < nir_src_num_components(src->src); ++i) { + if (src->swizzle[0] == src->swizzle[i]) + return false; + } - return true; + return true; } static void bi_emit_alu(bi_builder *b, nir_alu_instr *instr) { - bi_index dst = bi_dest_index(&instr->dest.dest); - unsigned srcs = nir_op_infos[instr->op].num_inputs; - unsigned sz = nir_dest_bit_size(instr->dest.dest); - unsigned comps = nir_dest_num_components(instr->dest.dest); - unsigned src_sz = srcs > 0 ? nir_src_bit_size(instr->src[0].src) : 0; - - /* Indicate scalarness */ - if (sz == 16 && comps == 1) - dst.swizzle = BI_SWIZZLE_H00; - - /* First, match against the various moves in NIR. These are - * special-cased because they can operate on vectors even after - * lowering ALU to scalar. For Bifrost, bi_alu_src_index assumes the - * instruction is no "bigger" than SIMD-within-a-register. These moves - * are the exceptions that need to handle swizzles specially. */ - - switch (instr->op) { - case nir_op_vec2: - case nir_op_vec3: - case nir_op_vec4: - case nir_op_vec8: - case nir_op_vec16: { - bi_index unoffset_srcs[16] = { bi_null() }; - unsigned channels[16] = { 0 }; - - for (unsigned i = 0; i < srcs; ++i) { - unoffset_srcs[i] = bi_src_index(&instr->src[i].src); - channels[i] = instr->src[i].swizzle[0]; - } - - bi_make_vec_to(b, dst, unoffset_srcs, channels, srcs, sz); - return; - } - - case nir_op_unpack_32_2x16: { - /* Should have been scalarized */ - assert(comps == 2 && sz == 16); - - bi_index vec = bi_src_index(&instr->src[0].src); - unsigned chan = instr->src[0].swizzle[0]; - - bi_mov_i32_to(b, dst, bi_extract(b, vec, chan)); - return; - } - - case nir_op_unpack_64_2x32_split_x: - { - unsigned chan = (instr->src[0].swizzle[0] * 2) + 0; - bi_mov_i32_to(b, dst, bi_extract(b, bi_src_index(&instr->src[0].src), chan)); - return; - } - - case nir_op_unpack_64_2x32_split_y: - { - unsigned chan = (instr->src[0].swizzle[0] * 2) + 1; - bi_mov_i32_to(b, dst, bi_extract(b, bi_src_index(&instr->src[0].src), chan)); - return; - } - - case nir_op_pack_64_2x32_split: - bi_collect_v2i32_to(b, dst, - bi_extract(b, bi_src_index(&instr->src[0].src), instr->src[0].swizzle[0]), - bi_extract(b, bi_src_index(&instr->src[1].src), instr->src[1].swizzle[0])); - return; - - case nir_op_pack_64_2x32: - bi_collect_v2i32_to(b, dst, - bi_extract(b, bi_src_index(&instr->src[0].src), 0), - bi_extract(b, bi_src_index(&instr->src[0].src), 1)); - return; - - case nir_op_pack_uvec2_to_uint: { - bi_index src = bi_src_index(&instr->src[0].src); - - assert(sz == 32 && src_sz == 32); - bi_mkvec_v2i16_to(b, dst, bi_half(bi_extract(b, src, 0), false), - bi_half(bi_extract(b, src, 1), false)); - return; - } - - case nir_op_pack_uvec4_to_uint: { - bi_index src = bi_src_index(&instr->src[0].src); - - assert(sz == 32 && src_sz == 32); - bi_mkvec_v4i8_to(b, dst, bi_byte(bi_extract(b, src, 0), 0), - bi_byte(bi_extract(b, src, 1), 0), - bi_byte(bi_extract(b, src, 2), 0), - bi_byte(bi_extract(b, src, 3), 0)); - return; - } - - case nir_op_mov: { 
- bi_index idx = bi_src_index(&instr->src[0].src); - bi_index unoffset_srcs[4] = { idx, idx, idx, idx }; - - unsigned channels[4] = { - comps > 0 ? instr->src[0].swizzle[0] : 0, - comps > 1 ? instr->src[0].swizzle[1] : 0, - comps > 2 ? instr->src[0].swizzle[2] : 0, - comps > 3 ? instr->src[0].swizzle[3] : 0, - }; - - bi_make_vec_to(b, dst, unoffset_srcs, channels, comps, src_sz); - return; - } - - case nir_op_pack_32_2x16: { - assert(comps == 1); - - bi_index idx = bi_src_index(&instr->src[0].src); - bi_index unoffset_srcs[4] = { idx, idx, idx, idx }; - - unsigned channels[2] = { - instr->src[0].swizzle[0], - instr->src[0].swizzle[1] - }; - - bi_make_vec_to(b, dst, unoffset_srcs, channels, 2, 16); - return; - } - - case nir_op_f2f16: - case nir_op_f2f16_rtz: - case nir_op_f2f16_rtne: { - assert(src_sz == 32); - bi_index idx = bi_src_index(&instr->src[0].src); - bi_index s0 = bi_extract(b, idx, instr->src[0].swizzle[0]); - bi_index s1 = comps > 1 ? - bi_extract(b, idx, instr->src[0].swizzle[1]) : s0; - - bi_instr *I = bi_v2f32_to_v2f16_to(b, dst, s0, s1); - - /* Override rounding if explicitly requested. Otherwise, the - * default rounding mode is selected by the builder. Depending - * on the float controls required by the shader, the default - * mode may not be nearest-even. - */ - if (instr->op == nir_op_f2f16_rtz) - I->round = BI_ROUND_RTZ; - else if (instr->op == nir_op_f2f16_rtne) - I->round = BI_ROUND_NONE; /* Nearest even */ - - return; - } - - /* Vectorized downcasts */ - case nir_op_u2u16: - case nir_op_i2i16: { - if (!(src_sz == 32 && comps == 2)) - break; - - bi_index idx = bi_src_index(&instr->src[0].src); - bi_index s0 = bi_extract(b, idx, instr->src[0].swizzle[0]); - bi_index s1 = bi_extract(b, idx, instr->src[0].swizzle[1]); - - bi_mkvec_v2i16_to(b, dst, - bi_half(s0, false), bi_half(s1, false)); - return; - } - - /* While we do not have a direct V2U32_TO_V2F16 instruction, lowering to - * MKVEC.v2i16 + V2U16_TO_V2F16 is more efficient on Bifrost than - * scalarizing due to scheduling (equal cost on Valhall). Additionally - * if the source is replicated the MKVEC.v2i16 can be optimized out. - */ - case nir_op_u2f16: - case nir_op_i2f16: { - if (!(src_sz == 32 && comps == 2)) - break; - - nir_alu_src *src = &instr->src[0]; - bi_index idx = bi_src_index(&src->src); - bi_index s0 = bi_extract(b, idx, src->swizzle[0]); - bi_index s1 = bi_extract(b, idx, src->swizzle[1]); - - bi_index t = (src->swizzle[0] == src->swizzle[1]) ? - bi_half(s0, false) : - bi_mkvec_v2i16(b, bi_half(s0, false), - bi_half(s1, false)); - - if (instr->op == nir_op_u2f16) - bi_v2u16_to_v2f16_to(b, dst, t); - else - bi_v2s16_to_v2f16_to(b, dst, t); - - return; - } - - case nir_op_i2i8: - case nir_op_u2u8: - { - /* Acts like an 8-bit swizzle */ - bi_index idx = bi_src_index(&instr->src[0].src); - unsigned factor = src_sz / 8; - unsigned chan[4] = { 0 }; - - for (unsigned i = 0; i < comps; ++i) - chan[i] = instr->src[0].swizzle[i] * factor; - - bi_make_vec_to(b, dst, &idx, chan, comps, 8); - return; - } - - case nir_op_b32csel: - { - if (sz != 16) - break; - - /* We allow vectorizing b32csel(cond, A, B) which can be - * translated as MUX.v2i16, even though cond is a 32-bit vector. - * - * If the source condition vector is replicated, we can use - * MUX.v2i16 directly, letting each component use the - * corresponding half of the 32-bit source. NIR uses 0/~0 - * booleans so that's guaranteed to work (that is, 32-bit NIR - * booleans are 16-bit replicated). 
- * - * If we're not replicated, we use the same trick but must - * insert a MKVEC.v2i16 first to convert down to 16-bit. - */ - bi_index idx = bi_src_index(&instr->src[0].src); - bi_index s0 = bi_extract(b, idx, instr->src[0].swizzle[0]); - bi_index s1 = bi_alu_src_index(b, instr->src[1], comps); - bi_index s2 = bi_alu_src_index(b, instr->src[2], comps); - - if (!bi_nir_is_replicated(&instr->src[0])) { - s0 = bi_mkvec_v2i16(b, bi_half(s0, false), - bi_half(bi_extract(b, idx, instr->src[0].swizzle[1]), false)); - } - - bi_mux_v2i16_to(b, dst, s2, s1, s0, BI_MUX_INT_ZERO); - return; - } - - default: - break; - } - - bi_index s0 = srcs > 0 ? bi_alu_src_index(b, instr->src[0], comps) : bi_null(); - bi_index s1 = srcs > 1 ? bi_alu_src_index(b, instr->src[1], comps) : bi_null(); - bi_index s2 = srcs > 2 ? bi_alu_src_index(b, instr->src[2], comps) : bi_null(); - - switch (instr->op) { - case nir_op_ffma: - bi_fma_to(b, sz, dst, s0, s1, s2); - break; - - case nir_op_fmul: - bi_fma_to(b, sz, dst, s0, s1, bi_negzero()); - break; - - case nir_op_fsub: - s1 = bi_neg(s1); - FALLTHROUGH; - case nir_op_fadd: - bi_fadd_to(b, sz, dst, s0, s1); - break; - - case nir_op_fsat: { - bi_instr *I = bi_fclamp_to(b, sz, dst, s0); - I->clamp = BI_CLAMP_CLAMP_0_1; - break; - } - - case nir_op_fsat_signed_mali: { - bi_instr *I = bi_fclamp_to(b, sz, dst, s0); - I->clamp = BI_CLAMP_CLAMP_M1_1; - break; - } - - case nir_op_fclamp_pos_mali: { - bi_instr *I = bi_fclamp_to(b, sz, dst, s0); - I->clamp = BI_CLAMP_CLAMP_0_INF; - break; - } - - case nir_op_fneg: - bi_fabsneg_to(b, sz, dst, bi_neg(s0)); - break; - - case nir_op_fabs: - bi_fabsneg_to(b, sz, dst, bi_abs(s0)); - break; - - case nir_op_fsin: - bi_lower_fsincos_32(b, dst, s0, false); - break; - - case nir_op_fcos: - bi_lower_fsincos_32(b, dst, s0, true); - break; - - case nir_op_fexp2: - assert(sz == 32); /* should've been lowered */ - - if (b->shader->quirks & BIFROST_NO_FP32_TRANSCENDENTALS) - bi_lower_fexp2_32(b, dst, s0); - else - bi_fexp_32(b, dst, s0, bi_imm_f32(1.0f)); - - break; - - case nir_op_flog2: - assert(sz == 32); /* should've been lowered */ - - if (b->shader->quirks & BIFROST_NO_FP32_TRANSCENDENTALS) - bi_lower_flog2_32(b, dst, s0); - else - bi_flog2_32(b, dst, s0); - - break; - - case nir_op_fpow: - assert(sz == 32); /* should've been lowered */ - - if (b->shader->quirks & BIFROST_NO_FP32_TRANSCENDENTALS) - bi_lower_fpow_32(b, dst, s0, s1); - else - bi_fpow_32(b, dst, s0, s1); - - break; - - case nir_op_frexp_exp: - bi_frexpe_to(b, sz, dst, s0, false, false); - break; - - case nir_op_frexp_sig: - bi_frexpm_to(b, sz, dst, s0, false, false); - break; - - case nir_op_ldexp: - bi_ldexp_to(b, sz, dst, s0, s1); - break; - - case nir_op_b8csel: - bi_mux_v4i8_to(b, dst, s2, s1, s0, BI_MUX_INT_ZERO); - break; - - case nir_op_b16csel: - bi_mux_v2i16_to(b, dst, s2, s1, s0, BI_MUX_INT_ZERO); - break; - - case nir_op_b32csel: - bi_mux_i32_to(b, dst, s2, s1, s0, BI_MUX_INT_ZERO); - break; - - case nir_op_extract_u8: - case nir_op_extract_i8: { - assert(comps == 1 && "should be scalarized"); - assert((src_sz == 16 || src_sz == 32) && "should be lowered"); - unsigned byte = nir_src_as_uint(instr->src[1].src); - - if (s0.swizzle == BI_SWIZZLE_H11) { - assert(byte < 2); - byte += 2; - } else if (s0.swizzle != BI_SWIZZLE_H01) { - assert(s0.swizzle == BI_SWIZZLE_H00); - } - - assert(byte < 4); - - s0.swizzle = BI_SWIZZLE_H01; - - if (instr->op == nir_op_extract_i8) - bi_s8_to_s32_to(b, dst, bi_byte(s0, byte)); - else - bi_u8_to_u32_to(b, dst, bi_byte(s0, byte)); - 
break; - } - - case nir_op_extract_u16: - case nir_op_extract_i16: { - assert(comps == 1 && "should be scalarized"); - assert(src_sz == 32 && "should be lowered"); - unsigned half = nir_src_as_uint(instr->src[1].src); - assert(half == 0 || half == 1); - - if (instr->op == nir_op_extract_i16) - bi_s16_to_s32_to(b, dst, bi_half(s0, half)); - else - bi_u16_to_u32_to(b, dst, bi_half(s0, half)); - break; - } - - case nir_op_insert_u16: { - assert(comps == 1 && "should be scalarized"); - unsigned half = nir_src_as_uint(instr->src[1].src); - assert(half == 0 || half == 1); - - if (half == 0) - bi_u16_to_u32_to(b, dst, bi_half(s0, 0)); - else - bi_mkvec_v2i16_to(b, dst, bi_imm_u16(0), bi_half(s0, 0)); - break; - } - - case nir_op_ishl: - bi_lshift_or_to(b, sz, dst, s0, bi_zero(), bi_byte(s1, 0)); - break; - case nir_op_ushr: - bi_rshift_or_to(b, sz, dst, s0, bi_zero(), bi_byte(s1, 0), false); - break; - - case nir_op_ishr: - if (b->shader->arch >= 9) - bi_rshift_or_to(b, sz, dst, s0, bi_zero(), bi_byte(s1, 0), true); - else - bi_arshift_to(b, sz, dst, s0, bi_null(), bi_byte(s1, 0)); - break; - - case nir_op_imin: - case nir_op_umin: - bi_csel_to(b, nir_op_infos[instr->op].input_types[0], sz, dst, - s0, s1, s0, s1, BI_CMPF_LT); - break; - - case nir_op_imax: - case nir_op_umax: - bi_csel_to(b, nir_op_infos[instr->op].input_types[0], sz, dst, - s0, s1, s0, s1, BI_CMPF_GT); - break; - - case nir_op_fddx_must_abs_mali: - case nir_op_fddy_must_abs_mali: { - bi_index bit = bi_imm_u32(instr->op == nir_op_fddx_must_abs_mali ? 1 : 2); - bi_index adjacent = bi_clper_xor(b, s0, bit); - bi_fadd_to(b, sz, dst, adjacent, bi_neg(s0)); - break; - } - - case nir_op_fddx: - case nir_op_fddy: - case nir_op_fddx_coarse: - case nir_op_fddy_coarse: - case nir_op_fddx_fine: - case nir_op_fddy_fine: { - unsigned axis; - switch (instr->op) { - case nir_op_fddx: - case nir_op_fddx_coarse: - case nir_op_fddx_fine: - axis = 1; - break; - case nir_op_fddy: - case nir_op_fddy_coarse: - case nir_op_fddy_fine: - axis = 2; - break; - default: - unreachable("Invalid derivative op"); - } - - bi_index lane1, lane2; - switch (instr->op) { - case nir_op_fddx: - case nir_op_fddx_fine: - case nir_op_fddy: - case nir_op_fddy_fine: - lane1 = bi_lshift_and_i32(b, - bi_fau(BIR_FAU_LANE_ID, false), - bi_imm_u32(0x3 & ~axis), - bi_imm_u8(0)); - - lane2 = bi_iadd_u32(b, lane1, - bi_imm_u32(axis), - false); - break; - case nir_op_fddx_coarse: - case nir_op_fddy_coarse: - lane1 = bi_imm_u32(0); - lane2 = bi_imm_u32(axis); - break; - default: - unreachable("Invalid derivative op"); - } - - bi_index left, right; - - if (b->shader->quirks & BIFROST_LIMITED_CLPER) { - left = bi_clper_old_i32(b, s0, lane1); - right = bi_clper_old_i32(b, s0, lane2); - } else { - left = bi_clper_i32(b, s0, lane1, - BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE, - BI_SUBGROUP_SUBGROUP4); - - right = bi_clper_i32(b, s0, lane2, - BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE, - BI_SUBGROUP_SUBGROUP4); - } - - bi_fadd_to(b, sz, dst, right, bi_neg(left)); - break; - } - - case nir_op_f2f32: - bi_f16_to_f32_to(b, dst, s0); - break; - - case nir_op_fquantize2f16: - { - bi_instr *f16 = bi_v2f32_to_v2f16_to(b, bi_temp(b->shader), s0, s0); - bi_instr *f32 = bi_f16_to_f32_to(b, dst, bi_half(f16->dest[0], false)); - - f16->ftz = f32->ftz = true; - break; - } - - case nir_op_f2i32: - if (src_sz == 32) - bi_f32_to_s32_to(b, dst, s0); - else - bi_f16_to_s32_to(b, dst, s0); - break; - - /* Note 32-bit sources => no vectorization, so 32-bit works */ - case nir_op_f2u16: - if (src_sz == 32) - 
bi_f32_to_u32_to(b, dst, s0); - else - bi_v2f16_to_v2u16_to(b, dst, s0); - break; - - case nir_op_f2i16: - if (src_sz == 32) - bi_f32_to_s32_to(b, dst, s0); - else - bi_v2f16_to_v2s16_to(b, dst, s0); - break; - - case nir_op_f2u32: - if (src_sz == 32) - bi_f32_to_u32_to(b, dst, s0); - else - bi_f16_to_u32_to(b, dst, s0); - break; - - case nir_op_u2f16: - if (src_sz == 32) - bi_v2u16_to_v2f16_to(b, dst, bi_half(s0, false)); - else if (src_sz == 16) - bi_v2u16_to_v2f16_to(b, dst, s0); - else if (src_sz == 8) - bi_v2u8_to_v2f16_to(b, dst, s0); - break; - - case nir_op_u2f32: - if (src_sz == 32) - bi_u32_to_f32_to(b, dst, s0); - else if (src_sz == 16) - bi_u16_to_f32_to(b, dst, s0); - else - bi_u8_to_f32_to(b, dst, s0); - break; - - case nir_op_i2f16: - if (src_sz == 32) - bi_v2s16_to_v2f16_to(b, dst, bi_half(s0, false)); - else if (src_sz == 16) - bi_v2s16_to_v2f16_to(b, dst, s0); - else if (src_sz == 8) - bi_v2s8_to_v2f16_to(b, dst, s0); - break; - - case nir_op_i2f32: - assert(src_sz == 32 || src_sz == 16 || src_sz == 8); - - if (src_sz == 32) - bi_s32_to_f32_to(b, dst, s0); - else if (src_sz == 16) - bi_s16_to_f32_to(b, dst, s0); - else if (src_sz == 8) - bi_s8_to_f32_to(b, dst, s0); - break; - - case nir_op_i2i32: - assert(src_sz == 32 || src_sz == 16 || src_sz == 8); - - if (src_sz == 32) - bi_mov_i32_to(b, dst, s0); - else if (src_sz == 16) - bi_s16_to_s32_to(b, dst, s0); - else if (src_sz == 8) - bi_s8_to_s32_to(b, dst, s0); - break; - - case nir_op_u2u32: - assert(src_sz == 32 || src_sz == 16 || src_sz == 8); - - if (src_sz == 32) - bi_mov_i32_to(b, dst, s0); - else if (src_sz == 16) - bi_u16_to_u32_to(b, dst, s0); - else if (src_sz == 8) - bi_u8_to_u32_to(b, dst, s0); - - break; - - case nir_op_i2i16: - assert(src_sz == 8 || src_sz == 32); - - if (src_sz == 8) - bi_v2s8_to_v2s16_to(b, dst, s0); - else - bi_mov_i32_to(b, dst, s0); - break; - - case nir_op_u2u16: - assert(src_sz == 8 || src_sz == 32); - - if (src_sz == 8) - bi_v2u8_to_v2u16_to(b, dst, s0); - else - bi_mov_i32_to(b, dst, s0); - break; - - case nir_op_b2i8: - case nir_op_b2i16: - case nir_op_b2i32: - bi_mux_to(b, sz, dst, bi_imm_u8(0), bi_imm_uintN(1, sz), s0, BI_MUX_INT_ZERO); - break; - - case nir_op_f2b16: - bi_mux_v2i16_to(b, dst, bi_imm_u16(0), bi_imm_u16(~0), s0, BI_MUX_FP_ZERO); - break; - case nir_op_f2b32: - bi_mux_i32_to(b, dst, bi_imm_u32(0), bi_imm_u32(~0), s0, BI_MUX_FP_ZERO); - break; - - case nir_op_ieq8: - case nir_op_ine8: - case nir_op_ilt8: - case nir_op_ige8: - case nir_op_ieq16: - case nir_op_ine16: - case nir_op_ilt16: - case nir_op_ige16: - case nir_op_ieq32: - case nir_op_ine32: - case nir_op_ilt32: - case nir_op_ige32: - bi_icmp_to(b, nir_type_int, sz, dst, s0, s1, bi_translate_cmpf(instr->op), BI_RESULT_TYPE_M1); - break; - - case nir_op_ult8: - case nir_op_uge8: - case nir_op_ult16: - case nir_op_uge16: - case nir_op_ult32: - case nir_op_uge32: - bi_icmp_to(b, nir_type_uint, sz, dst, s0, s1, bi_translate_cmpf(instr->op), BI_RESULT_TYPE_M1); - break; - - case nir_op_feq32: - case nir_op_feq16: - case nir_op_flt32: - case nir_op_flt16: - case nir_op_fge32: - case nir_op_fge16: - case nir_op_fneu32: - case nir_op_fneu16: - bi_fcmp_to(b, sz, dst, s0, s1, bi_translate_cmpf(instr->op), BI_RESULT_TYPE_M1); - break; - - case nir_op_fround_even: - case nir_op_fceil: - case nir_op_ffloor: - case nir_op_ftrunc: - bi_fround_to(b, sz, dst, s0, bi_nir_round(instr->op)); - break; - - case nir_op_fmin: - bi_fmin_to(b, sz, dst, s0, s1); - break; - - case nir_op_fmax: - bi_fmax_to(b, sz, dst, s0, s1); - break; - 
- case nir_op_iadd: - bi_iadd_to(b, nir_type_int, sz, dst, s0, s1, false); - break; - - case nir_op_iadd_sat: - bi_iadd_to(b, nir_type_int, sz, dst, s0, s1, true); - break; - - case nir_op_uadd_sat: - bi_iadd_to(b, nir_type_uint, sz, dst, s0, s1, true); - break; - - case nir_op_ihadd: - bi_hadd_to(b, nir_type_int, sz, dst, s0, s1, BI_ROUND_RTN); - break; - - case nir_op_irhadd: - bi_hadd_to(b, nir_type_int, sz, dst, s0, s1, BI_ROUND_RTP); - break; - - case nir_op_uhadd: - bi_hadd_to(b, nir_type_uint, sz, dst, s0, s1, BI_ROUND_RTN); - break; - - case nir_op_urhadd: - bi_hadd_to(b, nir_type_uint, sz, dst, s0, s1, BI_ROUND_RTP); - break; - - case nir_op_ineg: - bi_isub_to(b, nir_type_int, sz, dst, bi_zero(), s0, false); - break; - - case nir_op_isub: - bi_isub_to(b, nir_type_int, sz, dst, s0, s1, false); - break; - - case nir_op_isub_sat: - bi_isub_to(b, nir_type_int, sz, dst, s0, s1, true); - break; - - case nir_op_usub_sat: - bi_isub_to(b, nir_type_uint, sz, dst, s0, s1, true); - break; - - case nir_op_imul: - bi_imul_to(b, sz, dst, s0, s1); - break; - - case nir_op_iabs: - bi_iabs_to(b, sz, dst, s0); - break; - - case nir_op_iand: - bi_lshift_and_to(b, sz, dst, s0, s1, bi_imm_u8(0)); - break; - - case nir_op_ior: - bi_lshift_or_to(b, sz, dst, s0, s1, bi_imm_u8(0)); - break; - - case nir_op_ixor: - bi_lshift_xor_to(b, sz, dst, s0, s1, bi_imm_u8(0)); - break; - - case nir_op_inot: - bi_lshift_or_to(b, sz, dst, bi_zero(), bi_not(s0), bi_imm_u8(0)); - break; - - case nir_op_frsq: - if (sz == 32 && b->shader->quirks & BIFROST_NO_FP32_TRANSCENDENTALS) - bi_lower_frsq_32(b, dst, s0); - else - bi_frsq_to(b, sz, dst, s0); - break; - - case nir_op_frcp: - if (sz == 32 && b->shader->quirks & BIFROST_NO_FP32_TRANSCENDENTALS) - bi_lower_frcp_32(b, dst, s0); - else - bi_frcp_to(b, sz, dst, s0); - break; - - case nir_op_uclz: - bi_clz_to(b, sz, dst, s0, false); - break; - - case nir_op_bit_count: - assert(sz == 32 && src_sz == 32 && "should've been lowered"); - bi_popcount_i32_to(b, dst, s0); - break; - - case nir_op_bitfield_reverse: - assert(sz == 32 && src_sz == 32 && "should've been lowered"); - bi_bitrev_i32_to(b, dst, s0); - break; - - case nir_op_ufind_msb: { - bi_index clz = bi_clz(b, src_sz, s0, false); - - if (sz == 8) - clz = bi_byte(clz, 0); - else if (sz == 16) - clz = bi_half(clz, false); - - bi_isub_u32_to(b, dst, bi_imm_u32(src_sz - 1), clz, false); - break; - } - - default: - fprintf(stderr, "Unhandled ALU op %s\n", nir_op_infos[instr->op].name); - unreachable("Unknown ALU op"); - } + bi_index dst = bi_dest_index(&instr->dest.dest); + unsigned srcs = nir_op_infos[instr->op].num_inputs; + unsigned sz = nir_dest_bit_size(instr->dest.dest); + unsigned comps = nir_dest_num_components(instr->dest.dest); + unsigned src_sz = srcs > 0 ? nir_src_bit_size(instr->src[0].src) : 0; + + /* Indicate scalarness */ + if (sz == 16 && comps == 1) + dst.swizzle = BI_SWIZZLE_H00; + + /* First, match against the various moves in NIR. These are + * special-cased because they can operate on vectors even after + * lowering ALU to scalar. For Bifrost, bi_alu_src_index assumes the + * instruction is no "bigger" than SIMD-within-a-register. These moves + * are the exceptions that need to handle swizzles specially. 
*/ + + switch (instr->op) { + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: + case nir_op_vec8: + case nir_op_vec16: { + bi_index unoffset_srcs[16] = {bi_null()}; + unsigned channels[16] = {0}; + + for (unsigned i = 0; i < srcs; ++i) { + unoffset_srcs[i] = bi_src_index(&instr->src[i].src); + channels[i] = instr->src[i].swizzle[0]; + } + + bi_make_vec_to(b, dst, unoffset_srcs, channels, srcs, sz); + return; + } + + case nir_op_unpack_32_2x16: { + /* Should have been scalarized */ + assert(comps == 2 && sz == 16); + + bi_index vec = bi_src_index(&instr->src[0].src); + unsigned chan = instr->src[0].swizzle[0]; + + bi_mov_i32_to(b, dst, bi_extract(b, vec, chan)); + return; + } + + case nir_op_unpack_64_2x32_split_x: { + unsigned chan = (instr->src[0].swizzle[0] * 2) + 0; + bi_mov_i32_to(b, dst, + bi_extract(b, bi_src_index(&instr->src[0].src), chan)); + return; + } + + case nir_op_unpack_64_2x32_split_y: { + unsigned chan = (instr->src[0].swizzle[0] * 2) + 1; + bi_mov_i32_to(b, dst, + bi_extract(b, bi_src_index(&instr->src[0].src), chan)); + return; + } + + case nir_op_pack_64_2x32_split: + bi_collect_v2i32_to(b, dst, + bi_extract(b, bi_src_index(&instr->src[0].src), + instr->src[0].swizzle[0]), + bi_extract(b, bi_src_index(&instr->src[1].src), + instr->src[1].swizzle[0])); + return; + + case nir_op_pack_64_2x32: + bi_collect_v2i32_to(b, dst, + bi_extract(b, bi_src_index(&instr->src[0].src), 0), + bi_extract(b, bi_src_index(&instr->src[0].src), 1)); + return; + + case nir_op_pack_uvec2_to_uint: { + bi_index src = bi_src_index(&instr->src[0].src); + + assert(sz == 32 && src_sz == 32); + bi_mkvec_v2i16_to(b, dst, bi_half(bi_extract(b, src, 0), false), + bi_half(bi_extract(b, src, 1), false)); + return; + } + + case nir_op_pack_uvec4_to_uint: { + bi_index src = bi_src_index(&instr->src[0].src); + + assert(sz == 32 && src_sz == 32); + bi_mkvec_v4i8_to(b, dst, bi_byte(bi_extract(b, src, 0), 0), + bi_byte(bi_extract(b, src, 1), 0), + bi_byte(bi_extract(b, src, 2), 0), + bi_byte(bi_extract(b, src, 3), 0)); + return; + } + + case nir_op_mov: { + bi_index idx = bi_src_index(&instr->src[0].src); + bi_index unoffset_srcs[4] = {idx, idx, idx, idx}; + + unsigned channels[4] = { + comps > 0 ? instr->src[0].swizzle[0] : 0, + comps > 1 ? instr->src[0].swizzle[1] : 0, + comps > 2 ? instr->src[0].swizzle[2] : 0, + comps > 3 ? instr->src[0].swizzle[3] : 0, + }; + + bi_make_vec_to(b, dst, unoffset_srcs, channels, comps, src_sz); + return; + } + + case nir_op_pack_32_2x16: { + assert(comps == 1); + + bi_index idx = bi_src_index(&instr->src[0].src); + bi_index unoffset_srcs[4] = {idx, idx, idx, idx}; + + unsigned channels[2] = {instr->src[0].swizzle[0], + instr->src[0].swizzle[1]}; + + bi_make_vec_to(b, dst, unoffset_srcs, channels, 2, 16); + return; + } + + case nir_op_f2f16: + case nir_op_f2f16_rtz: + case nir_op_f2f16_rtne: { + assert(src_sz == 32); + bi_index idx = bi_src_index(&instr->src[0].src); + bi_index s0 = bi_extract(b, idx, instr->src[0].swizzle[0]); + bi_index s1 = + comps > 1 ? bi_extract(b, idx, instr->src[0].swizzle[1]) : s0; + + bi_instr *I = bi_v2f32_to_v2f16_to(b, dst, s0, s1); + + /* Override rounding if explicitly requested. Otherwise, the + * default rounding mode is selected by the builder. Depending + * on the float controls required by the shader, the default + * mode may not be nearest-even. 
+ */ + if (instr->op == nir_op_f2f16_rtz) + I->round = BI_ROUND_RTZ; + else if (instr->op == nir_op_f2f16_rtne) + I->round = BI_ROUND_NONE; /* Nearest even */ + + return; + } + + /* Vectorized downcasts */ + case nir_op_u2u16: + case nir_op_i2i16: { + if (!(src_sz == 32 && comps == 2)) + break; + + bi_index idx = bi_src_index(&instr->src[0].src); + bi_index s0 = bi_extract(b, idx, instr->src[0].swizzle[0]); + bi_index s1 = bi_extract(b, idx, instr->src[0].swizzle[1]); + + bi_mkvec_v2i16_to(b, dst, bi_half(s0, false), bi_half(s1, false)); + return; + } + + /* While we do not have a direct V2U32_TO_V2F16 instruction, lowering to + * MKVEC.v2i16 + V2U16_TO_V2F16 is more efficient on Bifrost than + * scalarizing due to scheduling (equal cost on Valhall). Additionally + * if the source is replicated the MKVEC.v2i16 can be optimized out. + */ + case nir_op_u2f16: + case nir_op_i2f16: { + if (!(src_sz == 32 && comps == 2)) + break; + + nir_alu_src *src = &instr->src[0]; + bi_index idx = bi_src_index(&src->src); + bi_index s0 = bi_extract(b, idx, src->swizzle[0]); + bi_index s1 = bi_extract(b, idx, src->swizzle[1]); + + bi_index t = + (src->swizzle[0] == src->swizzle[1]) + ? bi_half(s0, false) + : bi_mkvec_v2i16(b, bi_half(s0, false), bi_half(s1, false)); + + if (instr->op == nir_op_u2f16) + bi_v2u16_to_v2f16_to(b, dst, t); + else + bi_v2s16_to_v2f16_to(b, dst, t); + + return; + } + + case nir_op_i2i8: + case nir_op_u2u8: { + /* Acts like an 8-bit swizzle */ + bi_index idx = bi_src_index(&instr->src[0].src); + unsigned factor = src_sz / 8; + unsigned chan[4] = {0}; + + for (unsigned i = 0; i < comps; ++i) + chan[i] = instr->src[0].swizzle[i] * factor; + + bi_make_vec_to(b, dst, &idx, chan, comps, 8); + return; + } + + case nir_op_b32csel: { + if (sz != 16) + break; + + /* We allow vectorizing b32csel(cond, A, B) which can be + * translated as MUX.v2i16, even though cond is a 32-bit vector. + * + * If the source condition vector is replicated, we can use + * MUX.v2i16 directly, letting each component use the + * corresponding half of the 32-bit source. NIR uses 0/~0 + * booleans so that's guaranteed to work (that is, 32-bit NIR + * booleans are 16-bit replicated). + * + * If we're not replicated, we use the same trick but must + * insert a MKVEC.v2i16 first to convert down to 16-bit. + */ + bi_index idx = bi_src_index(&instr->src[0].src); + bi_index s0 = bi_extract(b, idx, instr->src[0].swizzle[0]); + bi_index s1 = bi_alu_src_index(b, instr->src[1], comps); + bi_index s2 = bi_alu_src_index(b, instr->src[2], comps); + + if (!bi_nir_is_replicated(&instr->src[0])) { + s0 = bi_mkvec_v2i16( + b, bi_half(s0, false), + bi_half(bi_extract(b, idx, instr->src[0].swizzle[1]), false)); + } + + bi_mux_v2i16_to(b, dst, s2, s1, s0, BI_MUX_INT_ZERO); + return; + } + + default: + break; + } + + bi_index s0 = + srcs > 0 ? bi_alu_src_index(b, instr->src[0], comps) : bi_null(); + bi_index s1 = + srcs > 1 ? bi_alu_src_index(b, instr->src[1], comps) : bi_null(); + bi_index s2 = + srcs > 2 ? 
bi_alu_src_index(b, instr->src[2], comps) : bi_null(); + + switch (instr->op) { + case nir_op_ffma: + bi_fma_to(b, sz, dst, s0, s1, s2); + break; + + case nir_op_fmul: + bi_fma_to(b, sz, dst, s0, s1, bi_negzero()); + break; + + case nir_op_fsub: + s1 = bi_neg(s1); + FALLTHROUGH; + case nir_op_fadd: + bi_fadd_to(b, sz, dst, s0, s1); + break; + + case nir_op_fsat: { + bi_instr *I = bi_fclamp_to(b, sz, dst, s0); + I->clamp = BI_CLAMP_CLAMP_0_1; + break; + } + + case nir_op_fsat_signed_mali: { + bi_instr *I = bi_fclamp_to(b, sz, dst, s0); + I->clamp = BI_CLAMP_CLAMP_M1_1; + break; + } + + case nir_op_fclamp_pos_mali: { + bi_instr *I = bi_fclamp_to(b, sz, dst, s0); + I->clamp = BI_CLAMP_CLAMP_0_INF; + break; + } + + case nir_op_fneg: + bi_fabsneg_to(b, sz, dst, bi_neg(s0)); + break; + + case nir_op_fabs: + bi_fabsneg_to(b, sz, dst, bi_abs(s0)); + break; + + case nir_op_fsin: + bi_lower_fsincos_32(b, dst, s0, false); + break; + + case nir_op_fcos: + bi_lower_fsincos_32(b, dst, s0, true); + break; + + case nir_op_fexp2: + assert(sz == 32); /* should've been lowered */ + + if (b->shader->quirks & BIFROST_NO_FP32_TRANSCENDENTALS) + bi_lower_fexp2_32(b, dst, s0); + else + bi_fexp_32(b, dst, s0, bi_imm_f32(1.0f)); + + break; + + case nir_op_flog2: + assert(sz == 32); /* should've been lowered */ + + if (b->shader->quirks & BIFROST_NO_FP32_TRANSCENDENTALS) + bi_lower_flog2_32(b, dst, s0); + else + bi_flog2_32(b, dst, s0); + + break; + + case nir_op_fpow: + assert(sz == 32); /* should've been lowered */ + + if (b->shader->quirks & BIFROST_NO_FP32_TRANSCENDENTALS) + bi_lower_fpow_32(b, dst, s0, s1); + else + bi_fpow_32(b, dst, s0, s1); + + break; + + case nir_op_frexp_exp: + bi_frexpe_to(b, sz, dst, s0, false, false); + break; + + case nir_op_frexp_sig: + bi_frexpm_to(b, sz, dst, s0, false, false); + break; + + case nir_op_ldexp: + bi_ldexp_to(b, sz, dst, s0, s1); + break; + + case nir_op_b8csel: + bi_mux_v4i8_to(b, dst, s2, s1, s0, BI_MUX_INT_ZERO); + break; + + case nir_op_b16csel: + bi_mux_v2i16_to(b, dst, s2, s1, s0, BI_MUX_INT_ZERO); + break; + + case nir_op_b32csel: + bi_mux_i32_to(b, dst, s2, s1, s0, BI_MUX_INT_ZERO); + break; + + case nir_op_extract_u8: + case nir_op_extract_i8: { + assert(comps == 1 && "should be scalarized"); + assert((src_sz == 16 || src_sz == 32) && "should be lowered"); + unsigned byte = nir_src_as_uint(instr->src[1].src); + + if (s0.swizzle == BI_SWIZZLE_H11) { + assert(byte < 2); + byte += 2; + } else if (s0.swizzle != BI_SWIZZLE_H01) { + assert(s0.swizzle == BI_SWIZZLE_H00); + } + + assert(byte < 4); + + s0.swizzle = BI_SWIZZLE_H01; + + if (instr->op == nir_op_extract_i8) + bi_s8_to_s32_to(b, dst, bi_byte(s0, byte)); + else + bi_u8_to_u32_to(b, dst, bi_byte(s0, byte)); + break; + } + + case nir_op_extract_u16: + case nir_op_extract_i16: { + assert(comps == 1 && "should be scalarized"); + assert(src_sz == 32 && "should be lowered"); + unsigned half = nir_src_as_uint(instr->src[1].src); + assert(half == 0 || half == 1); + + if (instr->op == nir_op_extract_i16) + bi_s16_to_s32_to(b, dst, bi_half(s0, half)); + else + bi_u16_to_u32_to(b, dst, bi_half(s0, half)); + break; + } + + case nir_op_insert_u16: { + assert(comps == 1 && "should be scalarized"); + unsigned half = nir_src_as_uint(instr->src[1].src); + assert(half == 0 || half == 1); + + if (half == 0) + bi_u16_to_u32_to(b, dst, bi_half(s0, 0)); + else + bi_mkvec_v2i16_to(b, dst, bi_imm_u16(0), bi_half(s0, 0)); + break; + } + + case nir_op_ishl: + bi_lshift_or_to(b, sz, dst, s0, bi_zero(), bi_byte(s1, 0)); + break; + case 
nir_op_ushr: + bi_rshift_or_to(b, sz, dst, s0, bi_zero(), bi_byte(s1, 0), false); + break; + + case nir_op_ishr: + if (b->shader->arch >= 9) + bi_rshift_or_to(b, sz, dst, s0, bi_zero(), bi_byte(s1, 0), true); + else + bi_arshift_to(b, sz, dst, s0, bi_null(), bi_byte(s1, 0)); + break; + + case nir_op_imin: + case nir_op_umin: + bi_csel_to(b, nir_op_infos[instr->op].input_types[0], sz, dst, s0, s1, s0, + s1, BI_CMPF_LT); + break; + + case nir_op_imax: + case nir_op_umax: + bi_csel_to(b, nir_op_infos[instr->op].input_types[0], sz, dst, s0, s1, s0, + s1, BI_CMPF_GT); + break; + + case nir_op_fddx_must_abs_mali: + case nir_op_fddy_must_abs_mali: { + bi_index bit = bi_imm_u32(instr->op == nir_op_fddx_must_abs_mali ? 1 : 2); + bi_index adjacent = bi_clper_xor(b, s0, bit); + bi_fadd_to(b, sz, dst, adjacent, bi_neg(s0)); + break; + } + + case nir_op_fddx: + case nir_op_fddy: + case nir_op_fddx_coarse: + case nir_op_fddy_coarse: + case nir_op_fddx_fine: + case nir_op_fddy_fine: { + unsigned axis; + switch (instr->op) { + case nir_op_fddx: + case nir_op_fddx_coarse: + case nir_op_fddx_fine: + axis = 1; + break; + case nir_op_fddy: + case nir_op_fddy_coarse: + case nir_op_fddy_fine: + axis = 2; + break; + default: + unreachable("Invalid derivative op"); + } + + bi_index lane1, lane2; + switch (instr->op) { + case nir_op_fddx: + case nir_op_fddx_fine: + case nir_op_fddy: + case nir_op_fddy_fine: + lane1 = bi_lshift_and_i32(b, bi_fau(BIR_FAU_LANE_ID, false), + bi_imm_u32(0x3 & ~axis), bi_imm_u8(0)); + + lane2 = bi_iadd_u32(b, lane1, bi_imm_u32(axis), false); + break; + case nir_op_fddx_coarse: + case nir_op_fddy_coarse: + lane1 = bi_imm_u32(0); + lane2 = bi_imm_u32(axis); + break; + default: + unreachable("Invalid derivative op"); + } + + bi_index left, right; + + if (b->shader->quirks & BIFROST_LIMITED_CLPER) { + left = bi_clper_old_i32(b, s0, lane1); + right = bi_clper_old_i32(b, s0, lane2); + } else { + left = bi_clper_i32(b, s0, lane1, BI_INACTIVE_RESULT_ZERO, + BI_LANE_OP_NONE, BI_SUBGROUP_SUBGROUP4); + + right = bi_clper_i32(b, s0, lane2, BI_INACTIVE_RESULT_ZERO, + BI_LANE_OP_NONE, BI_SUBGROUP_SUBGROUP4); + } + + bi_fadd_to(b, sz, dst, right, bi_neg(left)); + break; + } + + case nir_op_f2f32: + bi_f16_to_f32_to(b, dst, s0); + break; + + case nir_op_fquantize2f16: { + bi_instr *f16 = bi_v2f32_to_v2f16_to(b, bi_temp(b->shader), s0, s0); + bi_instr *f32 = bi_f16_to_f32_to(b, dst, bi_half(f16->dest[0], false)); + + f16->ftz = f32->ftz = true; + break; + } + + case nir_op_f2i32: + if (src_sz == 32) + bi_f32_to_s32_to(b, dst, s0); + else + bi_f16_to_s32_to(b, dst, s0); + break; + + /* Note 32-bit sources => no vectorization, so 32-bit works */ + case nir_op_f2u16: + if (src_sz == 32) + bi_f32_to_u32_to(b, dst, s0); + else + bi_v2f16_to_v2u16_to(b, dst, s0); + break; + + case nir_op_f2i16: + if (src_sz == 32) + bi_f32_to_s32_to(b, dst, s0); + else + bi_v2f16_to_v2s16_to(b, dst, s0); + break; + + case nir_op_f2u32: + if (src_sz == 32) + bi_f32_to_u32_to(b, dst, s0); + else + bi_f16_to_u32_to(b, dst, s0); + break; + + case nir_op_u2f16: + if (src_sz == 32) + bi_v2u16_to_v2f16_to(b, dst, bi_half(s0, false)); + else if (src_sz == 16) + bi_v2u16_to_v2f16_to(b, dst, s0); + else if (src_sz == 8) + bi_v2u8_to_v2f16_to(b, dst, s0); + break; + + case nir_op_u2f32: + if (src_sz == 32) + bi_u32_to_f32_to(b, dst, s0); + else if (src_sz == 16) + bi_u16_to_f32_to(b, dst, s0); + else + bi_u8_to_f32_to(b, dst, s0); + break; + + case nir_op_i2f16: + if (src_sz == 32) + bi_v2s16_to_v2f16_to(b, dst, bi_half(s0, false)); + 
else if (src_sz == 16) + bi_v2s16_to_v2f16_to(b, dst, s0); + else if (src_sz == 8) + bi_v2s8_to_v2f16_to(b, dst, s0); + break; + + case nir_op_i2f32: + assert(src_sz == 32 || src_sz == 16 || src_sz == 8); + + if (src_sz == 32) + bi_s32_to_f32_to(b, dst, s0); + else if (src_sz == 16) + bi_s16_to_f32_to(b, dst, s0); + else if (src_sz == 8) + bi_s8_to_f32_to(b, dst, s0); + break; + + case nir_op_i2i32: + assert(src_sz == 32 || src_sz == 16 || src_sz == 8); + + if (src_sz == 32) + bi_mov_i32_to(b, dst, s0); + else if (src_sz == 16) + bi_s16_to_s32_to(b, dst, s0); + else if (src_sz == 8) + bi_s8_to_s32_to(b, dst, s0); + break; + + case nir_op_u2u32: + assert(src_sz == 32 || src_sz == 16 || src_sz == 8); + + if (src_sz == 32) + bi_mov_i32_to(b, dst, s0); + else if (src_sz == 16) + bi_u16_to_u32_to(b, dst, s0); + else if (src_sz == 8) + bi_u8_to_u32_to(b, dst, s0); + + break; + + case nir_op_i2i16: + assert(src_sz == 8 || src_sz == 32); + + if (src_sz == 8) + bi_v2s8_to_v2s16_to(b, dst, s0); + else + bi_mov_i32_to(b, dst, s0); + break; + + case nir_op_u2u16: + assert(src_sz == 8 || src_sz == 32); + + if (src_sz == 8) + bi_v2u8_to_v2u16_to(b, dst, s0); + else + bi_mov_i32_to(b, dst, s0); + break; + + case nir_op_b2i8: + case nir_op_b2i16: + case nir_op_b2i32: + bi_mux_to(b, sz, dst, bi_imm_u8(0), bi_imm_uintN(1, sz), s0, + BI_MUX_INT_ZERO); + break; + + case nir_op_f2b16: + bi_mux_v2i16_to(b, dst, bi_imm_u16(0), bi_imm_u16(~0), s0, + BI_MUX_FP_ZERO); + break; + case nir_op_f2b32: + bi_mux_i32_to(b, dst, bi_imm_u32(0), bi_imm_u32(~0), s0, BI_MUX_FP_ZERO); + break; + + case nir_op_ieq8: + case nir_op_ine8: + case nir_op_ilt8: + case nir_op_ige8: + case nir_op_ieq16: + case nir_op_ine16: + case nir_op_ilt16: + case nir_op_ige16: + case nir_op_ieq32: + case nir_op_ine32: + case nir_op_ilt32: + case nir_op_ige32: + bi_icmp_to(b, nir_type_int, sz, dst, s0, s1, bi_translate_cmpf(instr->op), + BI_RESULT_TYPE_M1); + break; + + case nir_op_ult8: + case nir_op_uge8: + case nir_op_ult16: + case nir_op_uge16: + case nir_op_ult32: + case nir_op_uge32: + bi_icmp_to(b, nir_type_uint, sz, dst, s0, s1, + bi_translate_cmpf(instr->op), BI_RESULT_TYPE_M1); + break; + + case nir_op_feq32: + case nir_op_feq16: + case nir_op_flt32: + case nir_op_flt16: + case nir_op_fge32: + case nir_op_fge16: + case nir_op_fneu32: + case nir_op_fneu16: + bi_fcmp_to(b, sz, dst, s0, s1, bi_translate_cmpf(instr->op), + BI_RESULT_TYPE_M1); + break; + + case nir_op_fround_even: + case nir_op_fceil: + case nir_op_ffloor: + case nir_op_ftrunc: + bi_fround_to(b, sz, dst, s0, bi_nir_round(instr->op)); + break; + + case nir_op_fmin: + bi_fmin_to(b, sz, dst, s0, s1); + break; + + case nir_op_fmax: + bi_fmax_to(b, sz, dst, s0, s1); + break; + + case nir_op_iadd: + bi_iadd_to(b, nir_type_int, sz, dst, s0, s1, false); + break; + + case nir_op_iadd_sat: + bi_iadd_to(b, nir_type_int, sz, dst, s0, s1, true); + break; + + case nir_op_uadd_sat: + bi_iadd_to(b, nir_type_uint, sz, dst, s0, s1, true); + break; + + case nir_op_ihadd: + bi_hadd_to(b, nir_type_int, sz, dst, s0, s1, BI_ROUND_RTN); + break; + + case nir_op_irhadd: + bi_hadd_to(b, nir_type_int, sz, dst, s0, s1, BI_ROUND_RTP); + break; + + case nir_op_uhadd: + bi_hadd_to(b, nir_type_uint, sz, dst, s0, s1, BI_ROUND_RTN); + break; + + case nir_op_urhadd: + bi_hadd_to(b, nir_type_uint, sz, dst, s0, s1, BI_ROUND_RTP); + break; + + case nir_op_ineg: + bi_isub_to(b, nir_type_int, sz, dst, bi_zero(), s0, false); + break; + + case nir_op_isub: + bi_isub_to(b, nir_type_int, sz, dst, s0, s1, false); + break; 
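
The fddx/fddy cases earlier in this switch derive screen-space derivatives by shuffling the operand across a 2x2 quad with CLPER and subtracting the "left" lane's value from the "right" lane's. The lane pairing is plain bit arithmetic on the lane ID, which is easy to miss inside the diff. The stand-alone sketch below is an editorial illustration only, not part of this patch: the helper name quad_derivative_lanes and the 0..3 quad lane numbering are assumptions made for the example, but the masking mirrors the lane1/lane2 computation in the code above (fine derivatives clear the axis bit, coarse derivatives always use lanes 0 and axis).

/* Editorial sketch (not part of this patch): model of the quad lane
 * pairing used by the fddx/fddy lowering above. */
#include <stdio.h>

/* axis == 1 selects the horizontal (X) neighbour, axis == 2 the vertical
 * (Y) neighbour, matching the bit handed to CLPER in the driver code. */
static void
quad_derivative_lanes(unsigned lane_id, unsigned axis, int coarse,
                      unsigned *left, unsigned *right)
{
   if (coarse) {
      /* Coarse derivatives: every lane in the quad uses the same pair. */
      *left = 0;
      *right = axis;
   } else {
      /* Fine derivatives: clear the axis bit to find the "low" lane of the
       * pair, then add the axis bit back to get its neighbour. */
      *left = lane_id & (0x3 & ~axis);
      *right = *left + axis;
   }
}

int
main(void)
{
   for (unsigned lane = 0; lane < 4; ++lane) {
      unsigned l, r;
      quad_derivative_lanes(lane, 1, 0, &l, &r);
      printf("lane %u: ddx_fine = v[%u] - v[%u]\n", lane, r, l);
   }
   return 0;
}

For lane IDs 0..3 this prints the pairs (0,1), (0,1), (2,3), (2,3) for X, i.e. each half of the quad shares a horizontal difference, which is exactly what the bi_fadd_to(b, sz, dst, right, bi_neg(left)) in the derivative case computes.
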
+ + case nir_op_isub_sat: + bi_isub_to(b, nir_type_int, sz, dst, s0, s1, true); + break; + + case nir_op_usub_sat: + bi_isub_to(b, nir_type_uint, sz, dst, s0, s1, true); + break; + + case nir_op_imul: + bi_imul_to(b, sz, dst, s0, s1); + break; + + case nir_op_iabs: + bi_iabs_to(b, sz, dst, s0); + break; + + case nir_op_iand: + bi_lshift_and_to(b, sz, dst, s0, s1, bi_imm_u8(0)); + break; + + case nir_op_ior: + bi_lshift_or_to(b, sz, dst, s0, s1, bi_imm_u8(0)); + break; + + case nir_op_ixor: + bi_lshift_xor_to(b, sz, dst, s0, s1, bi_imm_u8(0)); + break; + + case nir_op_inot: + bi_lshift_or_to(b, sz, dst, bi_zero(), bi_not(s0), bi_imm_u8(0)); + break; + + case nir_op_frsq: + if (sz == 32 && b->shader->quirks & BIFROST_NO_FP32_TRANSCENDENTALS) + bi_lower_frsq_32(b, dst, s0); + else + bi_frsq_to(b, sz, dst, s0); + break; + + case nir_op_frcp: + if (sz == 32 && b->shader->quirks & BIFROST_NO_FP32_TRANSCENDENTALS) + bi_lower_frcp_32(b, dst, s0); + else + bi_frcp_to(b, sz, dst, s0); + break; + + case nir_op_uclz: + bi_clz_to(b, sz, dst, s0, false); + break; + + case nir_op_bit_count: + assert(sz == 32 && src_sz == 32 && "should've been lowered"); + bi_popcount_i32_to(b, dst, s0); + break; + + case nir_op_bitfield_reverse: + assert(sz == 32 && src_sz == 32 && "should've been lowered"); + bi_bitrev_i32_to(b, dst, s0); + break; + + case nir_op_ufind_msb: { + bi_index clz = bi_clz(b, src_sz, s0, false); + + if (sz == 8) + clz = bi_byte(clz, 0); + else if (sz == 16) + clz = bi_half(clz, false); + + bi_isub_u32_to(b, dst, bi_imm_u32(src_sz - 1), clz, false); + break; + } + + default: + fprintf(stderr, "Unhandled ALU op %s\n", nir_op_infos[instr->op].name); + unreachable("Unknown ALU op"); + } } -/* Returns dimension with 0 special casing cubemaps. Shamelessly copied from Midgard */ +/* Returns dimension with 0 special casing cubemaps. 
Shamelessly copied from + * Midgard */ static unsigned bifrost_tex_format(enum glsl_sampler_dim dim) { - switch (dim) { - case GLSL_SAMPLER_DIM_1D: - case GLSL_SAMPLER_DIM_BUF: - return 1; + switch (dim) { + case GLSL_SAMPLER_DIM_1D: + case GLSL_SAMPLER_DIM_BUF: + return 1; - case GLSL_SAMPLER_DIM_2D: - case GLSL_SAMPLER_DIM_MS: - case GLSL_SAMPLER_DIM_EXTERNAL: - case GLSL_SAMPLER_DIM_RECT: - return 2; + case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_MS: + case GLSL_SAMPLER_DIM_EXTERNAL: + case GLSL_SAMPLER_DIM_RECT: + return 2; - case GLSL_SAMPLER_DIM_3D: - return 3; + case GLSL_SAMPLER_DIM_3D: + return 3; - case GLSL_SAMPLER_DIM_CUBE: - return 0; + case GLSL_SAMPLER_DIM_CUBE: + return 0; - default: - DBG("Unknown sampler dim type\n"); - assert(0); - return 0; - } + default: + DBG("Unknown sampler dim type\n"); + assert(0); + return 0; + } } static enum bi_dimension valhall_tex_dimension(enum glsl_sampler_dim dim) { - switch (dim) { - case GLSL_SAMPLER_DIM_1D: - case GLSL_SAMPLER_DIM_BUF: - return BI_DIMENSION_1D; + switch (dim) { + case GLSL_SAMPLER_DIM_1D: + case GLSL_SAMPLER_DIM_BUF: + return BI_DIMENSION_1D; - case GLSL_SAMPLER_DIM_2D: - case GLSL_SAMPLER_DIM_MS: - case GLSL_SAMPLER_DIM_EXTERNAL: - case GLSL_SAMPLER_DIM_RECT: - return BI_DIMENSION_2D; + case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_MS: + case GLSL_SAMPLER_DIM_EXTERNAL: + case GLSL_SAMPLER_DIM_RECT: + return BI_DIMENSION_2D; - case GLSL_SAMPLER_DIM_3D: - return BI_DIMENSION_3D; + case GLSL_SAMPLER_DIM_3D: + return BI_DIMENSION_3D; - case GLSL_SAMPLER_DIM_CUBE: - return BI_DIMENSION_CUBE; + case GLSL_SAMPLER_DIM_CUBE: + return BI_DIMENSION_CUBE; - default: - unreachable("Unknown sampler dim type"); - } + default: + unreachable("Unknown sampler dim type"); + } } static enum bifrost_texture_format_full bi_texture_format(nir_alu_type T, enum bi_clamp clamp) { - switch (T) { - case nir_type_float16: return BIFROST_TEXTURE_FORMAT_F16 + clamp; - case nir_type_float32: return BIFROST_TEXTURE_FORMAT_F32 + clamp; - case nir_type_uint16: return BIFROST_TEXTURE_FORMAT_U16; - case nir_type_int16: return BIFROST_TEXTURE_FORMAT_S16; - case nir_type_uint32: return BIFROST_TEXTURE_FORMAT_U32; - case nir_type_int32: return BIFROST_TEXTURE_FORMAT_S32; - default: unreachable("Invalid type for texturing"); - } + switch (T) { + case nir_type_float16: + return BIFROST_TEXTURE_FORMAT_F16 + clamp; + case nir_type_float32: + return BIFROST_TEXTURE_FORMAT_F32 + clamp; + case nir_type_uint16: + return BIFROST_TEXTURE_FORMAT_U16; + case nir_type_int16: + return BIFROST_TEXTURE_FORMAT_S16; + case nir_type_uint32: + return BIFROST_TEXTURE_FORMAT_U32; + case nir_type_int32: + return BIFROST_TEXTURE_FORMAT_S32; + default: + unreachable("Invalid type for texturing"); + } } -/* Array indices are specified as 32-bit uints, need to convert. In .z component from NIR */ +/* Array indices are specified as 32-bit uints, need to convert. 
In .z component + * from NIR */ static bi_index bi_emit_texc_array_index(bi_builder *b, bi_index idx, nir_alu_type T) { - /* For (u)int we can just passthrough */ - nir_alu_type base = nir_alu_type_get_base_type(T); - if (base == nir_type_int || base == nir_type_uint) - return idx; + /* For (u)int we can just passthrough */ + nir_alu_type base = nir_alu_type_get_base_type(T); + if (base == nir_type_int || base == nir_type_uint) + return idx; - /* Otherwise we convert */ - assert(T == nir_type_float32); + /* Otherwise we convert */ + assert(T == nir_type_float32); - /* OpenGL ES 3.2 specification section 8.14.2 ("Coordinate Wrapping and - * Texel Selection") defines the layer to be taken from clamp(RNE(r), - * 0, dt - 1). So we use round RTE, clamping is handled at the data - * structure level */ + /* OpenGL ES 3.2 specification section 8.14.2 ("Coordinate Wrapping and + * Texel Selection") defines the layer to be taken from clamp(RNE(r), + * 0, dt - 1). So we use round RTE, clamping is handled at the data + * structure level */ - bi_instr *I = bi_f32_to_u32_to(b, bi_temp(b->shader), idx); - I->round = BI_ROUND_NONE; - return I->dest[0]; + bi_instr *I = bi_f32_to_u32_to(b, bi_temp(b->shader), idx); + I->round = BI_ROUND_NONE; + return I->dest[0]; } /* TEXC's explicit and bias LOD modes requires the LOD to be transformed to a @@ -3179,30 +3163,30 @@ bi_emit_texc_array_index(bi_builder *b, bi_index idx, nir_alu_type T) static bi_index bi_emit_texc_lod_88(bi_builder *b, bi_index lod, bool fp16) { - /* Precompute for constant LODs to avoid general constant folding */ - if (lod.type == BI_INDEX_CONSTANT) { - uint32_t raw = lod.value; - float x = fp16 ? _mesa_half_to_float(raw) : uif(raw); - int32_t s32 = CLAMP(x, -16.0f, 16.0f) * 256.0f; - return bi_imm_u32(s32 & 0xFFFF); - } + /* Precompute for constant LODs to avoid general constant folding */ + if (lod.type == BI_INDEX_CONSTANT) { + uint32_t raw = lod.value; + float x = fp16 ? _mesa_half_to_float(raw) : uif(raw); + int32_t s32 = CLAMP(x, -16.0f, 16.0f) * 256.0f; + return bi_imm_u32(s32 & 0xFFFF); + } - /* Sort of arbitrary. Must be less than 128.0, greater than or equal to - * the max LOD (16 since we cap at 2^16 texture dimensions), and - * preferably small to minimize precision loss */ - const float max_lod = 16.0; + /* Sort of arbitrary. Must be less than 128.0, greater than or equal to + * the max LOD (16 since we cap at 2^16 texture dimensions), and + * preferably small to minimize precision loss */ + const float max_lod = 16.0; - bi_instr *fsat = bi_fma_f32_to(b, bi_temp(b->shader), - fp16 ? bi_half(lod, false) : lod, - bi_imm_f32(1.0f / max_lod), bi_negzero()); + bi_instr *fsat = + bi_fma_f32_to(b, bi_temp(b->shader), fp16 ? 
bi_half(lod, false) : lod, + bi_imm_f32(1.0f / max_lod), bi_negzero()); - fsat->clamp = BI_CLAMP_CLAMP_M1_1; + fsat->clamp = BI_CLAMP_CLAMP_M1_1; - bi_index fmul = bi_fma_f32(b, fsat->dest[0], bi_imm_f32(max_lod * 256.0f), - bi_negzero()); + bi_index fmul = + bi_fma_f32(b, fsat->dest[0], bi_imm_f32(max_lod * 256.0f), bi_negzero()); - return bi_mkvec_v2i16(b, - bi_half(bi_f32_to_s32(b, fmul), false), bi_imm_u16(0)); + return bi_mkvec_v2i16(b, bi_half(bi_f32_to_s32(b, fmul), false), + bi_imm_u16(0)); } /* FETCH takes a 32-bit staging register containing the LOD as an integer in @@ -3213,7 +3197,7 @@ bi_emit_texc_lod_88(bi_builder *b, bi_index lod, bool fp16) static bi_index bi_emit_texc_lod_cube(bi_builder *b, bi_index lod) { - return bi_lshift_or_i32(b, lod, bi_zero(), bi_imm_u8(8)); + return bi_lshift_or_i32(b, lod, bi_zero(), bi_imm_u8(8)); } /* The hardware specifies texel offsets and multisample indices together as a @@ -3225,31 +3209,28 @@ bi_emit_texc_lod_cube(bi_builder *b, bi_index lod) static bi_index bi_emit_texc_offset_ms_index(bi_builder *b, nir_tex_instr *instr) { - bi_index dest = bi_zero(); + bi_index dest = bi_zero(); - int offs_idx = nir_tex_instr_src_index(instr, nir_tex_src_offset); - if (offs_idx >= 0 && - (!nir_src_is_const(instr->src[offs_idx].src) || - nir_src_as_uint(instr->src[offs_idx].src) != 0)) { - unsigned nr = nir_src_num_components(instr->src[offs_idx].src); - bi_index idx = bi_src_index(&instr->src[offs_idx].src); - dest = bi_mkvec_v4i8(b, - (nr > 0) ? bi_byte(bi_extract(b, idx, 0), 0) : bi_imm_u8(0), - (nr > 1) ? bi_byte(bi_extract(b, idx, 1), 0) : bi_imm_u8(0), - (nr > 2) ? bi_byte(bi_extract(b, idx, 2), 0) : bi_imm_u8(0), - bi_imm_u8(0)); - } + int offs_idx = nir_tex_instr_src_index(instr, nir_tex_src_offset); + if (offs_idx >= 0 && (!nir_src_is_const(instr->src[offs_idx].src) || + nir_src_as_uint(instr->src[offs_idx].src) != 0)) { + unsigned nr = nir_src_num_components(instr->src[offs_idx].src); + bi_index idx = bi_src_index(&instr->src[offs_idx].src); + dest = bi_mkvec_v4i8( + b, (nr > 0) ? bi_byte(bi_extract(b, idx, 0), 0) : bi_imm_u8(0), + (nr > 1) ? bi_byte(bi_extract(b, idx, 1), 0) : bi_imm_u8(0), + (nr > 2) ? 
bi_byte(bi_extract(b, idx, 2), 0) : bi_imm_u8(0), + bi_imm_u8(0)); + } - int ms_idx = nir_tex_instr_src_index(instr, nir_tex_src_ms_index); - if (ms_idx >= 0 && - (!nir_src_is_const(instr->src[ms_idx].src) || - nir_src_as_uint(instr->src[ms_idx].src) != 0)) { - dest = bi_lshift_or_i32(b, - bi_src_index(&instr->src[ms_idx].src), dest, - bi_imm_u8(24)); - } + int ms_idx = nir_tex_instr_src_index(instr, nir_tex_src_ms_index); + if (ms_idx >= 0 && (!nir_src_is_const(instr->src[ms_idx].src) || + nir_src_as_uint(instr->src[ms_idx].src) != 0)) { + dest = bi_lshift_or_i32(b, bi_src_index(&instr->src[ms_idx].src), dest, + bi_imm_u8(24)); + } - return dest; + return dest; } /* @@ -3261,107 +3242,102 @@ bi_emit_texc_offset_ms_index(bi_builder *b, nir_tex_instr *instr) static bi_index bi_emit_valhall_offsets(bi_builder *b, nir_tex_instr *instr) { - bi_index dest = bi_zero(); + bi_index dest = bi_zero(); - int offs_idx = nir_tex_instr_src_index(instr, nir_tex_src_offset); - int ms_idx = nir_tex_instr_src_index(instr, nir_tex_src_ms_index); - int lod_idx = nir_tex_instr_src_index(instr, nir_tex_src_lod); + int offs_idx = nir_tex_instr_src_index(instr, nir_tex_src_offset); + int ms_idx = nir_tex_instr_src_index(instr, nir_tex_src_ms_index); + int lod_idx = nir_tex_instr_src_index(instr, nir_tex_src_lod); - /* Components 0-2: offsets */ - if (offs_idx >= 0 && - (!nir_src_is_const(instr->src[offs_idx].src) || - nir_src_as_uint(instr->src[offs_idx].src) != 0)) { - unsigned nr = nir_src_num_components(instr->src[offs_idx].src); - bi_index idx = bi_src_index(&instr->src[offs_idx].src); + /* Components 0-2: offsets */ + if (offs_idx >= 0 && (!nir_src_is_const(instr->src[offs_idx].src) || + nir_src_as_uint(instr->src[offs_idx].src) != 0)) { + unsigned nr = nir_src_num_components(instr->src[offs_idx].src); + bi_index idx = bi_src_index(&instr->src[offs_idx].src); - /* No multisample index with 3D */ - assert((nr <= 2) || (ms_idx < 0)); + /* No multisample index with 3D */ + assert((nr <= 2) || (ms_idx < 0)); - /* Zero extend the Z byte so we can use it with MKVEC.v2i8 */ - bi_index z = (nr > 2) ? - bi_mkvec_v2i8(b, bi_byte(bi_extract(b, idx, 2), 0), - bi_imm_u8(0), bi_zero()) : - bi_zero(); + /* Zero extend the Z byte so we can use it with MKVEC.v2i8 */ + bi_index z = (nr > 2) + ? bi_mkvec_v2i8(b, bi_byte(bi_extract(b, idx, 2), 0), + bi_imm_u8(0), bi_zero()) + : bi_zero(); - dest = bi_mkvec_v2i8(b, - (nr > 0) ? bi_byte(bi_extract(b, idx, 0), 0) : bi_imm_u8(0), - (nr > 1) ? bi_byte(bi_extract(b, idx, 1), 0) : bi_imm_u8(0), - z); - } + dest = bi_mkvec_v2i8( + b, (nr > 0) ? bi_byte(bi_extract(b, idx, 0), 0) : bi_imm_u8(0), + (nr > 1) ? 
bi_byte(bi_extract(b, idx, 1), 0) : bi_imm_u8(0), z); + } - /* Component 2: multisample index */ - if (ms_idx >= 0 && - (!nir_src_is_const(instr->src[ms_idx].src) || - nir_src_as_uint(instr->src[ms_idx].src) != 0)) { - dest = bi_mkvec_v2i16(b, dest, - bi_src_index(&instr->src[ms_idx].src)); - } + /* Component 2: multisample index */ + if (ms_idx >= 0 && (!nir_src_is_const(instr->src[ms_idx].src) || + nir_src_as_uint(instr->src[ms_idx].src) != 0)) { + dest = bi_mkvec_v2i16(b, dest, bi_src_index(&instr->src[ms_idx].src)); + } - /* Component 3: 8-bit LOD */ - if (lod_idx >= 0 && - (!nir_src_is_const(instr->src[lod_idx].src) || - nir_src_as_uint(instr->src[lod_idx].src) != 0) && - nir_tex_instr_src_type(instr, lod_idx) != nir_type_float) { - dest = bi_lshift_or_i32(b, - bi_src_index(&instr->src[lod_idx].src), dest, - bi_imm_u8(24)); - } + /* Component 3: 8-bit LOD */ + if (lod_idx >= 0 && + (!nir_src_is_const(instr->src[lod_idx].src) || + nir_src_as_uint(instr->src[lod_idx].src) != 0) && + nir_tex_instr_src_type(instr, lod_idx) != nir_type_float) { + dest = bi_lshift_or_i32(b, bi_src_index(&instr->src[lod_idx].src), dest, + bi_imm_u8(24)); + } - return dest; + return dest; } static void -bi_emit_cube_coord(bi_builder *b, bi_index coord, - bi_index *face, bi_index *s, bi_index *t) +bi_emit_cube_coord(bi_builder *b, bi_index coord, bi_index *face, bi_index *s, + bi_index *t) { - /* Compute max { |x|, |y|, |z| } */ - bi_index maxxyz = bi_temp(b->shader); - *face = bi_temp(b->shader); + /* Compute max { |x|, |y|, |z| } */ + bi_index maxxyz = bi_temp(b->shader); + *face = bi_temp(b->shader); - bi_index cx = bi_extract(b, coord, 0), - cy = bi_extract(b, coord, 1), - cz = bi_extract(b, coord, 2); + bi_index cx = bi_extract(b, coord, 0), cy = bi_extract(b, coord, 1), + cz = bi_extract(b, coord, 2); - /* Use a pseudo op on Bifrost due to tuple restrictions */ - if (b->shader->arch <= 8) { - bi_cubeface_to(b, maxxyz, *face, cx, cy, cz); - } else { - bi_cubeface1_to(b, maxxyz, cx, cy, cz); - bi_cubeface2_v9_to(b, *face, cx, cy, cz); - } + /* Use a pseudo op on Bifrost due to tuple restrictions */ + if (b->shader->arch <= 8) { + bi_cubeface_to(b, maxxyz, *face, cx, cy, cz); + } else { + bi_cubeface1_to(b, maxxyz, cx, cy, cz); + bi_cubeface2_v9_to(b, *face, cx, cy, cz); + } - /* Select coordinates */ - bi_index ssel = bi_cube_ssel(b, bi_extract(b, coord, 2), bi_extract(b, coord, 0), *face); - bi_index tsel = bi_cube_tsel(b, bi_extract(b, coord, 1), bi_extract(b, coord, 2), - *face); + /* Select coordinates */ + bi_index ssel = + bi_cube_ssel(b, bi_extract(b, coord, 2), bi_extract(b, coord, 0), *face); + bi_index tsel = + bi_cube_tsel(b, bi_extract(b, coord, 1), bi_extract(b, coord, 2), *face); - /* The OpenGL ES specification requires us to transform an input vector - * (x, y, z) to the coordinate, given the selected S/T: - * - * (1/2 ((s / max{x,y,z}) + 1), 1/2 ((t / max{x, y, z}) + 1)) - * - * We implement (s shown, t similar) in a form friendlier to FMA - * instructions, and clamp coordinates at the end for correct - * NaN/infinity handling: - * - * fsat(s * (0.5 * (1 / max{x, y, z})) + 0.5) - * - * Take the reciprocal of max{x, y, z} - */ - bi_index rcp = bi_frcp_f32(b, maxxyz); + /* The OpenGL ES specification requires us to transform an input vector + * (x, y, z) to the coordinate, given the selected S/T: + * + * (1/2 ((s / max{x,y,z}) + 1), 1/2 ((t / max{x, y, z}) + 1)) + * + * We implement (s shown, t similar) in a form friendlier to FMA + * instructions, and clamp coordinates at the end for 
correct + * NaN/infinity handling: + * + * fsat(s * (0.5 * (1 / max{x, y, z})) + 0.5) + * + * Take the reciprocal of max{x, y, z} + */ + bi_index rcp = bi_frcp_f32(b, maxxyz); - /* Calculate 0.5 * (1.0 / max{x, y, z}) */ - bi_index fma1 = bi_fma_f32(b, rcp, bi_imm_f32(0.5f), bi_negzero()); + /* Calculate 0.5 * (1.0 / max{x, y, z}) */ + bi_index fma1 = bi_fma_f32(b, rcp, bi_imm_f32(0.5f), bi_negzero()); - /* Transform the coordinates */ - *s = bi_temp(b->shader); - *t = bi_temp(b->shader); + /* Transform the coordinates */ + *s = bi_temp(b->shader); + *t = bi_temp(b->shader); - bi_instr *S = bi_fma_f32_to(b, *s, fma1, ssel, bi_imm_f32(0.5f)); - bi_instr *T = bi_fma_f32_to(b, *t, fma1, tsel, bi_imm_f32(0.5f)); + bi_instr *S = bi_fma_f32_to(b, *s, fma1, ssel, bi_imm_f32(0.5f)); + bi_instr *T = bi_fma_f32_to(b, *t, fma1, tsel, bi_imm_f32(0.5f)); - S->clamp = BI_CLAMP_CLAMP_0_1; - T->clamp = BI_CLAMP_CLAMP_0_1; + S->clamp = BI_CLAMP_CLAMP_0_1; + T->clamp = BI_CLAMP_CLAMP_0_1; } /* Emits a cube map descriptor, returning lower 32-bits and putting upper @@ -3383,10 +3359,10 @@ bi_emit_cube_coord(bi_builder *b, bi_index coord, static bi_index bi_emit_texc_cube_coord(bi_builder *b, bi_index coord, bi_index *t) { - bi_index face, s; - bi_emit_cube_coord(b, coord, &face, &s, t); - bi_index mask = bi_imm_u32(BITFIELD_MASK(29)); - return bi_mux_i32(b, s, face, mask, BI_MUX_BIT); + bi_index face, s; + bi_emit_cube_coord(b, coord, &face, &s, t); + bi_index mask = bi_imm_u32(BITFIELD_MASK(29)); + return bi_mux_i32(b, s, face, mask, BI_MUX_BIT); } /* Map to the main texture op used. Some of these (txd in particular) will @@ -3397,27 +3373,27 @@ bi_emit_texc_cube_coord(bi_builder *b, bi_index coord, bi_index *t) static enum bifrost_tex_op bi_tex_op(nir_texop op) { - switch (op) { - case nir_texop_tex: - case nir_texop_txb: - case nir_texop_txl: - case nir_texop_txd: - case nir_texop_tex_prefetch: - return BIFROST_TEX_OP_TEX; - case nir_texop_txf: - case nir_texop_txf_ms: - case nir_texop_txf_ms_fb: - case nir_texop_tg4: - return BIFROST_TEX_OP_FETCH; - case nir_texop_txs: - case nir_texop_lod: - case nir_texop_query_levels: - case nir_texop_texture_samples: - case nir_texop_samples_identical: - unreachable("should've been lowered"); - default: - unreachable("unsupported tex op"); - } + switch (op) { + case nir_texop_tex: + case nir_texop_txb: + case nir_texop_txl: + case nir_texop_txd: + case nir_texop_tex_prefetch: + return BIFROST_TEX_OP_TEX; + case nir_texop_txf: + case nir_texop_txf_ms: + case nir_texop_txf_ms_fb: + case nir_texop_tg4: + return BIFROST_TEX_OP_FETCH; + case nir_texop_txs: + case nir_texop_lod: + case nir_texop_query_levels: + case nir_texop_texture_samples: + case nir_texop_samples_identical: + unreachable("should've been lowered"); + default: + unreachable("unsupported tex op"); + } } /* Data registers required by texturing in the order they appear. 
All are @@ -3426,422 +3402,415 @@ bi_tex_op(nir_texop op) * ARRAY/SHADOW are exlusive, so TEXC in practice reads at most 8 registers */ enum bifrost_tex_dreg { - BIFROST_TEX_DREG_Z_COORD = 0, - BIFROST_TEX_DREG_Y_DELTAS = 1, - BIFROST_TEX_DREG_LOD = 2, - BIFROST_TEX_DREG_GRDESC_HI = 3, - BIFROST_TEX_DREG_SHADOW = 4, - BIFROST_TEX_DREG_ARRAY = 5, - BIFROST_TEX_DREG_OFFSETMS = 6, - BIFROST_TEX_DREG_SAMPLER = 7, - BIFROST_TEX_DREG_TEXTURE = 8, - BIFROST_TEX_DREG_COUNT, + BIFROST_TEX_DREG_Z_COORD = 0, + BIFROST_TEX_DREG_Y_DELTAS = 1, + BIFROST_TEX_DREG_LOD = 2, + BIFROST_TEX_DREG_GRDESC_HI = 3, + BIFROST_TEX_DREG_SHADOW = 4, + BIFROST_TEX_DREG_ARRAY = 5, + BIFROST_TEX_DREG_OFFSETMS = 6, + BIFROST_TEX_DREG_SAMPLER = 7, + BIFROST_TEX_DREG_TEXTURE = 8, + BIFROST_TEX_DREG_COUNT, }; static void bi_emit_texc(bi_builder *b, nir_tex_instr *instr) { - struct bifrost_texture_operation desc = { - .op = bi_tex_op(instr->op), - .offset_or_bias_disable = false, /* TODO */ - .shadow_or_clamp_disable = instr->is_shadow, - .array = instr->is_array, - .dimension = bifrost_tex_format(instr->sampler_dim), - .format = bi_texture_format(instr->dest_type | nir_dest_bit_size(instr->dest), BI_CLAMP_NONE), /* TODO */ - .mask = 0xF, - }; + struct bifrost_texture_operation desc = { + .op = bi_tex_op(instr->op), + .offset_or_bias_disable = false, /* TODO */ + .shadow_or_clamp_disable = instr->is_shadow, + .array = instr->is_array, + .dimension = bifrost_tex_format(instr->sampler_dim), + .format = + bi_texture_format(instr->dest_type | nir_dest_bit_size(instr->dest), + BI_CLAMP_NONE), /* TODO */ + .mask = 0xF, + }; - switch (desc.op) { - case BIFROST_TEX_OP_TEX: - desc.lod_or_fetch = BIFROST_LOD_MODE_COMPUTE; - break; - case BIFROST_TEX_OP_FETCH: - desc.lod_or_fetch = (enum bifrost_lod_mode) - (instr->op == nir_texop_tg4 ? - BIFROST_TEXTURE_FETCH_GATHER4_R + instr->component : - BIFROST_TEXTURE_FETCH_TEXEL); - break; - default: - unreachable("texture op unsupported"); - } + switch (desc.op) { + case BIFROST_TEX_OP_TEX: + desc.lod_or_fetch = BIFROST_LOD_MODE_COMPUTE; + break; + case BIFROST_TEX_OP_FETCH: + desc.lod_or_fetch = (enum bifrost_lod_mode)( + instr->op == nir_texop_tg4 + ? 
BIFROST_TEXTURE_FETCH_GATHER4_R + instr->component + : BIFROST_TEXTURE_FETCH_TEXEL); + break; + default: + unreachable("texture op unsupported"); + } - /* 32-bit indices to be allocated as consecutive staging registers */ - bi_index dregs[BIFROST_TEX_DREG_COUNT] = { }; - bi_index cx = bi_null(), cy = bi_null(); + /* 32-bit indices to be allocated as consecutive staging registers */ + bi_index dregs[BIFROST_TEX_DREG_COUNT] = {}; + bi_index cx = bi_null(), cy = bi_null(); - for (unsigned i = 0; i < instr->num_srcs; ++i) { - bi_index index = bi_src_index(&instr->src[i].src); - unsigned sz = nir_src_bit_size(instr->src[i].src); - unsigned components = nir_src_num_components(instr->src[i].src); - ASSERTED nir_alu_type base = nir_tex_instr_src_type(instr, i); - nir_alu_type T = base | sz; + for (unsigned i = 0; i < instr->num_srcs; ++i) { + bi_index index = bi_src_index(&instr->src[i].src); + unsigned sz = nir_src_bit_size(instr->src[i].src); + unsigned components = nir_src_num_components(instr->src[i].src); + ASSERTED nir_alu_type base = nir_tex_instr_src_type(instr, i); + nir_alu_type T = base | sz; - switch (instr->src[i].src_type) { - case nir_tex_src_coord: - if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { - cx = bi_emit_texc_cube_coord(b, index, &cy); - } else { - /* Copy XY (for 2D+) or XX (for 1D) */ - cx = bi_extract(b, index, 0); - cy = bi_extract(b, index, MIN2(1, components - 1)); + switch (instr->src[i].src_type) { + case nir_tex_src_coord: + if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { + cx = bi_emit_texc_cube_coord(b, index, &cy); + } else { + /* Copy XY (for 2D+) or XX (for 1D) */ + cx = bi_extract(b, index, 0); + cy = bi_extract(b, index, MIN2(1, components - 1)); - assert(components >= 1 && components <= 3); + assert(components >= 1 && components <= 3); - if (components == 3 && !desc.array) { - /* 3D */ - dregs[BIFROST_TEX_DREG_Z_COORD] = - bi_extract(b, index, 2); - } - } + if (components == 3 && !desc.array) { + /* 3D */ + dregs[BIFROST_TEX_DREG_Z_COORD] = bi_extract(b, index, 2); + } + } - if (desc.array) { - dregs[BIFROST_TEX_DREG_ARRAY] = - bi_emit_texc_array_index(b, - bi_extract(b, index, components - 1), T); - } + if (desc.array) { + dregs[BIFROST_TEX_DREG_ARRAY] = bi_emit_texc_array_index( + b, bi_extract(b, index, components - 1), T); + } - break; + break; - case nir_tex_src_lod: - if (desc.op == BIFROST_TEX_OP_TEX && - nir_src_is_const(instr->src[i].src) && - nir_src_as_uint(instr->src[i].src) == 0) { - desc.lod_or_fetch = BIFROST_LOD_MODE_ZERO; - } else if (desc.op == BIFROST_TEX_OP_TEX) { - assert(base == nir_type_float); + case nir_tex_src_lod: + if (desc.op == BIFROST_TEX_OP_TEX && + nir_src_is_const(instr->src[i].src) && + nir_src_as_uint(instr->src[i].src) == 0) { + desc.lod_or_fetch = BIFROST_LOD_MODE_ZERO; + } else if (desc.op == BIFROST_TEX_OP_TEX) { + assert(base == nir_type_float); - assert(sz == 16 || sz == 32); - dregs[BIFROST_TEX_DREG_LOD] = - bi_emit_texc_lod_88(b, index, sz == 16); - desc.lod_or_fetch = BIFROST_LOD_MODE_EXPLICIT; - } else { - assert(desc.op == BIFROST_TEX_OP_FETCH); - assert(base == nir_type_uint || base == nir_type_int); - assert(sz == 16 || sz == 32); + assert(sz == 16 || sz == 32); + dregs[BIFROST_TEX_DREG_LOD] = + bi_emit_texc_lod_88(b, index, sz == 16); + desc.lod_or_fetch = BIFROST_LOD_MODE_EXPLICIT; + } else { + assert(desc.op == BIFROST_TEX_OP_FETCH); + assert(base == nir_type_uint || base == nir_type_int); + assert(sz == 16 || sz == 32); - dregs[BIFROST_TEX_DREG_LOD] = - bi_emit_texc_lod_cube(b, index); - } + 
dregs[BIFROST_TEX_DREG_LOD] = bi_emit_texc_lod_cube(b, index); + } - break; + break; - case nir_tex_src_bias: - /* Upper 16-bits interpreted as a clamp, leave zero */ - assert(desc.op == BIFROST_TEX_OP_TEX); - assert(base == nir_type_float); - assert(sz == 16 || sz == 32); - dregs[BIFROST_TEX_DREG_LOD] = - bi_emit_texc_lod_88(b, index, sz == 16); - desc.lod_or_fetch = BIFROST_LOD_MODE_BIAS; - break; + case nir_tex_src_bias: + /* Upper 16-bits interpreted as a clamp, leave zero */ + assert(desc.op == BIFROST_TEX_OP_TEX); + assert(base == nir_type_float); + assert(sz == 16 || sz == 32); + dregs[BIFROST_TEX_DREG_LOD] = bi_emit_texc_lod_88(b, index, sz == 16); + desc.lod_or_fetch = BIFROST_LOD_MODE_BIAS; + break; - case nir_tex_src_ms_index: - case nir_tex_src_offset: - if (desc.offset_or_bias_disable) - break; + case nir_tex_src_ms_index: + case nir_tex_src_offset: + if (desc.offset_or_bias_disable) + break; - dregs[BIFROST_TEX_DREG_OFFSETMS] = - bi_emit_texc_offset_ms_index(b, instr); - if (!bi_is_equiv(dregs[BIFROST_TEX_DREG_OFFSETMS], bi_zero())) - desc.offset_or_bias_disable = true; - break; + dregs[BIFROST_TEX_DREG_OFFSETMS] = + bi_emit_texc_offset_ms_index(b, instr); + if (!bi_is_equiv(dregs[BIFROST_TEX_DREG_OFFSETMS], bi_zero())) + desc.offset_or_bias_disable = true; + break; - case nir_tex_src_comparator: - dregs[BIFROST_TEX_DREG_SHADOW] = index; - break; + case nir_tex_src_comparator: + dregs[BIFROST_TEX_DREG_SHADOW] = index; + break; - case nir_tex_src_texture_offset: - if (instr->texture_index) - index = bi_iadd_u32(b, index, bi_imm_u32(instr->texture_index), false); + case nir_tex_src_texture_offset: + if (instr->texture_index) + index = + bi_iadd_u32(b, index, bi_imm_u32(instr->texture_index), false); - dregs[BIFROST_TEX_DREG_TEXTURE] = index; + dregs[BIFROST_TEX_DREG_TEXTURE] = index; - break; + break; - case nir_tex_src_sampler_offset: - if (instr->sampler_index) - index = bi_iadd_u32(b, index, bi_imm_u32(instr->sampler_index), false); + case nir_tex_src_sampler_offset: + if (instr->sampler_index) + index = + bi_iadd_u32(b, index, bi_imm_u32(instr->sampler_index), false); - dregs[BIFROST_TEX_DREG_SAMPLER] = index; - break; + dregs[BIFROST_TEX_DREG_SAMPLER] = index; + break; - default: - unreachable("Unhandled src type in texc emit"); - } - } + default: + unreachable("Unhandled src type in texc emit"); + } + } - if (desc.op == BIFROST_TEX_OP_FETCH && bi_is_null(dregs[BIFROST_TEX_DREG_LOD])) { - dregs[BIFROST_TEX_DREG_LOD] = - bi_emit_texc_lod_cube(b, bi_zero()); - } + if (desc.op == BIFROST_TEX_OP_FETCH && + bi_is_null(dregs[BIFROST_TEX_DREG_LOD])) { + dregs[BIFROST_TEX_DREG_LOD] = bi_emit_texc_lod_cube(b, bi_zero()); + } - /* Choose an index mode */ + /* Choose an index mode */ - bool direct_tex = bi_is_null(dregs[BIFROST_TEX_DREG_TEXTURE]); - bool direct_samp = bi_is_null(dregs[BIFROST_TEX_DREG_SAMPLER]); - bool direct = direct_tex && direct_samp; + bool direct_tex = bi_is_null(dregs[BIFROST_TEX_DREG_TEXTURE]); + bool direct_samp = bi_is_null(dregs[BIFROST_TEX_DREG_SAMPLER]); + bool direct = direct_tex && direct_samp; - desc.immediate_indices = direct && (instr->sampler_index < 16); + desc.immediate_indices = direct && (instr->sampler_index < 16); - if (desc.immediate_indices) { - desc.sampler_index_or_mode = instr->sampler_index; - desc.index = instr->texture_index; - } else { - unsigned mode = 0; + if (desc.immediate_indices) { + desc.sampler_index_or_mode = instr->sampler_index; + desc.index = instr->texture_index; + } else { + unsigned mode = 0; - if (direct && 
instr->sampler_index == instr->texture_index) { - mode = BIFROST_INDEX_IMMEDIATE_SHARED; - desc.index = instr->texture_index; - } else if (direct) { - mode = BIFROST_INDEX_IMMEDIATE_SAMPLER; - desc.index = instr->sampler_index; - dregs[BIFROST_TEX_DREG_TEXTURE] = bi_mov_i32(b, - bi_imm_u32(instr->texture_index)); - } else if (direct_tex) { - assert(!direct_samp); - mode = BIFROST_INDEX_IMMEDIATE_TEXTURE; - desc.index = instr->texture_index; - } else if (direct_samp) { - assert(!direct_tex); - mode = BIFROST_INDEX_IMMEDIATE_SAMPLER; - desc.index = instr->sampler_index; - } else { - mode = BIFROST_INDEX_REGISTER; - } + if (direct && instr->sampler_index == instr->texture_index) { + mode = BIFROST_INDEX_IMMEDIATE_SHARED; + desc.index = instr->texture_index; + } else if (direct) { + mode = BIFROST_INDEX_IMMEDIATE_SAMPLER; + desc.index = instr->sampler_index; + dregs[BIFROST_TEX_DREG_TEXTURE] = + bi_mov_i32(b, bi_imm_u32(instr->texture_index)); + } else if (direct_tex) { + assert(!direct_samp); + mode = BIFROST_INDEX_IMMEDIATE_TEXTURE; + desc.index = instr->texture_index; + } else if (direct_samp) { + assert(!direct_tex); + mode = BIFROST_INDEX_IMMEDIATE_SAMPLER; + desc.index = instr->sampler_index; + } else { + mode = BIFROST_INDEX_REGISTER; + } - mode |= (BIFROST_TEXTURE_OPERATION_SINGLE << 2); - desc.sampler_index_or_mode = mode; - } + mode |= (BIFROST_TEXTURE_OPERATION_SINGLE << 2); + desc.sampler_index_or_mode = mode; + } - /* Allocate staging registers contiguously by compacting the array. */ - unsigned sr_count = 0; + /* Allocate staging registers contiguously by compacting the array. */ + unsigned sr_count = 0; - for (unsigned i = 0; i < ARRAY_SIZE(dregs); ++i) { - if (!bi_is_null(dregs[i])) - dregs[sr_count++] = dregs[i]; - } + for (unsigned i = 0; i < ARRAY_SIZE(dregs); ++i) { + if (!bi_is_null(dregs[i])) + dregs[sr_count++] = dregs[i]; + } - unsigned res_size = nir_dest_bit_size(instr->dest) == 16 ? 2 : 4; + unsigned res_size = nir_dest_bit_size(instr->dest) == 16 ? 2 : 4; - bi_index sr = sr_count ? bi_temp(b->shader) : bi_null(); - bi_index dst = bi_temp(b->shader); + bi_index sr = sr_count ? 
bi_temp(b->shader) : bi_null(); + bi_index dst = bi_temp(b->shader); - if (sr_count) - bi_emit_collect_to(b, sr, dregs, sr_count); + if (sr_count) + bi_emit_collect_to(b, sr, dregs, sr_count); - uint32_t desc_u = 0; - memcpy(&desc_u, &desc, sizeof(desc_u)); - bi_instr *I = bi_texc_to(b, dst, sr, cx, cy, bi_imm_u32(desc_u), - !nir_tex_instr_has_implicit_derivative(instr), - sr_count, 0); - I->register_format = bi_reg_fmt_for_nir(instr->dest_type); + uint32_t desc_u = 0; + memcpy(&desc_u, &desc, sizeof(desc_u)); + bi_instr *I = + bi_texc_to(b, dst, sr, cx, cy, bi_imm_u32(desc_u), + !nir_tex_instr_has_implicit_derivative(instr), sr_count, 0); + I->register_format = bi_reg_fmt_for_nir(instr->dest_type); - bi_index w[4] = { bi_null(), bi_null(), bi_null(), bi_null() }; - bi_emit_split_i32(b, w, dst, res_size); - bi_emit_collect_to(b, bi_dest_index(&instr->dest), w, - DIV_ROUND_UP(nir_dest_num_components(instr->dest) * res_size, 4)); + bi_index w[4] = {bi_null(), bi_null(), bi_null(), bi_null()}; + bi_emit_split_i32(b, w, dst, res_size); + bi_emit_collect_to( + b, bi_dest_index(&instr->dest), w, + DIV_ROUND_UP(nir_dest_num_components(instr->dest) * res_size, 4)); } /* Staging registers required by texturing in the order they appear (Valhall) */ enum valhall_tex_sreg { - VALHALL_TEX_SREG_X_COORD = 0, - VALHALL_TEX_SREG_Y_COORD = 1, - VALHALL_TEX_SREG_Z_COORD = 2, - VALHALL_TEX_SREG_Y_DELTAS = 3, - VALHALL_TEX_SREG_ARRAY = 4, - VALHALL_TEX_SREG_SHADOW = 5, - VALHALL_TEX_SREG_OFFSETMS = 6, - VALHALL_TEX_SREG_LOD = 7, - VALHALL_TEX_SREG_GRDESC = 8, - VALHALL_TEX_SREG_COUNT, + VALHALL_TEX_SREG_X_COORD = 0, + VALHALL_TEX_SREG_Y_COORD = 1, + VALHALL_TEX_SREG_Z_COORD = 2, + VALHALL_TEX_SREG_Y_DELTAS = 3, + VALHALL_TEX_SREG_ARRAY = 4, + VALHALL_TEX_SREG_SHADOW = 5, + VALHALL_TEX_SREG_OFFSETMS = 6, + VALHALL_TEX_SREG_LOD = 7, + VALHALL_TEX_SREG_GRDESC = 8, + VALHALL_TEX_SREG_COUNT, }; static void bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *instr) { - bool explicit_offset = false; - enum bi_va_lod_mode lod_mode = BI_VA_LOD_MODE_COMPUTED_LOD; + bool explicit_offset = false; + enum bi_va_lod_mode lod_mode = BI_VA_LOD_MODE_COMPUTED_LOD; - bool has_lod_mode = - (instr->op == nir_texop_tex) || - (instr->op == nir_texop_txl) || - (instr->op == nir_texop_txb); + bool has_lod_mode = (instr->op == nir_texop_tex) || + (instr->op == nir_texop_txl) || + (instr->op == nir_texop_txb); - /* 32-bit indices to be allocated as consecutive staging registers */ - bi_index sregs[VALHALL_TEX_SREG_COUNT] = { }; + /* 32-bit indices to be allocated as consecutive staging registers */ + bi_index sregs[VALHALL_TEX_SREG_COUNT] = {}; + bool has_sampler = nir_tex_instr_need_sampler(instr); + bi_index sampler = bi_imm_u32(has_sampler ? instr->sampler_index : 0); + bi_index texture = bi_imm_u32(instr->texture_index); + uint32_t tables = (PAN_TABLE_SAMPLER << 11) | (PAN_TABLE_TEXTURE << 27); - bool has_sampler = nir_tex_instr_need_sampler(instr); - bi_index sampler = bi_imm_u32(has_sampler ? 
instr->sampler_index : 0); - bi_index texture = bi_imm_u32(instr->texture_index); - uint32_t tables = (PAN_TABLE_SAMPLER << 11) | (PAN_TABLE_TEXTURE << 27); + for (unsigned i = 0; i < instr->num_srcs; ++i) { + bi_index index = bi_src_index(&instr->src[i].src); + unsigned sz = nir_src_bit_size(instr->src[i].src); + unsigned components = nir_src_num_components(instr->src[i].src); - for (unsigned i = 0; i < instr->num_srcs; ++i) { - bi_index index = bi_src_index(&instr->src[i].src); - unsigned sz = nir_src_bit_size(instr->src[i].src); - unsigned components = nir_src_num_components(instr->src[i].src); + switch (instr->src[i].src_type) { + case nir_tex_src_coord: + if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { + sregs[VALHALL_TEX_SREG_X_COORD] = bi_emit_texc_cube_coord( + b, index, &sregs[VALHALL_TEX_SREG_Y_COORD]); + } else { + assert(components >= 1 && components <= 3); - switch (instr->src[i].src_type) { - case nir_tex_src_coord: - if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { - sregs[VALHALL_TEX_SREG_X_COORD] = - bi_emit_texc_cube_coord(b, index, - &sregs[VALHALL_TEX_SREG_Y_COORD]); - } else { - assert(components >= 1 && components <= 3); + /* Copy XY (for 2D+) or XX (for 1D) */ + sregs[VALHALL_TEX_SREG_X_COORD] = index; - /* Copy XY (for 2D+) or XX (for 1D) */ - sregs[VALHALL_TEX_SREG_X_COORD] = index; + if (components >= 2) + sregs[VALHALL_TEX_SREG_Y_COORD] = bi_extract(b, index, 1); - if (components >= 2) - sregs[VALHALL_TEX_SREG_Y_COORD] = bi_extract(b, index, 1); + if (components == 3 && !instr->is_array) { + sregs[VALHALL_TEX_SREG_Z_COORD] = bi_extract(b, index, 2); + } + } - if (components == 3 && !instr->is_array) { - sregs[VALHALL_TEX_SREG_Z_COORD] = - bi_extract(b, index, 2); - } - } + if (instr->is_array) { + sregs[VALHALL_TEX_SREG_ARRAY] = + bi_extract(b, index, components - 1); + } - if (instr->is_array) { - sregs[VALHALL_TEX_SREG_ARRAY] = - bi_extract(b, index, components - 1); - } + break; - break; + case nir_tex_src_lod: + if (nir_src_is_const(instr->src[i].src) && + nir_src_as_uint(instr->src[i].src) == 0) { + lod_mode = BI_VA_LOD_MODE_ZERO_LOD; + } else if (has_lod_mode) { + lod_mode = BI_VA_LOD_MODE_EXPLICIT; - case nir_tex_src_lod: - if (nir_src_is_const(instr->src[i].src) && - nir_src_as_uint(instr->src[i].src) == 0) { - lod_mode = BI_VA_LOD_MODE_ZERO_LOD; - } else if (has_lod_mode) { - lod_mode = BI_VA_LOD_MODE_EXPLICIT; + assert(sz == 16 || sz == 32); + sregs[VALHALL_TEX_SREG_LOD] = + bi_emit_texc_lod_88(b, index, sz == 16); + } + break; - assert(sz == 16 || sz == 32); - sregs[VALHALL_TEX_SREG_LOD] = - bi_emit_texc_lod_88(b, index, sz == 16); - } - break; + case nir_tex_src_bias: + /* Upper 16-bits interpreted as a clamp, leave zero */ + assert(sz == 16 || sz == 32); + sregs[VALHALL_TEX_SREG_LOD] = bi_emit_texc_lod_88(b, index, sz == 16); - case nir_tex_src_bias: - /* Upper 16-bits interpreted as a clamp, leave zero */ - assert(sz == 16 || sz == 32); - sregs[VALHALL_TEX_SREG_LOD] = - bi_emit_texc_lod_88(b, index, sz == 16); + lod_mode = BI_VA_LOD_MODE_COMPUTED_BIAS; + break; + case nir_tex_src_ms_index: + case nir_tex_src_offset: + /* Handled below */ + break; - lod_mode = BI_VA_LOD_MODE_COMPUTED_BIAS; - break; - case nir_tex_src_ms_index: - case nir_tex_src_offset: - /* Handled below */ - break; + case nir_tex_src_comparator: + sregs[VALHALL_TEX_SREG_SHADOW] = index; + break; - case nir_tex_src_comparator: - sregs[VALHALL_TEX_SREG_SHADOW] = index; - break; + case nir_tex_src_texture_offset: + assert(instr->texture_index == 0); + texture = index; + break; - 
case nir_tex_src_texture_offset: - assert(instr->texture_index == 0); - texture = index; - break; + case nir_tex_src_sampler_offset: + assert(instr->sampler_index == 0); + sampler = index; + break; - case nir_tex_src_sampler_offset: - assert(instr->sampler_index == 0); - sampler = index; - break; + default: + unreachable("Unhandled src type in tex emit"); + } + } - default: - unreachable("Unhandled src type in tex emit"); - } - } + /* Generate packed offset + ms index + LOD register. These default to + * zero so we only need to encode if these features are actually in use. + */ + bi_index offsets = bi_emit_valhall_offsets(b, instr); - /* Generate packed offset + ms index + LOD register. These default to - * zero so we only need to encode if these features are actually in use. - */ - bi_index offsets = bi_emit_valhall_offsets(b, instr); + if (!bi_is_equiv(offsets, bi_zero())) { + sregs[VALHALL_TEX_SREG_OFFSETMS] = offsets; + explicit_offset = true; + } - if (!bi_is_equiv(offsets, bi_zero())) { - sregs[VALHALL_TEX_SREG_OFFSETMS] = offsets; - explicit_offset = true; - } + /* Allocate staging registers contiguously by compacting the array. */ + unsigned sr_count = 0; - /* Allocate staging registers contiguously by compacting the array. */ - unsigned sr_count = 0; + for (unsigned i = 0; i < ARRAY_SIZE(sregs); ++i) { + if (!bi_is_null(sregs[i])) + sregs[sr_count++] = sregs[i]; + } - for (unsigned i = 0; i < ARRAY_SIZE(sregs); ++i) { - if (!bi_is_null(sregs[i])) - sregs[sr_count++] = sregs[i]; - } + bi_index idx = sr_count ? bi_temp(b->shader) : bi_null(); - bi_index idx = sr_count ? bi_temp(b->shader) : bi_null(); + if (sr_count) + bi_make_vec_to(b, idx, sregs, NULL, sr_count, 32); - if (sr_count) - bi_make_vec_to(b, idx, sregs, NULL, sr_count, 32); + bi_index image_src = bi_imm_u32(tables); + image_src = bi_lshift_or_i32(b, sampler, image_src, bi_imm_u8(0)); + image_src = bi_lshift_or_i32(b, texture, image_src, bi_imm_u8(16)); - bi_index image_src = bi_imm_u32(tables); - image_src = bi_lshift_or_i32(b, sampler, image_src, bi_imm_u8(0)); - image_src = bi_lshift_or_i32(b, texture, image_src, bi_imm_u8(16)); + /* Only write the components that we actually read */ + unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa); + unsigned comps_per_reg = nir_dest_bit_size(instr->dest) == 16 ? 2 : 1; + unsigned res_size = DIV_ROUND_UP(util_bitcount(mask), comps_per_reg); + enum bi_register_format regfmt = bi_reg_fmt_for_nir(instr->dest_type); + enum bi_dimension dim = valhall_tex_dimension(instr->sampler_dim); + bi_index dest = bi_temp(b->shader); - /* Only write the components that we actually read */ - unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa); - unsigned comps_per_reg = nir_dest_bit_size(instr->dest) == 16 ? 
2 : 1; - unsigned res_size = DIV_ROUND_UP(util_bitcount(mask), comps_per_reg); + switch (instr->op) { + case nir_texop_tex: + case nir_texop_txl: + case nir_texop_txb: + bi_tex_single_to(b, dest, idx, image_src, bi_zero(), instr->is_array, dim, + regfmt, instr->is_shadow, explicit_offset, lod_mode, + mask, sr_count); + break; + case nir_texop_txf: + case nir_texop_txf_ms: + bi_tex_fetch_to(b, dest, idx, image_src, bi_zero(), instr->is_array, dim, + regfmt, explicit_offset, mask, sr_count); + break; + case nir_texop_tg4: + bi_tex_gather_to(b, dest, idx, image_src, bi_zero(), instr->is_array, dim, + instr->component, false, regfmt, instr->is_shadow, + explicit_offset, mask, sr_count); + break; + default: + unreachable("Unhandled Valhall texture op"); + } - enum bi_register_format regfmt = bi_reg_fmt_for_nir(instr->dest_type); - enum bi_dimension dim = valhall_tex_dimension(instr->sampler_dim); - bi_index dest = bi_temp(b->shader); + /* The hardware will write only what we read, and it will into + * contiguous registers without gaps (different from Bifrost). NIR + * expects the gaps, so fill in the holes (they'll be copypropped and + * DCE'd away later). + */ + bi_index unpacked[4] = {bi_null(), bi_null(), bi_null(), bi_null()}; - switch (instr->op) { - case nir_texop_tex: - case nir_texop_txl: - case nir_texop_txb: - bi_tex_single_to(b, dest, idx, image_src, bi_zero(), - instr->is_array, dim, regfmt, instr->is_shadow, - explicit_offset, lod_mode, mask, sr_count); - break; - case nir_texop_txf: - case nir_texop_txf_ms: - bi_tex_fetch_to(b, dest, idx, image_src, bi_zero(), - instr->is_array, dim, regfmt, explicit_offset, - mask, sr_count); - break; - case nir_texop_tg4: - bi_tex_gather_to(b, dest, idx, image_src, bi_zero(), - instr->is_array, dim, instr->component, false, - regfmt, instr->is_shadow, explicit_offset, - mask, sr_count); - break; - default: - unreachable("Unhandled Valhall texture op"); - } + bi_emit_cached_split_i32(b, dest, res_size); - /* The hardware will write only what we read, and it will into - * contiguous registers without gaps (different from Bifrost). NIR - * expects the gaps, so fill in the holes (they'll be copypropped and - * DCE'd away later). 
- */ - bi_index unpacked[4] = { bi_null(), bi_null(), bi_null(), bi_null() }; + /* Index into the packed component array */ + unsigned j = 0; + unsigned comps[4] = {0}; + unsigned nr_components = nir_dest_num_components(instr->dest); - bi_emit_cached_split_i32(b, dest, res_size); + for (unsigned i = 0; i < nr_components; ++i) { + if (mask & BITFIELD_BIT(i)) { + unpacked[i] = dest; + comps[i] = j++; + } else { + unpacked[i] = bi_zero(); + } + } - /* Index into the packed component array */ - unsigned j = 0; - unsigned comps[4] = { 0 }; - unsigned nr_components = nir_dest_num_components(instr->dest); - - for (unsigned i = 0; i < nr_components; ++i) { - if (mask & BITFIELD_BIT(i)) { - unpacked[i] = dest; - comps[i] = j++; - } else { - unpacked[i] = bi_zero(); - } - } - - bi_make_vec_to(b, bi_dest_index(&instr->dest), unpacked, - comps, nir_dest_num_components(instr->dest), - nir_dest_bit_size(instr->dest)); + bi_make_vec_to(b, bi_dest_index(&instr->dest), unpacked, comps, + nir_dest_num_components(instr->dest), + nir_dest_bit_size(instr->dest)); } /* Simple textures ops correspond to NIR tex or txl with LOD = 0 on 2D/cube @@ -3851,114 +3820,112 @@ bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *instr) static void bi_emit_texs(bi_builder *b, nir_tex_instr *instr) { - int coord_idx = nir_tex_instr_src_index(instr, nir_tex_src_coord); - assert(coord_idx >= 0); - bi_index coords = bi_src_index(&instr->src[coord_idx].src); + int coord_idx = nir_tex_instr_src_index(instr, nir_tex_src_coord); + assert(coord_idx >= 0); + bi_index coords = bi_src_index(&instr->src[coord_idx].src); - if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { - bi_index face, s, t; - bi_emit_cube_coord(b, coords, &face, &s, &t); + if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { + bi_index face, s, t; + bi_emit_cube_coord(b, coords, &face, &s, &t); - bi_texs_cube_to(b, nir_dest_bit_size(instr->dest), - bi_dest_index(&instr->dest), - s, t, face, - instr->sampler_index, instr->texture_index); - } else { - bi_texs_2d_to(b, nir_dest_bit_size(instr->dest), - bi_dest_index(&instr->dest), - bi_extract(b, coords, 0), - bi_extract(b, coords, 1), - instr->op != nir_texop_tex, /* zero LOD */ - instr->sampler_index, instr->texture_index); - } + bi_texs_cube_to(b, nir_dest_bit_size(instr->dest), + bi_dest_index(&instr->dest), s, t, face, + instr->sampler_index, instr->texture_index); + } else { + bi_texs_2d_to(b, nir_dest_bit_size(instr->dest), + bi_dest_index(&instr->dest), bi_extract(b, coords, 0), + bi_extract(b, coords, 1), + instr->op != nir_texop_tex, /* zero LOD */ + instr->sampler_index, instr->texture_index); + } - bi_split_dest(b, instr->dest); + bi_split_dest(b, instr->dest); } static bool bi_is_simple_tex(nir_tex_instr *instr) { - if (instr->op != nir_texop_tex && instr->op != nir_texop_txl) - return false; + if (instr->op != nir_texop_tex && instr->op != nir_texop_txl) + return false; - if (instr->dest_type != nir_type_float32 && - instr->dest_type != nir_type_float16) - return false; + if (instr->dest_type != nir_type_float32 && + instr->dest_type != nir_type_float16) + return false; - if (instr->is_shadow || instr->is_array) - return false; + if (instr->is_shadow || instr->is_array) + return false; - switch (instr->sampler_dim) { - case GLSL_SAMPLER_DIM_2D: - case GLSL_SAMPLER_DIM_EXTERNAL: - case GLSL_SAMPLER_DIM_RECT: - break; + switch (instr->sampler_dim) { + case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_EXTERNAL: + case GLSL_SAMPLER_DIM_RECT: + break; - case GLSL_SAMPLER_DIM_CUBE: - /* LOD can't be specified with 
TEXS_CUBE */ - if (instr->op == nir_texop_txl) - return false; - break; + case GLSL_SAMPLER_DIM_CUBE: + /* LOD can't be specified with TEXS_CUBE */ + if (instr->op == nir_texop_txl) + return false; + break; - default: - return false; - } + default: + return false; + } - for (unsigned i = 0; i < instr->num_srcs; ++i) { - if (instr->src[i].src_type != nir_tex_src_lod && - instr->src[i].src_type != nir_tex_src_coord) - return false; - } + for (unsigned i = 0; i < instr->num_srcs; ++i) { + if (instr->src[i].src_type != nir_tex_src_lod && + instr->src[i].src_type != nir_tex_src_coord) + return false; + } - /* Indices need to fit in provided bits */ - unsigned idx_bits = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE ? 2 : 3; - if (MAX2(instr->sampler_index, instr->texture_index) >= (1 << idx_bits)) - return false; + /* Indices need to fit in provided bits */ + unsigned idx_bits = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE ? 2 : 3; + if (MAX2(instr->sampler_index, instr->texture_index) >= (1 << idx_bits)) + return false; - int lod_idx = nir_tex_instr_src_index(instr, nir_tex_src_lod); - if (lod_idx < 0) - return true; + int lod_idx = nir_tex_instr_src_index(instr, nir_tex_src_lod); + if (lod_idx < 0) + return true; - nir_src lod = instr->src[lod_idx].src; - return nir_src_is_const(lod) && nir_src_as_uint(lod) == 0; + nir_src lod = instr->src[lod_idx].src; + return nir_src_is_const(lod) && nir_src_as_uint(lod) == 0; } static void bi_emit_tex(bi_builder *b, nir_tex_instr *instr) { - switch (instr->op) { - case nir_texop_txs: - bi_load_sysval_to(b, bi_dest_index(&instr->dest), - panfrost_sysval_for_instr(&instr->instr, NULL), - nir_dest_num_components(instr->dest), 0); - return; - case nir_texop_tex: - case nir_texop_txl: - case nir_texop_txb: - case nir_texop_txf: - case nir_texop_txf_ms: - case nir_texop_tg4: - break; - default: - unreachable("Invalid texture operation"); - } + switch (instr->op) { + case nir_texop_txs: + bi_load_sysval_to(b, bi_dest_index(&instr->dest), + panfrost_sysval_for_instr(&instr->instr, NULL), + nir_dest_num_components(instr->dest), 0); + return; + case nir_texop_tex: + case nir_texop_txl: + case nir_texop_txb: + case nir_texop_txf: + case nir_texop_txf_ms: + case nir_texop_tg4: + break; + default: + unreachable("Invalid texture operation"); + } - if (b->shader->arch >= 9) - bi_emit_tex_valhall(b, instr); - else if (bi_is_simple_tex(instr)) - bi_emit_texs(b, instr); - else - bi_emit_texc(b, instr); + if (b->shader->arch >= 9) + bi_emit_tex_valhall(b, instr); + else if (bi_is_simple_tex(instr)) + bi_emit_texs(b, instr); + else + bi_emit_texc(b, instr); } static void bi_emit_phi(bi_builder *b, nir_phi_instr *instr) { - unsigned nr_srcs = exec_list_length(&instr->srcs); - bi_instr *I = bi_phi_to(b, bi_dest_index(&instr->dest), nr_srcs); + unsigned nr_srcs = exec_list_length(&instr->srcs); + bi_instr *I = bi_phi_to(b, bi_dest_index(&instr->dest), nr_srcs); - /* Deferred */ - I->phi = instr; + /* Deferred */ + I->phi = instr; } /* Look up the AGX block corresponding to a given NIR block. 
Used when @@ -3967,266 +3934,267 @@ bi_emit_phi(bi_builder *b, nir_phi_instr *instr) static bi_block * bi_from_nir_block(bi_context *ctx, nir_block *block) { - return ctx->indexed_nir_blocks[block->index]; + return ctx->indexed_nir_blocks[block->index]; } static void bi_emit_phi_deferred(bi_context *ctx, bi_block *block, bi_instr *I) { - nir_phi_instr *phi = I->phi; + nir_phi_instr *phi = I->phi; - /* Guaranteed by lower_phis_to_scalar */ - assert(phi->dest.ssa.num_components == 1); + /* Guaranteed by lower_phis_to_scalar */ + assert(phi->dest.ssa.num_components == 1); - nir_foreach_phi_src(src, phi) { - bi_block *pred = bi_from_nir_block(ctx, src->pred); - unsigned i = bi_predecessor_index(block, pred); - assert(i < I->nr_srcs); + nir_foreach_phi_src(src, phi) { + bi_block *pred = bi_from_nir_block(ctx, src->pred); + unsigned i = bi_predecessor_index(block, pred); + assert(i < I->nr_srcs); - I->src[i] = bi_src_index(&src->src); - } + I->src[i] = bi_src_index(&src->src); + } - I->phi = NULL; + I->phi = NULL; } static void bi_emit_phis_deferred(bi_context *ctx) { - bi_foreach_block(ctx, block) { - bi_foreach_instr_in_block(block, I) { - if (I->op == BI_OPCODE_PHI) - bi_emit_phi_deferred(ctx, block, I); - } - } + bi_foreach_block(ctx, block) { + bi_foreach_instr_in_block(block, I) { + if (I->op == BI_OPCODE_PHI) + bi_emit_phi_deferred(ctx, block, I); + } + } } static void bi_emit_instr(bi_builder *b, struct nir_instr *instr) { - switch (instr->type) { - case nir_instr_type_load_const: - bi_emit_load_const(b, nir_instr_as_load_const(instr)); - break; + switch (instr->type) { + case nir_instr_type_load_const: + bi_emit_load_const(b, nir_instr_as_load_const(instr)); + break; - case nir_instr_type_intrinsic: - bi_emit_intrinsic(b, nir_instr_as_intrinsic(instr)); - break; + case nir_instr_type_intrinsic: + bi_emit_intrinsic(b, nir_instr_as_intrinsic(instr)); + break; - case nir_instr_type_alu: - bi_emit_alu(b, nir_instr_as_alu(instr)); - break; + case nir_instr_type_alu: + bi_emit_alu(b, nir_instr_as_alu(instr)); + break; - case nir_instr_type_tex: - bi_emit_tex(b, nir_instr_as_tex(instr)); - break; + case nir_instr_type_tex: + bi_emit_tex(b, nir_instr_as_tex(instr)); + break; - case nir_instr_type_jump: - bi_emit_jump(b, nir_instr_as_jump(instr)); - break; + case nir_instr_type_jump: + bi_emit_jump(b, nir_instr_as_jump(instr)); + break; - case nir_instr_type_phi: - bi_emit_phi(b, nir_instr_as_phi(instr)); - break; + case nir_instr_type_phi: + bi_emit_phi(b, nir_instr_as_phi(instr)); + break; - default: - unreachable("should've been lowered"); - } + default: + unreachable("should've been lowered"); + } } static bi_block * create_empty_block(bi_context *ctx) { - bi_block *blk = rzalloc(ctx, bi_block); + bi_block *blk = rzalloc(ctx, bi_block); - util_dynarray_init(&blk->predecessors, blk); + util_dynarray_init(&blk->predecessors, blk); - return blk; + return blk; } static bi_block * emit_block(bi_context *ctx, nir_block *block) { - if (ctx->after_block) { - ctx->current_block = ctx->after_block; - ctx->after_block = NULL; - } else { - ctx->current_block = create_empty_block(ctx); - } + if (ctx->after_block) { + ctx->current_block = ctx->after_block; + ctx->after_block = NULL; + } else { + ctx->current_block = create_empty_block(ctx); + } - list_addtail(&ctx->current_block->link, &ctx->blocks); - list_inithead(&ctx->current_block->instructions); + list_addtail(&ctx->current_block->link, &ctx->blocks); + list_inithead(&ctx->current_block->instructions); - bi_builder _b = bi_init_builder(ctx, 
bi_after_block(ctx->current_block)); + bi_builder _b = bi_init_builder(ctx, bi_after_block(ctx->current_block)); - ctx->indexed_nir_blocks[block->index] = ctx->current_block; + ctx->indexed_nir_blocks[block->index] = ctx->current_block; - nir_foreach_instr(instr, block) { - bi_emit_instr(&_b, instr); - } + nir_foreach_instr(instr, block) { + bi_emit_instr(&_b, instr); + } - return ctx->current_block; + return ctx->current_block; } static void emit_if(bi_context *ctx, nir_if *nif) { - bi_block *before_block = ctx->current_block; + bi_block *before_block = ctx->current_block; - /* Speculatively emit the branch, but we can't fill it in until later */ - bi_builder _b = bi_init_builder(ctx, bi_after_block(ctx->current_block)); - bi_instr *then_branch = bi_branchz_i16(&_b, - bi_half(bi_src_index(&nif->condition), false), - bi_zero(), BI_CMPF_EQ); + /* Speculatively emit the branch, but we can't fill it in until later */ + bi_builder _b = bi_init_builder(ctx, bi_after_block(ctx->current_block)); + bi_instr *then_branch = + bi_branchz_i16(&_b, bi_half(bi_src_index(&nif->condition), false), + bi_zero(), BI_CMPF_EQ); - /* Emit the two subblocks. */ - bi_block *then_block = emit_cf_list(ctx, &nif->then_list); - bi_block *end_then_block = ctx->current_block; + /* Emit the two subblocks. */ + bi_block *then_block = emit_cf_list(ctx, &nif->then_list); + bi_block *end_then_block = ctx->current_block; - /* Emit second block */ + /* Emit second block */ - bi_block *else_block = emit_cf_list(ctx, &nif->else_list); - bi_block *end_else_block = ctx->current_block; - ctx->after_block = create_empty_block(ctx); + bi_block *else_block = emit_cf_list(ctx, &nif->else_list); + bi_block *end_else_block = ctx->current_block; + ctx->after_block = create_empty_block(ctx); - /* Now that we have the subblocks emitted, fix up the branches */ + /* Now that we have the subblocks emitted, fix up the branches */ - assert(then_block); - assert(else_block); + assert(then_block); + assert(else_block); - then_branch->branch_target = else_block; + then_branch->branch_target = else_block; - /* Emit a jump from the end of the then block to the end of the else */ - _b.cursor = bi_after_block(end_then_block); - bi_instr *then_exit = bi_jump(&_b, bi_zero()); - then_exit->branch_target = ctx->after_block; + /* Emit a jump from the end of the then block to the end of the else */ + _b.cursor = bi_after_block(end_then_block); + bi_instr *then_exit = bi_jump(&_b, bi_zero()); + then_exit->branch_target = ctx->after_block; - bi_block_add_successor(end_then_block, then_exit->branch_target); - bi_block_add_successor(end_else_block, ctx->after_block); /* fallthrough */ + bi_block_add_successor(end_then_block, then_exit->branch_target); + bi_block_add_successor(end_else_block, ctx->after_block); /* fallthrough */ - bi_block_add_successor(before_block, then_branch->branch_target); /* then_branch */ - bi_block_add_successor(before_block, then_block); /* fallthrough */ + bi_block_add_successor(before_block, + then_branch->branch_target); /* then_branch */ + bi_block_add_successor(before_block, then_block); /* fallthrough */ } static void emit_loop(bi_context *ctx, nir_loop *nloop) { - /* Remember where we are */ - bi_block *start_block = ctx->current_block; + /* Remember where we are */ + bi_block *start_block = ctx->current_block; - bi_block *saved_break = ctx->break_block; - bi_block *saved_continue = ctx->continue_block; + bi_block *saved_break = ctx->break_block; + bi_block *saved_continue = ctx->continue_block; - ctx->continue_block = 
create_empty_block(ctx); - ctx->break_block = create_empty_block(ctx); - ctx->after_block = ctx->continue_block; + ctx->continue_block = create_empty_block(ctx); + ctx->break_block = create_empty_block(ctx); + ctx->after_block = ctx->continue_block; - /* Emit the body itself */ - emit_cf_list(ctx, &nloop->body); + /* Emit the body itself */ + emit_cf_list(ctx, &nloop->body); - /* Branch back to loop back */ - bi_builder _b = bi_init_builder(ctx, bi_after_block(ctx->current_block)); - bi_instr *I = bi_jump(&_b, bi_zero()); - I->branch_target = ctx->continue_block; - bi_block_add_successor(start_block, ctx->continue_block); - bi_block_add_successor(ctx->current_block, ctx->continue_block); + /* Branch back to loop back */ + bi_builder _b = bi_init_builder(ctx, bi_after_block(ctx->current_block)); + bi_instr *I = bi_jump(&_b, bi_zero()); + I->branch_target = ctx->continue_block; + bi_block_add_successor(start_block, ctx->continue_block); + bi_block_add_successor(ctx->current_block, ctx->continue_block); - ctx->after_block = ctx->break_block; + ctx->after_block = ctx->break_block; - /* Pop off */ - ctx->break_block = saved_break; - ctx->continue_block = saved_continue; - ++ctx->loop_count; + /* Pop off */ + ctx->break_block = saved_break; + ctx->continue_block = saved_continue; + ++ctx->loop_count; } static bi_block * emit_cf_list(bi_context *ctx, struct exec_list *list) { - bi_block *start_block = NULL; + bi_block *start_block = NULL; - foreach_list_typed(nir_cf_node, node, node, list) { - switch (node->type) { - case nir_cf_node_block: { - bi_block *block = emit_block(ctx, nir_cf_node_as_block(node)); + foreach_list_typed(nir_cf_node, node, node, list) { + switch (node->type) { + case nir_cf_node_block: { + bi_block *block = emit_block(ctx, nir_cf_node_as_block(node)); - if (!start_block) - start_block = block; + if (!start_block) + start_block = block; - break; - } + break; + } - case nir_cf_node_if: - emit_if(ctx, nir_cf_node_as_if(node)); - break; + case nir_cf_node_if: + emit_if(ctx, nir_cf_node_as_if(node)); + break; - case nir_cf_node_loop: - emit_loop(ctx, nir_cf_node_as_loop(node)); - break; + case nir_cf_node_loop: + emit_loop(ctx, nir_cf_node_as_loop(node)); + break; - default: - unreachable("Unknown control flow"); - } - } + default: + unreachable("Unknown control flow"); + } + } - return start_block; + return start_block; } /* shader-db stuff */ struct bi_stats { - unsigned nr_clauses, nr_tuples, nr_ins; - unsigned nr_arith, nr_texture, nr_varying, nr_ldst; + unsigned nr_clauses, nr_tuples, nr_ins; + unsigned nr_arith, nr_texture, nr_varying, nr_ldst; }; static void bi_count_tuple_stats(bi_clause *clause, bi_tuple *tuple, struct bi_stats *stats) { - /* Count instructions */ - stats->nr_ins += (tuple->fma ? 1 : 0) + (tuple->add ? 1 : 0); + /* Count instructions */ + stats->nr_ins += (tuple->fma ? 1 : 0) + (tuple->add ? 
1 : 0); - /* Non-message passing tuples are always arithmetic */ - if (tuple->add != clause->message) { - stats->nr_arith++; - return; - } + /* Non-message passing tuples are always arithmetic */ + if (tuple->add != clause->message) { + stats->nr_arith++; + return; + } - /* Message + FMA we'll count as arithmetic _and_ message */ - if (tuple->fma) - stats->nr_arith++; + /* Message + FMA we'll count as arithmetic _and_ message */ + if (tuple->fma) + stats->nr_arith++; - switch (clause->message_type) { - case BIFROST_MESSAGE_VARYING: - /* Check components interpolated */ - stats->nr_varying += (clause->message->vecsize + 1) * - (bi_is_regfmt_16(clause->message->register_format) ? 1 : 2); - break; + switch (clause->message_type) { + case BIFROST_MESSAGE_VARYING: + /* Check components interpolated */ + stats->nr_varying += + (clause->message->vecsize + 1) * + (bi_is_regfmt_16(clause->message->register_format) ? 1 : 2); + break; - case BIFROST_MESSAGE_VARTEX: - /* 2 coordinates, fp32 each */ - stats->nr_varying += (2 * 2); - FALLTHROUGH; - case BIFROST_MESSAGE_TEX: - stats->nr_texture++; - break; + case BIFROST_MESSAGE_VARTEX: + /* 2 coordinates, fp32 each */ + stats->nr_varying += (2 * 2); + FALLTHROUGH; + case BIFROST_MESSAGE_TEX: + stats->nr_texture++; + break; - case BIFROST_MESSAGE_ATTRIBUTE: - case BIFROST_MESSAGE_LOAD: - case BIFROST_MESSAGE_STORE: - case BIFROST_MESSAGE_ATOMIC: - stats->nr_ldst++; - break; - - case BIFROST_MESSAGE_NONE: - case BIFROST_MESSAGE_BARRIER: - case BIFROST_MESSAGE_BLEND: - case BIFROST_MESSAGE_TILE: - case BIFROST_MESSAGE_Z_STENCIL: - case BIFROST_MESSAGE_ATEST: - case BIFROST_MESSAGE_JOB: - case BIFROST_MESSAGE_64BIT: - /* Nothing to do */ - break; - }; + case BIFROST_MESSAGE_ATTRIBUTE: + case BIFROST_MESSAGE_LOAD: + case BIFROST_MESSAGE_STORE: + case BIFROST_MESSAGE_ATOMIC: + stats->nr_ldst++; + break; + case BIFROST_MESSAGE_NONE: + case BIFROST_MESSAGE_BARRIER: + case BIFROST_MESSAGE_BLEND: + case BIFROST_MESSAGE_TILE: + case BIFROST_MESSAGE_Z_STENCIL: + case BIFROST_MESSAGE_ATEST: + case BIFROST_MESSAGE_JOB: + case BIFROST_MESSAGE_64BIT: + /* Nothing to do */ + break; + }; } /* @@ -4238,151 +4206,150 @@ bi_count_tuple_stats(bi_clause *clause, bi_tuple *tuple, struct bi_stats *stats) static unsigned bi_count_preload_cost(bi_context *ctx) { - /* Units: 1/16 of a normalized cycle, assuming that we may interpolate - * 16 fp16 varying components per cycle or fetch two texels per cycle. - */ - unsigned cost = 0; + /* Units: 1/16 of a normalized cycle, assuming that we may interpolate + * 16 fp16 varying components per cycle or fetch two texels per cycle. + */ + unsigned cost = 0; - for (unsigned i = 0; i < ARRAY_SIZE(ctx->info.bifrost->messages); ++i) { - struct bifrost_message_preload msg = ctx->info.bifrost->messages[i]; + for (unsigned i = 0; i < ARRAY_SIZE(ctx->info.bifrost->messages); ++i) { + struct bifrost_message_preload msg = ctx->info.bifrost->messages[i]; - if (msg.enabled && msg.texture) { - /* 2 coordinate, 2 half-words each, plus texture */ - cost += 12; - } else if (msg.enabled) { - cost += (msg.num_components * (msg.fp16 ? 1 : 2)); - } - } + if (msg.enabled && msg.texture) { + /* 2 coordinate, 2 half-words each, plus texture */ + cost += 12; + } else if (msg.enabled) { + cost += (msg.num_components * (msg.fp16 ? 
1 : 2)); + } + } - return cost; + return cost; } static const char * bi_shader_stage_name(bi_context *ctx) { - if (ctx->idvs == BI_IDVS_VARYING) - return "MESA_SHADER_VARYING"; - else if (ctx->idvs == BI_IDVS_POSITION) - return "MESA_SHADER_POSITION"; - else if (ctx->inputs->is_blend) - return "MESA_SHADER_BLEND"; - else - return gl_shader_stage_name(ctx->stage); + if (ctx->idvs == BI_IDVS_VARYING) + return "MESA_SHADER_VARYING"; + else if (ctx->idvs == BI_IDVS_POSITION) + return "MESA_SHADER_POSITION"; + else if (ctx->inputs->is_blend) + return "MESA_SHADER_BLEND"; + else + return gl_shader_stage_name(ctx->stage); } static char * bi_print_stats(bi_context *ctx, unsigned size) { - struct bi_stats stats = { 0 }; + struct bi_stats stats = {0}; - /* Count instructions, clauses, and tuples. Also attempt to construct - * normalized execution engine cycle counts, using the following ratio: - * - * 24 arith tuples/cycle - * 2 texture messages/cycle - * 16 x 16-bit varying channels interpolated/cycle - * 1 load store message/cycle - * - * These numbers seem to match Arm Mobile Studio's heuristic. The real - * cycle counts are surely more complicated. - */ + /* Count instructions, clauses, and tuples. Also attempt to construct + * normalized execution engine cycle counts, using the following ratio: + * + * 24 arith tuples/cycle + * 2 texture messages/cycle + * 16 x 16-bit varying channels interpolated/cycle + * 1 load store message/cycle + * + * These numbers seem to match Arm Mobile Studio's heuristic. The real + * cycle counts are surely more complicated. + */ - bi_foreach_block(ctx, block) { - bi_foreach_clause_in_block(block, clause) { - stats.nr_clauses++; - stats.nr_tuples += clause->tuple_count; + bi_foreach_block(ctx, block) { + bi_foreach_clause_in_block(block, clause) { + stats.nr_clauses++; + stats.nr_tuples += clause->tuple_count; - for (unsigned i = 0; i < clause->tuple_count; ++i) - bi_count_tuple_stats(clause, &clause->tuples[i], &stats); - } - } + for (unsigned i = 0; i < clause->tuple_count; ++i) + bi_count_tuple_stats(clause, &clause->tuples[i], &stats); + } + } - float cycles_arith = ((float) stats.nr_arith) / 24.0; - float cycles_texture = ((float) stats.nr_texture) / 2.0; - float cycles_varying = ((float) stats.nr_varying) / 16.0; - float cycles_ldst = ((float) stats.nr_ldst) / 1.0; + float cycles_arith = ((float)stats.nr_arith) / 24.0; + float cycles_texture = ((float)stats.nr_texture) / 2.0; + float cycles_varying = ((float)stats.nr_varying) / 16.0; + float cycles_ldst = ((float)stats.nr_ldst) / 1.0; - float cycles_message = MAX3(cycles_texture, cycles_varying, cycles_ldst); - float cycles_bound = MAX2(cycles_arith, cycles_message); + float cycles_message = MAX3(cycles_texture, cycles_varying, cycles_ldst); + float cycles_bound = MAX2(cycles_arith, cycles_message); - /* Thread count and register pressure are traded off only on v7 */ - bool full_threads = (ctx->arch == 7 && ctx->info.work_reg_count <= 32); - unsigned nr_threads = full_threads ? 2 : 1; + /* Thread count and register pressure are traded off only on v7 */ + bool full_threads = (ctx->arch == 7 && ctx->info.work_reg_count <= 32); + unsigned nr_threads = full_threads ? 
2 : 1; - /* Dump stats */ - char *str = ralloc_asprintf(NULL, "%s shader: " - "%u inst, %u tuples, %u clauses, " - "%f cycles, %f arith, %f texture, %f vary, %f ldst, " - "%u quadwords, %u threads", - bi_shader_stage_name(ctx), - stats.nr_ins, stats.nr_tuples, stats.nr_clauses, - cycles_bound, cycles_arith, cycles_texture, - cycles_varying, cycles_ldst, - size / 16, nr_threads); + /* Dump stats */ + char *str = ralloc_asprintf( + NULL, + "%s shader: " + "%u inst, %u tuples, %u clauses, " + "%f cycles, %f arith, %f texture, %f vary, %f ldst, " + "%u quadwords, %u threads", + bi_shader_stage_name(ctx), stats.nr_ins, stats.nr_tuples, + stats.nr_clauses, cycles_bound, cycles_arith, cycles_texture, + cycles_varying, cycles_ldst, size / 16, nr_threads); - if (ctx->arch == 7) { - ralloc_asprintf_append(&str, ", %u preloads", bi_count_preload_cost(ctx)); - } + if (ctx->arch == 7) { + ralloc_asprintf_append(&str, ", %u preloads", bi_count_preload_cost(ctx)); + } - ralloc_asprintf_append(&str, ", %u loops, %u:%u spills:fills", - ctx->loop_count, ctx->spills, ctx->fills); + ralloc_asprintf_append(&str, ", %u loops, %u:%u spills:fills", + ctx->loop_count, ctx->spills, ctx->fills); - return str; + return str; } static char * va_print_stats(bi_context *ctx, unsigned size) { - unsigned nr_ins = 0; - struct va_stats stats = { 0 }; + unsigned nr_ins = 0; + struct va_stats stats = {0}; - /* Count instructions */ - bi_foreach_instr_global(ctx, I) { - nr_ins++; - va_count_instr_stats(I, &stats); - } + /* Count instructions */ + bi_foreach_instr_global(ctx, I) { + nr_ins++; + va_count_instr_stats(I, &stats); + } - /* Mali G78 peak performance: - * - * 64 FMA instructions per cycle - * 64 CVT instructions per cycle - * 16 SFU instructions per cycle - * 8 x 32-bit varying channels interpolated per cycle - * 4 texture instructions per cycle - * 1 load/store operation per cycle - */ + /* Mali G78 peak performance: + * + * 64 FMA instructions per cycle + * 64 CVT instructions per cycle + * 16 SFU instructions per cycle + * 8 x 32-bit varying channels interpolated per cycle + * 4 texture instructions per cycle + * 1 load/store operation per cycle + */ - float cycles_fma = ((float) stats.fma) / 64.0; - float cycles_cvt = ((float) stats.cvt) / 64.0; - float cycles_sfu = ((float) stats.sfu) / 16.0; - float cycles_v = ((float) stats.v) / 16.0; - float cycles_t = ((float) stats.t) / 4.0; - float cycles_ls = ((float) stats.ls) / 1.0; + float cycles_fma = ((float)stats.fma) / 64.0; + float cycles_cvt = ((float)stats.cvt) / 64.0; + float cycles_sfu = ((float)stats.sfu) / 16.0; + float cycles_v = ((float)stats.v) / 16.0; + float cycles_t = ((float)stats.t) / 4.0; + float cycles_ls = ((float)stats.ls) / 1.0; - /* Calculate the bound */ - float cycles = MAX2( - MAX3(cycles_fma, cycles_cvt, cycles_sfu), - MAX3(cycles_v, cycles_t, cycles_ls)); + /* Calculate the bound */ + float cycles = MAX2(MAX3(cycles_fma, cycles_cvt, cycles_sfu), + MAX3(cycles_v, cycles_t, cycles_ls)); + /* Thread count and register pressure are traded off */ + unsigned nr_threads = (ctx->info.work_reg_count <= 32) ? 2 : 1; - /* Thread count and register pressure are traded off */ - unsigned nr_threads = (ctx->info.work_reg_count <= 32) ? 
2 : 1; - - /* Dump stats */ - return ralloc_asprintf(NULL, "%s shader: " - "%u inst, %f cycles, %f fma, %f cvt, %f sfu, %f v, " - "%f t, %f ls, %u quadwords, %u threads, %u loops, " - "%u:%u spills:fills", - bi_shader_stage_name(ctx), - nr_ins, cycles, cycles_fma, cycles_cvt, cycles_sfu, - cycles_v, cycles_t, cycles_ls, size / 16, nr_threads, - ctx->loop_count, ctx->spills, ctx->fills); + /* Dump stats */ + return ralloc_asprintf(NULL, + "%s shader: " + "%u inst, %f cycles, %f fma, %f cvt, %f sfu, %f v, " + "%f t, %f ls, %u quadwords, %u threads, %u loops, " + "%u:%u spills:fills", + bi_shader_stage_name(ctx), nr_ins, cycles, cycles_fma, + cycles_cvt, cycles_sfu, cycles_v, cycles_t, cycles_ls, + size / 16, nr_threads, ctx->loop_count, ctx->spills, + ctx->fills); } static int glsl_type_size(const struct glsl_type *type, bool bindless) { - return glsl_count_attribute_slots(type, false); + return glsl_count_attribute_slots(type, false); } /* Split stores to memory. We don't split stores to vertex outputs, since @@ -4392,17 +4359,17 @@ glsl_type_size(const struct glsl_type *type, bool bindless) static bool should_split_wrmask(const nir_instr *instr, UNUSED const void *data) { - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - switch (intr->intrinsic) { - case nir_intrinsic_store_ssbo: - case nir_intrinsic_store_shared: - case nir_intrinsic_store_global: - case nir_intrinsic_store_scratch: - return true; - default: - return false; - } + switch (intr->intrinsic) { + case nir_intrinsic_store_ssbo: + case nir_intrinsic_store_shared: + case nir_intrinsic_store_global: + case nir_intrinsic_store_scratch: + return true; + default: + return false; + } } /* @@ -4413,23 +4380,23 @@ should_split_wrmask(const nir_instr *instr, UNUSED const void *data) static unsigned bi_lower_bit_size(const nir_instr *instr, UNUSED void *data) { - if (instr->type != nir_instr_type_alu) - return 0; + if (instr->type != nir_instr_type_alu) + return 0; - nir_alu_instr *alu = nir_instr_as_alu(instr); + nir_alu_instr *alu = nir_instr_as_alu(instr); - switch (alu->op) { - case nir_op_fexp2: - case nir_op_flog2: - case nir_op_fpow: - case nir_op_fsin: - case nir_op_fcos: - case nir_op_bit_count: - case nir_op_bitfield_reverse: - return (nir_src_bit_size(alu->src[0].src) == 32) ? 0 : 32; - default: - return 0; - } + switch (alu->op) { + case nir_op_fexp2: + case nir_op_flog2: + case nir_op_fpow: + case nir_op_fsin: + case nir_op_fcos: + case nir_op_bit_count: + case nir_op_bitfield_reverse: + return (nir_src_bit_size(alu->src[0].src) == 32) ? 
0 : 32; + default: + return 0; + } } /* Although Bifrost generally supports packed 16-bit vec2 and 8-bit vec4, @@ -4440,64 +4407,64 @@ bi_lower_bit_size(const nir_instr *instr, UNUSED void *data) static uint8_t bi_vectorize_filter(const nir_instr *instr, const void *data) { - /* Defaults work for everything else */ - if (instr->type != nir_instr_type_alu) - return 0; + /* Defaults work for everything else */ + if (instr->type != nir_instr_type_alu) + return 0; - const nir_alu_instr *alu = nir_instr_as_alu(instr); + const nir_alu_instr *alu = nir_instr_as_alu(instr); - switch (alu->op) { - case nir_op_frcp: - case nir_op_frsq: - case nir_op_ishl: - case nir_op_ishr: - case nir_op_ushr: - case nir_op_f2i16: - case nir_op_f2u16: - case nir_op_extract_u8: - case nir_op_extract_i8: - case nir_op_extract_u16: - case nir_op_extract_i16: - case nir_op_insert_u16: - return 1; - default: - break; - } + switch (alu->op) { + case nir_op_frcp: + case nir_op_frsq: + case nir_op_ishl: + case nir_op_ishr: + case nir_op_ushr: + case nir_op_f2i16: + case nir_op_f2u16: + case nir_op_extract_u8: + case nir_op_extract_i8: + case nir_op_extract_u16: + case nir_op_extract_i16: + case nir_op_insert_u16: + return 1; + default: + break; + } - /* Vectorized instructions cannot write more than 32-bit */ - int dst_bit_size = nir_dest_bit_size(alu->dest.dest); - if (dst_bit_size == 16) - return 2; - else - return 1; + /* Vectorized instructions cannot write more than 32-bit */ + int dst_bit_size = nir_dest_bit_size(alu->dest.dest); + if (dst_bit_size == 16) + return 2; + else + return 1; } static bool bi_scalarize_filter(const nir_instr *instr, const void *data) { - if (instr->type != nir_instr_type_alu) - return false; + if (instr->type != nir_instr_type_alu) + return false; - const nir_alu_instr *alu = nir_instr_as_alu(instr); + const nir_alu_instr *alu = nir_instr_as_alu(instr); - switch (alu->op) { - case nir_op_pack_uvec2_to_uint: - case nir_op_pack_uvec4_to_uint: - return false; - default: - return true; - } + switch (alu->op) { + case nir_op_pack_uvec2_to_uint: + case nir_op_pack_uvec4_to_uint: + return false; + default: + return true; + } } /* Ensure we write exactly 4 components */ static nir_ssa_def * -bifrost_nir_valid_channel(nir_builder *b, nir_ssa_def *in, - unsigned channel, unsigned first, unsigned mask) +bifrost_nir_valid_channel(nir_builder *b, nir_ssa_def *in, unsigned channel, + unsigned first, unsigned mask) { - if (!(mask & BITFIELD_BIT(channel))) - channel = first; + if (!(mask & BITFIELD_BIT(channel))) + channel = first; - return nir_channel(b, in, channel); + return nir_channel(b, in, channel); } /* Lower fragment store_output instructions to always write 4 components, @@ -4506,202 +4473,197 @@ bifrost_nir_valid_channel(nir_builder *b, nir_ssa_def *in, * compiler. The DDK inserts these moves, so we will as well. 
*/ static bool -bifrost_nir_lower_blend_components(struct nir_builder *b, - nir_instr *instr, void *data) +bifrost_nir_lower_blend_components(struct nir_builder *b, nir_instr *instr, + void *data) { - if (instr->type != nir_instr_type_intrinsic) - return false; + if (instr->type != nir_instr_type_intrinsic) + return false; - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - if (intr->intrinsic != nir_intrinsic_store_output) - return false; + if (intr->intrinsic != nir_intrinsic_store_output) + return false; - nir_ssa_def *in = intr->src[0].ssa; - unsigned first = nir_intrinsic_component(intr); - unsigned mask = nir_intrinsic_write_mask(intr); + nir_ssa_def *in = intr->src[0].ssa; + unsigned first = nir_intrinsic_component(intr); + unsigned mask = nir_intrinsic_write_mask(intr); - assert(first == 0 && "shouldn't get nonzero components"); + assert(first == 0 && "shouldn't get nonzero components"); - /* Nothing to do */ - if (mask == BITFIELD_MASK(4)) - return false; + /* Nothing to do */ + if (mask == BITFIELD_MASK(4)) + return false; - b->cursor = nir_before_instr(&intr->instr); + b->cursor = nir_before_instr(&intr->instr); - /* Replicate the first valid component instead */ - nir_ssa_def *replicated = - nir_vec4(b, bifrost_nir_valid_channel(b, in, 0, first, mask), - bifrost_nir_valid_channel(b, in, 1, first, mask), - bifrost_nir_valid_channel(b, in, 2, first, mask), - bifrost_nir_valid_channel(b, in, 3, first, mask)); + /* Replicate the first valid component instead */ + nir_ssa_def *replicated = + nir_vec4(b, bifrost_nir_valid_channel(b, in, 0, first, mask), + bifrost_nir_valid_channel(b, in, 1, first, mask), + bifrost_nir_valid_channel(b, in, 2, first, mask), + bifrost_nir_valid_channel(b, in, 3, first, mask)); - /* Rewrite to use our replicated version */ - nir_instr_rewrite_src_ssa(instr, &intr->src[0], replicated); - nir_intrinsic_set_component(intr, 0); - nir_intrinsic_set_write_mask(intr, 0xF); - intr->num_components = 4; + /* Rewrite to use our replicated version */ + nir_instr_rewrite_src_ssa(instr, &intr->src[0], replicated); + nir_intrinsic_set_component(intr, 0); + nir_intrinsic_set_write_mask(intr, 0xF); + intr->num_components = 4; - return true; + return true; } static void bi_optimize_nir(nir_shader *nir, unsigned gpu_id, bool is_blend) { - bool progress; - unsigned lower_flrp = 16 | 32 | 64; + bool progress; + unsigned lower_flrp = 16 | 32 | 64; - NIR_PASS(progress, nir, nir_lower_regs_to_ssa); + NIR_PASS(progress, nir, nir_lower_regs_to_ssa); - nir_lower_tex_options lower_tex_options = { - .lower_txs_lod = true, - .lower_txp = ~0, - .lower_tg4_broadcom_swizzle = true, - .lower_txd = true, - .lower_invalid_implicit_lod = true, - }; + nir_lower_tex_options lower_tex_options = { + .lower_txs_lod = true, + .lower_txp = ~0, + .lower_tg4_broadcom_swizzle = true, + .lower_txd = true, + .lower_invalid_implicit_lod = true, + }; - NIR_PASS(progress, nir, pan_nir_lower_64bit_intrin); - NIR_PASS(progress, nir, pan_lower_helper_invocation); + NIR_PASS(progress, nir, pan_nir_lower_64bit_intrin); + NIR_PASS(progress, nir, pan_lower_helper_invocation); - NIR_PASS(progress, nir, nir_lower_int64); + NIR_PASS(progress, nir, nir_lower_int64); - nir_lower_idiv_options idiv_options = { - .allow_fp16 = true, - }; - NIR_PASS(progress, nir, nir_opt_idiv_const, 8); - NIR_PASS(progress, nir, nir_lower_idiv, &idiv_options); + nir_lower_idiv_options idiv_options = { + .allow_fp16 = true, + }; + NIR_PASS(progress, nir, 
nir_opt_idiv_const, 8); + NIR_PASS(progress, nir, nir_lower_idiv, &idiv_options); - NIR_PASS(progress, nir, nir_lower_tex, &lower_tex_options); - NIR_PASS(progress, nir, nir_lower_alu_to_scalar, bi_scalarize_filter, NULL); - NIR_PASS(progress, nir, nir_lower_load_const_to_scalar); - NIR_PASS(progress, nir, nir_lower_phis_to_scalar, true); + NIR_PASS(progress, nir, nir_lower_tex, &lower_tex_options); + NIR_PASS(progress, nir, nir_lower_alu_to_scalar, bi_scalarize_filter, NULL); + NIR_PASS(progress, nir, nir_lower_load_const_to_scalar); + NIR_PASS(progress, nir, nir_lower_phis_to_scalar, true); - do { - progress = false; + do { + progress = false; - NIR_PASS(progress, nir, nir_lower_var_copies); - NIR_PASS(progress, nir, nir_lower_vars_to_ssa); - NIR_PASS(progress, nir, nir_lower_wrmasks, should_split_wrmask, NULL); + NIR_PASS(progress, nir, nir_lower_var_copies); + NIR_PASS(progress, nir, nir_lower_vars_to_ssa); + NIR_PASS(progress, nir, nir_lower_wrmasks, should_split_wrmask, NULL); - NIR_PASS(progress, nir, nir_copy_prop); - NIR_PASS(progress, nir, nir_opt_remove_phis); - NIR_PASS(progress, nir, nir_opt_dce); - NIR_PASS(progress, nir, nir_opt_dead_cf); - NIR_PASS(progress, nir, nir_opt_cse); - NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true); - NIR_PASS(progress, nir, nir_opt_algebraic); - NIR_PASS(progress, nir, nir_opt_constant_folding); + NIR_PASS(progress, nir, nir_copy_prop); + NIR_PASS(progress, nir, nir_opt_remove_phis); + NIR_PASS(progress, nir, nir_opt_dce); + NIR_PASS(progress, nir, nir_opt_dead_cf); + NIR_PASS(progress, nir, nir_opt_cse); + NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true); + NIR_PASS(progress, nir, nir_opt_algebraic); + NIR_PASS(progress, nir, nir_opt_constant_folding); - NIR_PASS(progress, nir, nir_lower_alu); + NIR_PASS(progress, nir, nir_lower_alu); - if (lower_flrp != 0) { - bool lower_flrp_progress = false; - NIR_PASS(lower_flrp_progress, - nir, - nir_lower_flrp, - lower_flrp, - false /* always_precise */); - if (lower_flrp_progress) { - NIR_PASS(progress, nir, - nir_opt_constant_folding); - progress = true; - } + if (lower_flrp != 0) { + bool lower_flrp_progress = false; + NIR_PASS(lower_flrp_progress, nir, nir_lower_flrp, lower_flrp, + false /* always_precise */); + if (lower_flrp_progress) { + NIR_PASS(progress, nir, nir_opt_constant_folding); + progress = true; + } - /* Nothing should rematerialize any flrps, so we only - * need to do this lowering once. - */ - lower_flrp = 0; - } + /* Nothing should rematerialize any flrps, so we only + * need to do this lowering once. + */ + lower_flrp = 0; + } - NIR_PASS(progress, nir, nir_opt_undef); - NIR_PASS(progress, nir, nir_lower_undef_to_zero); + NIR_PASS(progress, nir, nir_opt_undef); + NIR_PASS(progress, nir, nir_lower_undef_to_zero); - NIR_PASS(progress, nir, nir_opt_shrink_vectors); - NIR_PASS(progress, nir, nir_opt_loop_unroll); - } while (progress); + NIR_PASS(progress, nir, nir_opt_shrink_vectors); + NIR_PASS(progress, nir, nir_opt_loop_unroll); + } while (progress); - /* TODO: Why is 64-bit getting rematerialized? - * KHR-GLES31.core.shader_image_load_store.basic-allTargets-atomicFS */ - NIR_PASS(progress, nir, nir_lower_int64); + /* TODO: Why is 64-bit getting rematerialized? 
+ * KHR-GLES31.core.shader_image_load_store.basic-allTargets-atomicFS */ + NIR_PASS(progress, nir, nir_lower_int64); - /* We need to cleanup after each iteration of late algebraic - * optimizations, since otherwise NIR can produce weird edge cases - * (like fneg of a constant) which we don't handle */ - bool late_algebraic = true; - while (late_algebraic) { - late_algebraic = false; - NIR_PASS(late_algebraic, nir, nir_opt_algebraic_late); - NIR_PASS(progress, nir, nir_opt_constant_folding); - NIR_PASS(progress, nir, nir_copy_prop); - NIR_PASS(progress, nir, nir_opt_dce); - NIR_PASS(progress, nir, nir_opt_cse); - } + /* We need to cleanup after each iteration of late algebraic + * optimizations, since otherwise NIR can produce weird edge cases + * (like fneg of a constant) which we don't handle */ + bool late_algebraic = true; + while (late_algebraic) { + late_algebraic = false; + NIR_PASS(late_algebraic, nir, nir_opt_algebraic_late); + NIR_PASS(progress, nir, nir_opt_constant_folding); + NIR_PASS(progress, nir, nir_copy_prop); + NIR_PASS(progress, nir, nir_opt_dce); + NIR_PASS(progress, nir, nir_opt_cse); + } - /* This opt currently helps on Bifrost but not Valhall */ - if (gpu_id < 0x9000) - NIR_PASS(progress, nir, bifrost_nir_opt_boolean_bitwise); + /* This opt currently helps on Bifrost but not Valhall */ + if (gpu_id < 0x9000) + NIR_PASS(progress, nir, bifrost_nir_opt_boolean_bitwise); - NIR_PASS(progress, nir, nir_lower_alu_to_scalar, bi_scalarize_filter, NULL); - NIR_PASS(progress, nir, nir_opt_vectorize, bi_vectorize_filter, NULL); - NIR_PASS(progress, nir, nir_lower_bool_to_bitsize); + NIR_PASS(progress, nir, nir_lower_alu_to_scalar, bi_scalarize_filter, NULL); + NIR_PASS(progress, nir, nir_opt_vectorize, bi_vectorize_filter, NULL); + NIR_PASS(progress, nir, nir_lower_bool_to_bitsize); - /* Prepass to simplify instruction selection */ - late_algebraic = false; - NIR_PASS(late_algebraic, nir, bifrost_nir_lower_algebraic_late); + /* Prepass to simplify instruction selection */ + late_algebraic = false; + NIR_PASS(late_algebraic, nir, bifrost_nir_lower_algebraic_late); - while (late_algebraic) { - late_algebraic = false; - NIR_PASS(late_algebraic, nir, nir_opt_algebraic_late); - NIR_PASS(progress, nir, nir_opt_constant_folding); - NIR_PASS(progress, nir, nir_copy_prop); - NIR_PASS(progress, nir, nir_opt_dce); - NIR_PASS(progress, nir, nir_opt_cse); - } + while (late_algebraic) { + late_algebraic = false; + NIR_PASS(late_algebraic, nir, nir_opt_algebraic_late); + NIR_PASS(progress, nir, nir_opt_constant_folding); + NIR_PASS(progress, nir, nir_copy_prop); + NIR_PASS(progress, nir, nir_opt_dce); + NIR_PASS(progress, nir, nir_opt_cse); + } - NIR_PASS(progress, nir, nir_lower_load_const_to_scalar); - NIR_PASS(progress, nir, nir_opt_dce); + NIR_PASS(progress, nir, nir_lower_load_const_to_scalar); + NIR_PASS(progress, nir, nir_opt_dce); - if (nir->info.stage == MESA_SHADER_FRAGMENT) { - NIR_PASS_V(nir, nir_shader_instructions_pass, - bifrost_nir_lower_blend_components, - nir_metadata_block_index | nir_metadata_dominance, - NULL); - } + if (nir->info.stage == MESA_SHADER_FRAGMENT) { + NIR_PASS_V(nir, nir_shader_instructions_pass, + bifrost_nir_lower_blend_components, + nir_metadata_block_index | nir_metadata_dominance, NULL); + } - /* Backend scheduler is purely local, so do some global optimizations - * to reduce register pressure. 
*/ - nir_move_options move_all = - nir_move_const_undef | nir_move_load_ubo | nir_move_load_input | - nir_move_comparisons | nir_move_copies | nir_move_load_ssbo; + /* Backend scheduler is purely local, so do some global optimizations + * to reduce register pressure. */ + nir_move_options move_all = nir_move_const_undef | nir_move_load_ubo | + nir_move_load_input | nir_move_comparisons | + nir_move_copies | nir_move_load_ssbo; - NIR_PASS_V(nir, nir_opt_sink, move_all); - NIR_PASS_V(nir, nir_opt_move, move_all); + NIR_PASS_V(nir, nir_opt_sink, move_all); + NIR_PASS_V(nir, nir_opt_move, move_all); - /* We might lower attribute, varying, and image indirects. Use the - * gathered info to skip the extra analysis in the happy path. */ - bool any_indirects = - nir->info.inputs_read_indirectly || - nir->info.outputs_accessed_indirectly || - nir->info.patch_inputs_read_indirectly || - nir->info.patch_outputs_accessed_indirectly || - nir->info.images_used[0]; + /* We might lower attribute, varying, and image indirects. Use the + * gathered info to skip the extra analysis in the happy path. */ + bool any_indirects = nir->info.inputs_read_indirectly || + nir->info.outputs_accessed_indirectly || + nir->info.patch_inputs_read_indirectly || + nir->info.patch_outputs_accessed_indirectly || + nir->info.images_used[0]; - if (any_indirects) { - nir_convert_to_lcssa(nir, true, true); - NIR_PASS_V(nir, nir_divergence_analysis); - NIR_PASS_V(nir, bi_lower_divergent_indirects, - pan_subgroup_size(gpu_id >> 12)); - } + if (any_indirects) { + nir_convert_to_lcssa(nir, true, true); + NIR_PASS_V(nir, nir_divergence_analysis); + NIR_PASS_V(nir, bi_lower_divergent_indirects, + pan_subgroup_size(gpu_id >> 12)); + } } static void bi_opt_post_ra(bi_context *ctx) { - bi_foreach_instr_global_safe(ctx, ins) { - if (ins->op == BI_OPCODE_MOV_I32 && bi_is_equiv(ins->dest[0], ins->src[0])) - bi_remove_instruction(ins); - } + bi_foreach_instr_global_safe(ctx, ins) { + if (ins->op == BI_OPCODE_MOV_I32 && + bi_is_equiv(ins->dest[0], ins->src[0])) + bi_remove_instruction(ins); + } } /* Dead code elimination for branches at the end of a block - only one branch @@ -4714,52 +4676,53 @@ bi_opt_post_ra(bi_context *ctx) static void bi_lower_branch(bi_context *ctx, bi_block *block) { - bool cull_terminal = (ctx->arch <= 8); - bool branched = false; + bool cull_terminal = (ctx->arch <= 8); + bool branched = false; - bi_foreach_instr_in_block_safe(block, ins) { - if (!ins->branch_target) continue; + bi_foreach_instr_in_block_safe(block, ins) { + if (!ins->branch_target) + continue; - if (branched) { - bi_remove_instruction(ins); - continue; - } + if (branched) { + bi_remove_instruction(ins); + continue; + } - branched = true; + branched = true; - if (!bi_is_terminal_block(ins->branch_target)) - continue; + if (!bi_is_terminal_block(ins->branch_target)) + continue; - if (cull_terminal) - ins->branch_target = NULL; - else if (ins->branch_target) - ins->branch_target->needs_nop = true; - } + if (cull_terminal) + ins->branch_target = NULL; + else if (ins->branch_target) + ins->branch_target->needs_nop = true; + } } static void bi_pack_clauses(bi_context *ctx, struct util_dynarray *binary, unsigned offset) { - unsigned final_clause = bi_pack(ctx, binary); + unsigned final_clause = bi_pack(ctx, binary); - /* If we need to wait for ATEST or BLEND in the first clause, pass the - * corresponding bits through to the renderer state descriptor */ - bi_block *first_block = list_first_entry(&ctx->blocks, bi_block, link); - bi_clause *first_clause = 
bi_next_clause(ctx, first_block, NULL); + /* If we need to wait for ATEST or BLEND in the first clause, pass the + * corresponding bits through to the renderer state descriptor */ + bi_block *first_block = list_first_entry(&ctx->blocks, bi_block, link); + bi_clause *first_clause = bi_next_clause(ctx, first_block, NULL); - unsigned first_deps = first_clause ? first_clause->dependencies : 0; - ctx->info.bifrost->wait_6 = (first_deps & (1 << 6)); - ctx->info.bifrost->wait_7 = (first_deps & (1 << 7)); + unsigned first_deps = first_clause ? first_clause->dependencies : 0; + ctx->info.bifrost->wait_6 = (first_deps & (1 << 6)); + ctx->info.bifrost->wait_7 = (first_deps & (1 << 7)); - /* Pad the shader with enough zero bytes to trick the prefetcher, - * unless we're compiling an empty shader (in which case we don't pad - * so the size remains 0) */ - unsigned prefetch_size = BIFROST_SHADER_PREFETCH - final_clause; + /* Pad the shader with enough zero bytes to trick the prefetcher, + * unless we're compiling an empty shader (in which case we don't pad + * so the size remains 0) */ + unsigned prefetch_size = BIFROST_SHADER_PREFETCH - final_clause; - if (binary->size - offset) { - memset(util_dynarray_grow(binary, uint8_t, prefetch_size), - 0, prefetch_size); - } + if (binary->size - offset) { + memset(util_dynarray_grow(binary, uint8_t, prefetch_size), 0, + prefetch_size); + } } /* @@ -4780,133 +4743,132 @@ bi_pack_clauses(bi_context *ctx, struct util_dynarray *binary, unsigned offset) static bool bi_gather_texcoords(nir_builder *b, nir_instr *instr, void *data) { - uint64_t *mask = data; + uint64_t *mask = data; - if (instr->type != nir_instr_type_tex) - return false; + if (instr->type != nir_instr_type_tex) + return false; - nir_tex_instr *tex = nir_instr_as_tex(instr); + nir_tex_instr *tex = nir_instr_as_tex(instr); - int coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord); - if (coord_idx < 0) - return false; + int coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord); + if (coord_idx < 0) + return false; - nir_src src = tex->src[coord_idx].src; - nir_ssa_scalar x = nir_ssa_scalar_resolved(src.ssa, 0); - nir_ssa_scalar y = nir_ssa_scalar_resolved(src.ssa, 1); + nir_src src = tex->src[coord_idx].src; + nir_ssa_scalar x = nir_ssa_scalar_resolved(src.ssa, 0); + nir_ssa_scalar y = nir_ssa_scalar_resolved(src.ssa, 1); - if (x.def != y.def) - return false; + if (x.def != y.def) + return false; - nir_instr *parent = x.def->parent_instr; + nir_instr *parent = x.def->parent_instr; - if (parent->type != nir_instr_type_intrinsic) - return false; + if (parent->type != nir_instr_type_intrinsic) + return false; - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent); + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent); - if (intr->intrinsic != nir_intrinsic_load_interpolated_input) - return false; + if (intr->intrinsic != nir_intrinsic_load_interpolated_input) + return false; - nir_io_semantics sem = nir_intrinsic_io_semantics(intr); - *mask |= BITFIELD64_BIT(sem.location); - return false; + nir_io_semantics sem = nir_intrinsic_io_semantics(intr); + *mask |= BITFIELD64_BIT(sem.location); + return false; } static uint64_t bi_fp32_varying_mask(nir_shader *nir) { - uint64_t mask = 0; + uint64_t mask = 0; - assert(nir->info.stage == MESA_SHADER_FRAGMENT); + assert(nir->info.stage == MESA_SHADER_FRAGMENT); - nir_foreach_shader_in_variable(var, nir) { - if (var->data.interpolation == INTERP_MODE_FLAT) - mask |= BITFIELD64_BIT(var->data.location); - } + nir_foreach_shader_in_variable(var, 
nir) { + if (var->data.interpolation == INTERP_MODE_FLAT) + mask |= BITFIELD64_BIT(var->data.location); + } - nir_shader_instructions_pass(nir, bi_gather_texcoords, nir_metadata_all, &mask); + nir_shader_instructions_pass(nir, bi_gather_texcoords, nir_metadata_all, + &mask); - return mask; + return mask; } static void bi_finalize_nir(nir_shader *nir, unsigned gpu_id, bool is_blend) { - /* Lower gl_Position pre-optimisation, but after lowering vars to ssa - * (so we don't accidentally duplicate the epilogue since mesa/st has - * messed with our I/O quite a bit already) */ + /* Lower gl_Position pre-optimisation, but after lowering vars to ssa + * (so we don't accidentally duplicate the epilogue since mesa/st has + * messed with our I/O quite a bit already) */ - NIR_PASS_V(nir, nir_lower_vars_to_ssa); + NIR_PASS_V(nir, nir_lower_vars_to_ssa); - if (nir->info.stage == MESA_SHADER_VERTEX) { - NIR_PASS_V(nir, nir_lower_viewport_transform); - NIR_PASS_V(nir, nir_lower_point_size, 1.0, 0.0); + if (nir->info.stage == MESA_SHADER_VERTEX) { + NIR_PASS_V(nir, nir_lower_viewport_transform); + NIR_PASS_V(nir, nir_lower_point_size, 1.0, 0.0); - nir_variable *psiz = nir_find_variable_with_location(nir, - nir_var_shader_out, - VARYING_SLOT_PSIZ); - if (psiz != NULL) - psiz->data.precision = GLSL_PRECISION_MEDIUM; - } + nir_variable *psiz = nir_find_variable_with_location( + nir, nir_var_shader_out, VARYING_SLOT_PSIZ); + if (psiz != NULL) + psiz->data.precision = GLSL_PRECISION_MEDIUM; + } - /* Get rid of any global vars before we lower to scratch. */ - NIR_PASS_V(nir, nir_lower_global_vars_to_local); + /* Get rid of any global vars before we lower to scratch. */ + NIR_PASS_V(nir, nir_lower_global_vars_to_local); - /* Valhall introduces packed thread local storage, which improves cache - * locality of TLS access. However, access to packed TLS cannot - * straddle 16-byte boundaries. As such, when packed TLS is in use - * (currently unconditional for Valhall), we force vec4 alignment for - * scratch access. - */ - bool packed_tls = (gpu_id >= 0x9000); + /* Valhall introduces packed thread local storage, which improves cache + * locality of TLS access. However, access to packed TLS cannot + * straddle 16-byte boundaries. As such, when packed TLS is in use + * (currently unconditional for Valhall), we force vec4 alignment for + * scratch access. + */ + bool packed_tls = (gpu_id >= 0x9000); - /* Lower large arrays to scratch and small arrays to bcsel */ - NIR_PASS_V(nir, nir_lower_vars_to_scratch, nir_var_function_temp, 256, - packed_tls ? - glsl_get_vec4_size_align_bytes : - glsl_get_natural_size_align_bytes); - NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_function_temp, ~0); + /* Lower large arrays to scratch and small arrays to bcsel */ + NIR_PASS_V(nir, nir_lower_vars_to_scratch, nir_var_function_temp, 256, + packed_tls ? 
glsl_get_vec4_size_align_bytes + : glsl_get_natural_size_align_bytes); + NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_function_temp, ~0); - NIR_PASS_V(nir, nir_split_var_copies); - NIR_PASS_V(nir, nir_lower_var_copies); - NIR_PASS_V(nir, nir_lower_vars_to_ssa); - NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, - glsl_type_size, 0); + NIR_PASS_V(nir, nir_split_var_copies); + NIR_PASS_V(nir, nir_lower_var_copies); + NIR_PASS_V(nir, nir_lower_vars_to_ssa); + NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, + glsl_type_size, 0); - /* nir_lower[_explicit]_io is lazy and emits mul+add chains even for - * offsets it could figure out are constant. Do some constant folding - * before bifrost_nir_lower_store_component below. - */ - NIR_PASS_V(nir, nir_opt_constant_folding); + /* nir_lower[_explicit]_io is lazy and emits mul+add chains even for + * offsets it could figure out are constant. Do some constant folding + * before bifrost_nir_lower_store_component below. + */ + NIR_PASS_V(nir, nir_opt_constant_folding); - if (nir->info.stage == MESA_SHADER_FRAGMENT) { - NIR_PASS_V(nir, nir_lower_mediump_io, - nir_var_shader_in | nir_var_shader_out, - ~bi_fp32_varying_mask(nir), false); - } else if (nir->info.stage == MESA_SHADER_VERTEX) { - if (gpu_id >= 0x9000) { - NIR_PASS_V(nir, nir_lower_mediump_io, nir_var_shader_out, - BITFIELD64_BIT(VARYING_SLOT_PSIZ), false); - } + if (nir->info.stage == MESA_SHADER_FRAGMENT) { + NIR_PASS_V(nir, nir_lower_mediump_io, + nir_var_shader_in | nir_var_shader_out, + ~bi_fp32_varying_mask(nir), false); + } else if (nir->info.stage == MESA_SHADER_VERTEX) { + if (gpu_id >= 0x9000) { + NIR_PASS_V(nir, nir_lower_mediump_io, nir_var_shader_out, + BITFIELD64_BIT(VARYING_SLOT_PSIZ), false); + } - NIR_PASS_V(nir, pan_nir_lower_store_component); - } + NIR_PASS_V(nir, pan_nir_lower_store_component); + } - NIR_PASS_V(nir, nir_lower_ssbo); - NIR_PASS_V(nir, pan_nir_lower_zs_store); - NIR_PASS_V(nir, pan_lower_sample_pos); - NIR_PASS_V(nir, nir_lower_bit_size, bi_lower_bit_size, NULL); - NIR_PASS_V(nir, nir_lower_64bit_phis); + NIR_PASS_V(nir, nir_lower_ssbo); + NIR_PASS_V(nir, pan_nir_lower_zs_store); + NIR_PASS_V(nir, pan_lower_sample_pos); + NIR_PASS_V(nir, nir_lower_bit_size, bi_lower_bit_size, NULL); + NIR_PASS_V(nir, nir_lower_64bit_phis); - if (nir->xfb_info != NULL && nir->info.has_transform_feedback_varyings) { - NIR_PASS_V(nir, nir_io_add_const_offset_to_base, - nir_var_shader_in | nir_var_shader_out); - NIR_PASS_V(nir, nir_io_add_intrinsic_xfb_info); - NIR_PASS_V(nir, pan_lower_xfb); - } + if (nir->xfb_info != NULL && nir->info.has_transform_feedback_varyings) { + NIR_PASS_V(nir, nir_io_add_const_offset_to_base, + nir_var_shader_in | nir_var_shader_out); + NIR_PASS_V(nir, nir_io_add_intrinsic_xfb_info); + NIR_PASS_V(nir, pan_lower_xfb); + } - bi_optimize_nir(nir, gpu_id, is_blend); + bi_optimize_nir(nir, gpu_id, is_blend); } static bi_context * @@ -4914,271 +4876,266 @@ bi_compile_variant_nir(nir_shader *nir, const struct panfrost_compile_inputs *inputs, struct util_dynarray *binary, struct hash_table_u64 *sysval_to_id, - struct bi_shader_info info, - enum bi_idvs_mode idvs) + struct bi_shader_info info, enum bi_idvs_mode idvs) { - bi_context *ctx = rzalloc(NULL, bi_context); + bi_context *ctx = rzalloc(NULL, bi_context); - /* There may be another program in the dynarray, start at the end */ - unsigned offset = binary->size; + /* There may be another program in the dynarray, start at the end */ + unsigned offset = 
binary->size; - ctx->sysval_to_id = sysval_to_id; - ctx->inputs = inputs; - ctx->nir = nir; - ctx->stage = nir->info.stage; - ctx->quirks = bifrost_get_quirks(inputs->gpu_id); - ctx->arch = inputs->gpu_id >> 12; - ctx->info = info; - ctx->idvs = idvs; - ctx->malloc_idvs = (ctx->arch >= 9) && !inputs->no_idvs; + ctx->sysval_to_id = sysval_to_id; + ctx->inputs = inputs; + ctx->nir = nir; + ctx->stage = nir->info.stage; + ctx->quirks = bifrost_get_quirks(inputs->gpu_id); + ctx->arch = inputs->gpu_id >> 12; + ctx->info = info; + ctx->idvs = idvs; + ctx->malloc_idvs = (ctx->arch >= 9) && !inputs->no_idvs; - if (idvs != BI_IDVS_NONE) { - /* Specializing shaders for IDVS is destructive, so we need to - * clone. However, the last (second) IDVS shader does not need - * to be preserved so we can skip cloning that one. - */ - if (offset == 0) - ctx->nir = nir = nir_shader_clone(ctx, nir); + if (idvs != BI_IDVS_NONE) { + /* Specializing shaders for IDVS is destructive, so we need to + * clone. However, the last (second) IDVS shader does not need + * to be preserved so we can skip cloning that one. + */ + if (offset == 0) + ctx->nir = nir = nir_shader_clone(ctx, nir); - NIR_PASS_V(nir, nir_shader_instructions_pass, - bifrost_nir_specialize_idvs, - nir_metadata_block_index | nir_metadata_dominance, - &idvs); + NIR_PASS_V(nir, nir_shader_instructions_pass, bifrost_nir_specialize_idvs, + nir_metadata_block_index | nir_metadata_dominance, &idvs); - /* After specializing, clean up the mess */ - bool progress = true; + /* After specializing, clean up the mess */ + bool progress = true; - while (progress) { - progress = false; + while (progress) { + progress = false; - NIR_PASS(progress, nir, nir_opt_dce); - NIR_PASS(progress, nir, nir_opt_dead_cf); - } - } + NIR_PASS(progress, nir, nir_opt_dce); + NIR_PASS(progress, nir, nir_opt_dead_cf); + } + } - /* If nothing is pushed, all UBOs need to be uploaded */ - ctx->ubo_mask = ~0; + /* If nothing is pushed, all UBOs need to be uploaded */ + ctx->ubo_mask = ~0; - list_inithead(&ctx->blocks); + list_inithead(&ctx->blocks); - bool skip_internal = nir->info.internal; - skip_internal &= !(bifrost_debug & BIFROST_DBG_INTERNAL); + bool skip_internal = nir->info.internal; + skip_internal &= !(bifrost_debug & BIFROST_DBG_INTERNAL); - if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal) { - nir_print_shader(nir, stdout); - } + if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal) { + nir_print_shader(nir, stdout); + } - ctx->allocated_vec = _mesa_hash_table_u64_create(ctx); + ctx->allocated_vec = _mesa_hash_table_u64_create(ctx); - nir_foreach_function(func, nir) { - if (!func->impl) - continue; + nir_foreach_function(func, nir) { + if (!func->impl) + continue; - nir_index_blocks(func->impl); + nir_index_blocks(func->impl); - ctx->indexed_nir_blocks = - rzalloc_array(ctx, bi_block *, func->impl->num_blocks); + ctx->indexed_nir_blocks = + rzalloc_array(ctx, bi_block *, func->impl->num_blocks); - ctx->ssa_alloc += func->impl->ssa_alloc; - ctx->reg_alloc += func->impl->reg_alloc; + ctx->ssa_alloc += func->impl->ssa_alloc; + ctx->reg_alloc += func->impl->reg_alloc; - emit_cf_list(ctx, &func->impl->body); - bi_emit_phis_deferred(ctx); - break; /* TODO: Multi-function shaders */ - } + emit_cf_list(ctx, &func->impl->body); + bi_emit_phis_deferred(ctx); + break; /* TODO: Multi-function shaders */ + } - /* Index blocks now that we're done emitting */ - bi_foreach_block(ctx, block) { - block->index = ctx->num_blocks++; - } + /* Index blocks now that we're done emitting */ 
+ bi_foreach_block(ctx, block) { + block->index = ctx->num_blocks++; + } - bi_validate(ctx, "NIR -> BIR"); + bi_validate(ctx, "NIR -> BIR"); - /* If the shader doesn't write any colour or depth outputs, it may - * still need an ATEST at the very end! */ - bool need_dummy_atest = - (ctx->stage == MESA_SHADER_FRAGMENT) && - !ctx->emitted_atest && - !bi_skip_atest(ctx, false); + /* If the shader doesn't write any colour or depth outputs, it may + * still need an ATEST at the very end! */ + bool need_dummy_atest = (ctx->stage == MESA_SHADER_FRAGMENT) && + !ctx->emitted_atest && !bi_skip_atest(ctx, false); - if (need_dummy_atest) { - bi_block *end = list_last_entry(&ctx->blocks, bi_block, link); - bi_builder b = bi_init_builder(ctx, bi_after_block(end)); - bi_emit_atest(&b, bi_zero()); - } + if (need_dummy_atest) { + bi_block *end = list_last_entry(&ctx->blocks, bi_block, link); + bi_builder b = bi_init_builder(ctx, bi_after_block(end)); + bi_emit_atest(&b, bi_zero()); + } - bool optimize = !(bifrost_debug & BIFROST_DBG_NOOPT); + bool optimize = !(bifrost_debug & BIFROST_DBG_NOOPT); - /* Runs before constant folding */ - bi_lower_swizzle(ctx); - bi_validate(ctx, "Early lowering"); + /* Runs before constant folding */ + bi_lower_swizzle(ctx); + bi_validate(ctx, "Early lowering"); - /* Runs before copy prop */ - if (optimize && !ctx->inputs->no_ubo_to_push) { - bi_opt_push_ubo(ctx); - } + /* Runs before copy prop */ + if (optimize && !ctx->inputs->no_ubo_to_push) { + bi_opt_push_ubo(ctx); + } - if (likely(optimize)) { - bi_opt_copy_prop(ctx); + if (likely(optimize)) { + bi_opt_copy_prop(ctx); - while (bi_opt_constant_fold(ctx)) - bi_opt_copy_prop(ctx); + while (bi_opt_constant_fold(ctx)) + bi_opt_copy_prop(ctx); - bi_opt_mod_prop_forward(ctx); - bi_opt_mod_prop_backward(ctx); + bi_opt_mod_prop_forward(ctx); + bi_opt_mod_prop_backward(ctx); - /* Push LD_VAR_IMM/VAR_TEX instructions. Must run after - * mod_prop_backward to fuse VAR_TEX */ - if (ctx->arch == 7 && ctx->stage == MESA_SHADER_FRAGMENT && - !(bifrost_debug & BIFROST_DBG_NOPRELOAD)) { - bi_opt_dead_code_eliminate(ctx); - bi_opt_message_preload(ctx); - bi_opt_copy_prop(ctx); - } + /* Push LD_VAR_IMM/VAR_TEX instructions. 
Must run after + * mod_prop_backward to fuse VAR_TEX */ + if (ctx->arch == 7 && ctx->stage == MESA_SHADER_FRAGMENT && + !(bifrost_debug & BIFROST_DBG_NOPRELOAD)) { + bi_opt_dead_code_eliminate(ctx); + bi_opt_message_preload(ctx); + bi_opt_copy_prop(ctx); + } - bi_opt_dead_code_eliminate(ctx); - bi_opt_cse(ctx); - bi_opt_dead_code_eliminate(ctx); - if (!ctx->inputs->no_ubo_to_push) - bi_opt_reorder_push(ctx); - bi_validate(ctx, "Optimization passes"); - } + bi_opt_dead_code_eliminate(ctx); + bi_opt_cse(ctx); + bi_opt_dead_code_eliminate(ctx); + if (!ctx->inputs->no_ubo_to_push) + bi_opt_reorder_push(ctx); + bi_validate(ctx, "Optimization passes"); + } - bi_lower_opt_instructions(ctx); + bi_lower_opt_instructions(ctx); - if (ctx->arch >= 9) { - va_optimize(ctx); - va_lower_isel(ctx); + if (ctx->arch >= 9) { + va_optimize(ctx); + va_lower_isel(ctx); - bi_foreach_instr_global_safe(ctx, I) { - /* Phis become single moves so shouldn't be affected */ - if (I->op == BI_OPCODE_PHI) - continue; + bi_foreach_instr_global_safe(ctx, I) { + /* Phis become single moves so shouldn't be affected */ + if (I->op == BI_OPCODE_PHI) + continue; - va_lower_constants(ctx, I); + va_lower_constants(ctx, I); - bi_builder b = bi_init_builder(ctx, bi_before_instr(I)); - va_repair_fau(&b, I); - } + bi_builder b = bi_init_builder(ctx, bi_before_instr(I)); + va_repair_fau(&b, I); + } - /* We need to clean up after constant lowering */ - if (likely(optimize)) { - bi_opt_cse(ctx); - bi_opt_dead_code_eliminate(ctx); - } + /* We need to clean up after constant lowering */ + if (likely(optimize)) { + bi_opt_cse(ctx); + bi_opt_dead_code_eliminate(ctx); + } - bi_validate(ctx, "Valhall passes"); - } + bi_validate(ctx, "Valhall passes"); + } - bi_foreach_block(ctx, block) { - bi_lower_branch(ctx, block); - } + bi_foreach_block(ctx, block) { + bi_lower_branch(ctx, block); + } - if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal) - bi_print_shader(ctx, stdout); + if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal) + bi_print_shader(ctx, stdout); - /* Analyze before register allocation to avoid false dependencies. The - * skip bit is a function of only the data flow graph and is invariant - * under valid scheduling. Helpers are only defined for fragment - * shaders, so this analysis is only required in fragment shaders. - */ - if (ctx->stage == MESA_SHADER_FRAGMENT) - bi_analyze_helper_requirements(ctx); + /* Analyze before register allocation to avoid false dependencies. The + * skip bit is a function of only the data flow graph and is invariant + * under valid scheduling. Helpers are only defined for fragment + * shaders, so this analysis is only required in fragment shaders. + */ + if (ctx->stage == MESA_SHADER_FRAGMENT) + bi_analyze_helper_requirements(ctx); - /* Fuse TEXC after analyzing helper requirements so the analysis - * doesn't have to know about dual textures */ - if (likely(optimize)) { - bi_opt_fuse_dual_texture(ctx); - } + /* Fuse TEXC after analyzing helper requirements so the analysis + * doesn't have to know about dual textures */ + if (likely(optimize)) { + bi_opt_fuse_dual_texture(ctx); + } - /* Lower FAU after fusing dual texture, because fusing dual texture - * creates new immediates that themselves may need lowering. - */ - if (ctx->arch <= 8) { - bi_lower_fau(ctx); - } + /* Lower FAU after fusing dual texture, because fusing dual texture + * creates new immediates that themselves may need lowering. 
+ */ + if (ctx->arch <= 8) { + bi_lower_fau(ctx); + } - /* Lowering FAU can create redundant moves. Run CSE+DCE to clean up. */ - if (likely(optimize)) { - bi_opt_cse(ctx); - bi_opt_dead_code_eliminate(ctx); - } + /* Lowering FAU can create redundant moves. Run CSE+DCE to clean up. */ + if (likely(optimize)) { + bi_opt_cse(ctx); + bi_opt_dead_code_eliminate(ctx); + } - bi_validate(ctx, "Late lowering"); + bi_validate(ctx, "Late lowering"); - if (likely(!(bifrost_debug & BIFROST_DBG_NOPSCHED))) { - bi_pressure_schedule(ctx); - bi_validate(ctx, "Pre-RA scheduling"); - } + if (likely(!(bifrost_debug & BIFROST_DBG_NOPSCHED))) { + bi_pressure_schedule(ctx); + bi_validate(ctx, "Pre-RA scheduling"); + } - bi_register_allocate(ctx); + bi_register_allocate(ctx); - if (likely(optimize)) - bi_opt_post_ra(ctx); + if (likely(optimize)) + bi_opt_post_ra(ctx); - if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal) - bi_print_shader(ctx, stdout); + if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal) + bi_print_shader(ctx, stdout); - if (ctx->arch >= 9) { - va_assign_slots(ctx); - va_insert_flow_control_nops(ctx); - va_merge_flow(ctx); - va_mark_last(ctx); - } else { - bi_schedule(ctx); - bi_assign_scoreboard(ctx); + if (ctx->arch >= 9) { + va_assign_slots(ctx); + va_insert_flow_control_nops(ctx); + va_merge_flow(ctx); + va_mark_last(ctx); + } else { + bi_schedule(ctx); + bi_assign_scoreboard(ctx); - /* Analyze after scheduling since we depend on instruction - * order. Valhall calls as part of va_insert_flow_control_nops, - * as the handling for clauses differs from instructions. - */ - bi_analyze_helper_terminate(ctx); - bi_mark_clauses_td(ctx); - } + /* Analyze after scheduling since we depend on instruction + * order. Valhall calls as part of va_insert_flow_control_nops, + * as the handling for clauses differs from instructions. 
+ */ + bi_analyze_helper_terminate(ctx); + bi_mark_clauses_td(ctx); + } - if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal) - bi_print_shader(ctx, stdout); + if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal) + bi_print_shader(ctx, stdout); - if (ctx->arch <= 8) { - bi_pack_clauses(ctx, binary, offset); - } else { - bi_pack_valhall(ctx, binary); - } + if (ctx->arch <= 8) { + bi_pack_clauses(ctx, binary, offset); + } else { + bi_pack_valhall(ctx, binary); + } - if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal) { - if (ctx->arch <= 8) { - disassemble_bifrost(stdout, binary->data + offset, - binary->size - offset, - bifrost_debug & BIFROST_DBG_VERBOSE); - } else { - disassemble_valhall(stdout, binary->data + offset, - binary->size - offset, - bifrost_debug & BIFROST_DBG_VERBOSE); - } + if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal) { + if (ctx->arch <= 8) { + disassemble_bifrost(stdout, binary->data + offset, + binary->size - offset, + bifrost_debug & BIFROST_DBG_VERBOSE); + } else { + disassemble_valhall(stdout, binary->data + offset, + binary->size - offset, + bifrost_debug & BIFROST_DBG_VERBOSE); + } - fflush(stdout); - } + fflush(stdout); + } - if (!skip_internal && - ((bifrost_debug & BIFROST_DBG_SHADERDB) || inputs->debug)) { - char *shaderdb; + if (!skip_internal && + ((bifrost_debug & BIFROST_DBG_SHADERDB) || inputs->debug)) { + char *shaderdb; - if (ctx->arch >= 9) { - shaderdb = va_print_stats(ctx, binary->size - offset); - } else { - shaderdb = bi_print_stats(ctx, binary->size - offset); - } + if (ctx->arch >= 9) { + shaderdb = va_print_stats(ctx, binary->size - offset); + } else { + shaderdb = bi_print_stats(ctx, binary->size - offset); + } - if (bifrost_debug & BIFROST_DBG_SHADERDB) - fprintf(stderr, "SHADER-DB: %s\n", shaderdb); + if (bifrost_debug & BIFROST_DBG_SHADERDB) + fprintf(stderr, "SHADER-DB: %s\n", shaderdb); - if (inputs->debug) - util_debug_message(inputs->debug, SHADER_INFO, "%s", shaderdb); + if (inputs->debug) + util_debug_message(inputs->debug, SHADER_INFO, "%s", shaderdb); - ralloc_free(shaderdb); - } + ralloc_free(shaderdb); + } - return ctx; + return ctx; } static void @@ -5186,114 +5143,113 @@ bi_compile_variant(nir_shader *nir, const struct panfrost_compile_inputs *inputs, struct util_dynarray *binary, struct hash_table_u64 *sysval_to_id, - struct pan_shader_info *info, - enum bi_idvs_mode idvs) + struct pan_shader_info *info, enum bi_idvs_mode idvs) { - struct bi_shader_info local_info = { - .push = &info->push, - .bifrost = &info->bifrost, - .tls_size = info->tls_size, - .sysvals = &info->sysvals, - .push_offset = info->push.count, - }; + struct bi_shader_info local_info = { + .push = &info->push, + .bifrost = &info->bifrost, + .tls_size = info->tls_size, + .sysvals = &info->sysvals, + .push_offset = info->push.count, + }; - unsigned offset = binary->size; + unsigned offset = binary->size; - /* If there is no position shader (gl_Position is not written), then - * there is no need to build a varying shader either. This case is hit - * for transform feedback only vertex shaders which only make sense with - * rasterizer discard. - */ - if ((offset == 0) && (idvs == BI_IDVS_VARYING)) - return; + /* If there is no position shader (gl_Position is not written), then + * there is no need to build a varying shader either. This case is hit + * for transform feedback only vertex shaders which only make sense with + * rasterizer discard. 
+ */ + if ((offset == 0) && (idvs == BI_IDVS_VARYING)) + return; - /* Software invariant: Only a secondary shader can appear at a nonzero - * offset, to keep the ABI simple. */ - assert((offset == 0) ^ (idvs == BI_IDVS_VARYING)); + /* Software invariant: Only a secondary shader can appear at a nonzero + * offset, to keep the ABI simple. */ + assert((offset == 0) ^ (idvs == BI_IDVS_VARYING)); - bi_context *ctx = bi_compile_variant_nir(nir, inputs, binary, sysval_to_id, local_info, idvs); + bi_context *ctx = bi_compile_variant_nir(nir, inputs, binary, sysval_to_id, + local_info, idvs); - /* A register is preloaded <==> it is live before the first block */ - bi_block *first_block = list_first_entry(&ctx->blocks, bi_block, link); - uint64_t preload = first_block->reg_live_in; + /* A register is preloaded <==> it is live before the first block */ + bi_block *first_block = list_first_entry(&ctx->blocks, bi_block, link); + uint64_t preload = first_block->reg_live_in; - /* If multisampling is used with a blend shader, the blend shader needs - * to access the sample coverage mask in r60 and the sample ID in r61. - * Blend shaders run in the same context as fragment shaders, so if a - * blend shader could run, we need to preload these registers - * conservatively. There is believed to be little cost to doing so, so - * do so always to avoid variants of the preload descriptor. - * - * We only do this on Valhall, as Bifrost has to update the RSD for - * multisampling w/ blend shader anyway, so this is handled in the - * driver. We could unify the paths if the cost is acceptable. - */ - if (nir->info.stage == MESA_SHADER_FRAGMENT && ctx->arch >= 9) - preload |= BITFIELD64_BIT(60) | BITFIELD64_BIT(61); + /* If multisampling is used with a blend shader, the blend shader needs + * to access the sample coverage mask in r60 and the sample ID in r61. + * Blend shaders run in the same context as fragment shaders, so if a + * blend shader could run, we need to preload these registers + * conservatively. There is believed to be little cost to doing so, so + * do so always to avoid variants of the preload descriptor. + * + * We only do this on Valhall, as Bifrost has to update the RSD for + * multisampling w/ blend shader anyway, so this is handled in the + * driver. We could unify the paths if the cost is acceptable. 
+ */ + if (nir->info.stage == MESA_SHADER_FRAGMENT && ctx->arch >= 9) + preload |= BITFIELD64_BIT(60) | BITFIELD64_BIT(61); - info->ubo_mask |= ctx->ubo_mask; - info->tls_size = MAX2(info->tls_size, ctx->info.tls_size); + info->ubo_mask |= ctx->ubo_mask; + info->tls_size = MAX2(info->tls_size, ctx->info.tls_size); - if (idvs == BI_IDVS_VARYING) { - info->vs.secondary_enable = (binary->size > offset); - info->vs.secondary_offset = offset; - info->vs.secondary_preload = preload; - info->vs.secondary_work_reg_count = ctx->info.work_reg_count; - } else { - info->preload = preload; - info->work_reg_count = ctx->info.work_reg_count; - } + if (idvs == BI_IDVS_VARYING) { + info->vs.secondary_enable = (binary->size > offset); + info->vs.secondary_offset = offset; + info->vs.secondary_preload = preload; + info->vs.secondary_work_reg_count = ctx->info.work_reg_count; + } else { + info->preload = preload; + info->work_reg_count = ctx->info.work_reg_count; + } - if (idvs == BI_IDVS_POSITION && - !nir->info.internal && - nir->info.outputs_written & BITFIELD_BIT(VARYING_SLOT_PSIZ)) { - /* Find the psiz write */ - bi_instr *write = NULL; + if (idvs == BI_IDVS_POSITION && !nir->info.internal && + nir->info.outputs_written & BITFIELD_BIT(VARYING_SLOT_PSIZ)) { + /* Find the psiz write */ + bi_instr *write = NULL; - bi_foreach_instr_global(ctx, I) { - if (I->op == BI_OPCODE_STORE_I16 && I->seg == BI_SEG_POS) { - write = I; - break; - } - } + bi_foreach_instr_global(ctx, I) { + if (I->op == BI_OPCODE_STORE_I16 && I->seg == BI_SEG_POS) { + write = I; + break; + } + } - assert(write != NULL); + assert(write != NULL); - /* NOP it out, preserving its flow control. TODO: maybe DCE */ - if (write->flow) { - bi_builder b = bi_init_builder(ctx, bi_before_instr(write)); - bi_instr *nop = bi_nop(&b); - nop->flow = write->flow; - } + /* NOP it out, preserving its flow control. 
TODO: maybe DCE */ + if (write->flow) { + bi_builder b = bi_init_builder(ctx, bi_before_instr(write)); + bi_instr *nop = bi_nop(&b); + nop->flow = write->flow; + } - bi_remove_instruction(write); + bi_remove_instruction(write); - info->vs.no_psiz_offset = binary->size; - bi_pack_valhall(ctx, binary); - } + info->vs.no_psiz_offset = binary->size; + bi_pack_valhall(ctx, binary); + } - ralloc_free(ctx); + ralloc_free(ctx); } /* Decide if Index-Driven Vertex Shading should be used for a given shader */ static bool bi_should_idvs(nir_shader *nir, const struct panfrost_compile_inputs *inputs) { - /* Opt-out */ - if (inputs->no_idvs || bifrost_debug & BIFROST_DBG_NOIDVS) - return false; + /* Opt-out */ + if (inputs->no_idvs || bifrost_debug & BIFROST_DBG_NOIDVS) + return false; - /* IDVS splits up vertex shaders, not defined on other shader stages */ - if (nir->info.stage != MESA_SHADER_VERTEX) - return false; + /* IDVS splits up vertex shaders, not defined on other shader stages */ + if (nir->info.stage != MESA_SHADER_VERTEX) + return false; - /* Bifrost cannot write gl_PointSize during IDVS */ - if ((inputs->gpu_id < 0x9000) && - nir->info.outputs_written & BITFIELD_BIT(VARYING_SLOT_PSIZ)) - return false; + /* Bifrost cannot write gl_PointSize during IDVS */ + if ((inputs->gpu_id < 0x9000) && + nir->info.outputs_written & BITFIELD_BIT(VARYING_SLOT_PSIZ)) + return false; - /* Otherwise, IDVS is usually better */ - return true; + /* Otherwise, IDVS is usually better */ + return true; } void @@ -5302,39 +5258,38 @@ bifrost_compile_shader_nir(nir_shader *nir, struct util_dynarray *binary, struct pan_shader_info *info) { - bifrost_debug = debug_get_option_bifrost_debug(); + bifrost_debug = debug_get_option_bifrost_debug(); - bi_finalize_nir(nir, inputs->gpu_id, inputs->is_blend); - struct hash_table_u64 *sysval_to_id = - panfrost_init_sysvals(&info->sysvals, - inputs->fixed_sysval_layout, - NULL); + bi_finalize_nir(nir, inputs->gpu_id, inputs->is_blend); + struct hash_table_u64 *sysval_to_id = + panfrost_init_sysvals(&info->sysvals, inputs->fixed_sysval_layout, NULL); - info->tls_size = nir->scratch_size; - info->vs.idvs = bi_should_idvs(nir, inputs); + info->tls_size = nir->scratch_size; + info->vs.idvs = bi_should_idvs(nir, inputs); - pan_nir_collect_varyings(nir, info); + pan_nir_collect_varyings(nir, info); - if (info->vs.idvs) { - bi_compile_variant(nir, inputs, binary, sysval_to_id, info, BI_IDVS_POSITION); - bi_compile_variant(nir, inputs, binary, sysval_to_id, info, BI_IDVS_VARYING); - } else { - bi_compile_variant(nir, inputs, binary, sysval_to_id, info, BI_IDVS_NONE); - } + if (info->vs.idvs) { + bi_compile_variant(nir, inputs, binary, sysval_to_id, info, + BI_IDVS_POSITION); + bi_compile_variant(nir, inputs, binary, sysval_to_id, info, + BI_IDVS_VARYING); + } else { + bi_compile_variant(nir, inputs, binary, sysval_to_id, info, BI_IDVS_NONE); + } - if (gl_shader_stage_is_compute(nir->info.stage)) { - /* Workgroups may be merged if the structure of the workgroup is - * not software visible. This is true if neither shared memory - * nor barriers are used. The hardware may be able to optimize - * compute shaders that set this flag. - */ - info->cs.allow_merging_workgroups = - (nir->info.shared_size == 0) && - !nir->info.uses_control_barrier && - !nir->info.uses_memory_barrier; - } + if (gl_shader_stage_is_compute(nir->info.stage)) { + /* Workgroups may be merged if the structure of the workgroup is + * not software visible. This is true if neither shared memory + * nor barriers are used. 
The hardware may be able to optimize + * compute shaders that set this flag. + */ + info->cs.allow_merging_workgroups = (nir->info.shared_size == 0) && + !nir->info.uses_control_barrier && + !nir->info.uses_memory_barrier; + } - info->ubo_mask &= (1 << nir->info.num_ubos) - 1; + info->ubo_mask &= (1 << nir->info.num_ubos) - 1; - _mesa_hash_table_u64_destroy(sysval_to_id); + _mesa_hash_table_u64_destroy(sysval_to_id); } diff --git a/src/panfrost/bifrost/bifrost_compile.h b/src/panfrost/bifrost/bifrost_compile.h index c23b51afee7..69ce3ac9511 100644 --- a/src/panfrost/bifrost/bifrost_compile.h +++ b/src/panfrost/bifrost/bifrost_compile.h @@ -25,73 +25,73 @@ #define __BIFROST_PUBLIC_H_ #include "compiler/nir/nir.h" -#include "util/u_dynarray.h" #include "panfrost/util/pan_ir.h" +#include "util/u_dynarray.h" -void -bifrost_compile_shader_nir(nir_shader *nir, - const struct panfrost_compile_inputs *inputs, - struct util_dynarray *binary, - struct pan_shader_info *info); +void bifrost_compile_shader_nir(nir_shader *nir, + const struct panfrost_compile_inputs *inputs, + struct util_dynarray *binary, + struct pan_shader_info *info); static const nir_shader_compiler_options bifrost_nir_options = { - .lower_scmp = true, - .lower_flrp16 = true, - .lower_flrp32 = true, - .lower_flrp64 = true, - .lower_ffract = true, - .lower_fmod = true, - .lower_fdiv = true, - .lower_isign = true, - .lower_find_lsb = true, - .lower_ifind_msb = true, - .lower_fdph = true, - .lower_fsqrt = true, + .lower_scmp = true, + .lower_flrp16 = true, + .lower_flrp32 = true, + .lower_flrp64 = true, + .lower_ffract = true, + .lower_fmod = true, + .lower_fdiv = true, + .lower_isign = true, + .lower_find_lsb = true, + .lower_ifind_msb = true, + .lower_fdph = true, + .lower_fsqrt = true, - .lower_fsign = true, + .lower_fsign = true, - .lower_bitfield_insert_to_shifts = true, - .lower_bitfield_extract_to_shifts = true, - .lower_insert_byte = true, - .lower_rotate = true, + .lower_bitfield_insert_to_shifts = true, + .lower_bitfield_extract_to_shifts = true, + .lower_insert_byte = true, + .lower_rotate = true, - .lower_pack_half_2x16 = true, - .lower_pack_unorm_2x16 = true, - .lower_pack_snorm_2x16 = true, - .lower_pack_unorm_4x8 = true, - .lower_pack_snorm_4x8 = true, - .lower_unpack_half_2x16 = true, - .lower_unpack_unorm_2x16 = true, - .lower_unpack_snorm_2x16 = true, - .lower_unpack_unorm_4x8 = true, - .lower_unpack_snorm_4x8 = true, - .lower_pack_split = true, + .lower_pack_half_2x16 = true, + .lower_pack_unorm_2x16 = true, + .lower_pack_snorm_2x16 = true, + .lower_pack_unorm_4x8 = true, + .lower_pack_snorm_4x8 = true, + .lower_unpack_half_2x16 = true, + .lower_unpack_unorm_2x16 = true, + .lower_unpack_snorm_2x16 = true, + .lower_unpack_unorm_4x8 = true, + .lower_unpack_snorm_4x8 = true, + .lower_pack_split = true, - .lower_doubles_options = nir_lower_dmod, - /* TODO: Don't lower supported 64-bit operations */ - .lower_int64_options = ~0, - /* TODO: Use IMULD on v7 */ - .lower_mul_high = true, - .lower_fisnormal = true, - .lower_uadd_carry = true, - .lower_usub_borrow = true, + .lower_doubles_options = nir_lower_dmod, + /* TODO: Don't lower supported 64-bit operations */ + .lower_int64_options = ~0, + /* TODO: Use IMULD on v7 */ + .lower_mul_high = true, + .lower_fisnormal = true, + .lower_uadd_carry = true, + .lower_usub_borrow = true, - .has_fsub = true, - .has_isub = true, - .vectorize_io = true, - .vectorize_vec2_16bit = true, - .fuse_ffma16 = true, - .fuse_ffma32 = true, - .fuse_ffma64 = true, - 
.use_interpolated_input_intrinsics = true, + .has_fsub = true, + .has_isub = true, + .vectorize_io = true, + .vectorize_vec2_16bit = true, + .fuse_ffma16 = true, + .fuse_ffma32 = true, + .fuse_ffma64 = true, + .use_interpolated_input_intrinsics = true, - .lower_uniforms_to_ubo = true, + .lower_uniforms_to_ubo = true, - .has_cs_global_id = true, - .lower_cs_local_index_to_id = true, - .max_unroll_iterations = 32, - .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp), - .force_indirect_unrolling_sampler = true, + .has_cs_global_id = true, + .lower_cs_local_index_to_id = true, + .max_unroll_iterations = 32, + .force_indirect_unrolling = + (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp), + .force_indirect_unrolling_sampler = true, }; #endif diff --git a/src/panfrost/bifrost/bir.c b/src/panfrost/bifrost/bir.c index 92076f9c667..5836c5d4ff3 100644 --- a/src/panfrost/bifrost/bir.c +++ b/src/panfrost/bifrost/bir.c @@ -24,21 +24,21 @@ * Alyssa Rosenzweig */ -#include "compiler.h" #include "bi_builder.h" +#include "compiler.h" bool bi_has_arg(const bi_instr *ins, bi_index arg) { - if (!ins) - return false; + if (!ins) + return false; - bi_foreach_src(ins, s) { - if (bi_is_equiv(ins->src[s], arg)) - return true; - } + bi_foreach_src(ins, s) { + if (bi_is_equiv(ins->src[s], arg)) + return true; + } - return false; + return false; } /* Precondition: valid 16-bit or 32-bit register format. Returns whether it is @@ -48,131 +48,131 @@ bi_has_arg(const bi_instr *ins, bi_index arg) bool bi_is_regfmt_16(enum bi_register_format fmt) { - switch (fmt) { - case BI_REGISTER_FORMAT_F16: - case BI_REGISTER_FORMAT_S16: - case BI_REGISTER_FORMAT_U16: - return true; - case BI_REGISTER_FORMAT_F32: - case BI_REGISTER_FORMAT_S32: - case BI_REGISTER_FORMAT_U32: - case BI_REGISTER_FORMAT_AUTO: - return false; - default: - unreachable("Invalid register format"); - } + switch (fmt) { + case BI_REGISTER_FORMAT_F16: + case BI_REGISTER_FORMAT_S16: + case BI_REGISTER_FORMAT_U16: + return true; + case BI_REGISTER_FORMAT_F32: + case BI_REGISTER_FORMAT_S32: + case BI_REGISTER_FORMAT_U32: + case BI_REGISTER_FORMAT_AUTO: + return false; + default: + unreachable("Invalid register format"); + } } static unsigned bi_count_staging_registers(const bi_instr *ins) { - enum bi_sr_count count = bi_opcode_props[ins->op].sr_count; - unsigned vecsize = ins->vecsize + 1; /* XXX: off-by-one */ + enum bi_sr_count count = bi_opcode_props[ins->op].sr_count; + unsigned vecsize = ins->vecsize + 1; /* XXX: off-by-one */ - switch (count) { - case BI_SR_COUNT_0 ... BI_SR_COUNT_4: - return count; - case BI_SR_COUNT_FORMAT: - return bi_is_regfmt_16(ins->register_format) ? - DIV_ROUND_UP(vecsize, 2) : vecsize; - case BI_SR_COUNT_VECSIZE: - return vecsize; - case BI_SR_COUNT_SR_COUNT: - return ins->sr_count; - } + switch (count) { + case BI_SR_COUNT_0 ... BI_SR_COUNT_4: + return count; + case BI_SR_COUNT_FORMAT: + return bi_is_regfmt_16(ins->register_format) ? DIV_ROUND_UP(vecsize, 2) + : vecsize; + case BI_SR_COUNT_VECSIZE: + return vecsize; + case BI_SR_COUNT_SR_COUNT: + return ins->sr_count; + } - unreachable("Invalid sr_count"); + unreachable("Invalid sr_count"); } unsigned bi_count_read_registers(const bi_instr *ins, unsigned s) { - /* ATOM reads 1 but writes 2. Exception for ACMPXCHG */ - if (s == 0 && ins->op == BI_OPCODE_ATOM_RETURN_I32) - return (ins->atom_opc == BI_ATOM_OPC_ACMPXCHG) ? 
2 : 1; - else if (s == 0 && bi_opcode_props[ins->op].sr_read) - return bi_count_staging_registers(ins); - else if (s == 4 && ins->op == BI_OPCODE_BLEND) - return ins->sr_count_2; /* Dual source blending */ - else if (s == 0 && ins->op == BI_OPCODE_SPLIT_I32) - return ins->nr_dests; - else - return 1; + /* ATOM reads 1 but writes 2. Exception for ACMPXCHG */ + if (s == 0 && ins->op == BI_OPCODE_ATOM_RETURN_I32) + return (ins->atom_opc == BI_ATOM_OPC_ACMPXCHG) ? 2 : 1; + else if (s == 0 && bi_opcode_props[ins->op].sr_read) + return bi_count_staging_registers(ins); + else if (s == 4 && ins->op == BI_OPCODE_BLEND) + return ins->sr_count_2; /* Dual source blending */ + else if (s == 0 && ins->op == BI_OPCODE_SPLIT_I32) + return ins->nr_dests; + else + return 1; } unsigned bi_count_write_registers(const bi_instr *ins, unsigned d) { - if (d == 0 && bi_opcode_props[ins->op].sr_write) { - switch (ins->op) { - case BI_OPCODE_TEXC: - case BI_OPCODE_TEXC_DUAL: - if (ins->sr_count_2) - return ins->sr_count; - else - return bi_is_regfmt_16(ins->register_format) ? 2 : 4; + if (d == 0 && bi_opcode_props[ins->op].sr_write) { + switch (ins->op) { + case BI_OPCODE_TEXC: + case BI_OPCODE_TEXC_DUAL: + if (ins->sr_count_2) + return ins->sr_count; + else + return bi_is_regfmt_16(ins->register_format) ? 2 : 4; - case BI_OPCODE_TEX_SINGLE: - case BI_OPCODE_TEX_FETCH: - case BI_OPCODE_TEX_GATHER: { - unsigned chans = util_bitcount(ins->write_mask); + case BI_OPCODE_TEX_SINGLE: + case BI_OPCODE_TEX_FETCH: + case BI_OPCODE_TEX_GATHER: { + unsigned chans = util_bitcount(ins->write_mask); - return bi_is_regfmt_16(ins->register_format) ? - DIV_ROUND_UP(chans, 2) : chans; - } + return bi_is_regfmt_16(ins->register_format) ? DIV_ROUND_UP(chans, 2) + : chans; + } - case BI_OPCODE_ACMPXCHG_I32: - /* Reads 2 but writes 1 */ - return 1; + case BI_OPCODE_ACMPXCHG_I32: + /* Reads 2 but writes 1 */ + return 1; - case BI_OPCODE_ATOM1_RETURN_I32: - /* Allow omitting the destination for plain ATOM1 */ - return bi_is_null(ins->dest[0]) ? 0 : ins->sr_count; - default: - return bi_count_staging_registers(ins); - } - } else if (ins->op == BI_OPCODE_SEG_ADD_I64) { - return 2; - } else if (ins->op == BI_OPCODE_TEXC_DUAL && d == 1) { - return ins->sr_count_2; - } else if (ins->op == BI_OPCODE_COLLECT_I32 && d == 0) { - return ins->nr_srcs; - } + case BI_OPCODE_ATOM1_RETURN_I32: + /* Allow omitting the destination for plain ATOM1 */ + return bi_is_null(ins->dest[0]) ? 
0 : ins->sr_count; + default: + return bi_count_staging_registers(ins); + } + } else if (ins->op == BI_OPCODE_SEG_ADD_I64) { + return 2; + } else if (ins->op == BI_OPCODE_TEXC_DUAL && d == 1) { + return ins->sr_count_2; + } else if (ins->op == BI_OPCODE_COLLECT_I32 && d == 0) { + return ins->nr_srcs; + } - return 1; + return 1; } unsigned bi_writemask(const bi_instr *ins, unsigned d) { - unsigned mask = BITFIELD_MASK(bi_count_write_registers(ins, d)); - unsigned shift = ins->dest[d].offset; - return (mask << shift); + unsigned mask = BITFIELD_MASK(bi_count_write_registers(ins, d)); + unsigned shift = ins->dest[d].offset; + return (mask << shift); } bi_clause * bi_next_clause(bi_context *ctx, bi_block *block, bi_clause *clause) { - if (!block && !clause) - return NULL; + if (!block && !clause) + return NULL; - /* Try the first clause in this block if we're starting from scratch */ - if (!clause && !list_is_empty(&block->clauses)) - return list_first_entry(&block->clauses, bi_clause, link); + /* Try the first clause in this block if we're starting from scratch */ + if (!clause && !list_is_empty(&block->clauses)) + return list_first_entry(&block->clauses, bi_clause, link); - /* Try the next clause in this block */ - if (clause && clause->link.next != &block->clauses) - return list_first_entry(&(clause->link), bi_clause, link); + /* Try the next clause in this block */ + if (clause && clause->link.next != &block->clauses) + return list_first_entry(&(clause->link), bi_clause, link); - /* Try the next block, or the one after that if it's empty, etc .*/ - bi_block *next_block = bi_next_block(block); + /* Try the next block, or the one after that if it's empty, etc .*/ + bi_block *next_block = bi_next_block(block); - bi_foreach_block_from(ctx, next_block, block) { - if (!list_is_empty(&block->clauses)) - return list_first_entry(&block->clauses, bi_clause, link); - } + bi_foreach_block_from(ctx, next_block, block) { + if (!list_is_empty(&block->clauses)) + return list_first_entry(&block->clauses, bi_clause, link); + } - return NULL; + return NULL; } /* Does an instruction have a side effect not captured by its register @@ -184,41 +184,41 @@ bi_next_clause(bi_context *ctx, bi_block *block, bi_clause *clause) bool bi_side_effects(const bi_instr *I) { - if (bi_opcode_props[I->op].last) - return true; + if (bi_opcode_props[I->op].last) + return true; - switch (I->op) { - case BI_OPCODE_DISCARD_F32: - case BI_OPCODE_DISCARD_B32: - return true; - default: - break; - } + switch (I->op) { + case BI_OPCODE_DISCARD_F32: + case BI_OPCODE_DISCARD_B32: + return true; + default: + break; + } - switch (bi_opcode_props[I->op].message) { - case BIFROST_MESSAGE_NONE: - case BIFROST_MESSAGE_VARYING: - case BIFROST_MESSAGE_ATTRIBUTE: - case BIFROST_MESSAGE_TEX: - case BIFROST_MESSAGE_VARTEX: - case BIFROST_MESSAGE_LOAD: - case BIFROST_MESSAGE_64BIT: - return false; + switch (bi_opcode_props[I->op].message) { + case BIFROST_MESSAGE_NONE: + case BIFROST_MESSAGE_VARYING: + case BIFROST_MESSAGE_ATTRIBUTE: + case BIFROST_MESSAGE_TEX: + case BIFROST_MESSAGE_VARTEX: + case BIFROST_MESSAGE_LOAD: + case BIFROST_MESSAGE_64BIT: + return false; - case BIFROST_MESSAGE_STORE: - case BIFROST_MESSAGE_ATOMIC: - case BIFROST_MESSAGE_BARRIER: - case BIFROST_MESSAGE_BLEND: - case BIFROST_MESSAGE_Z_STENCIL: - case BIFROST_MESSAGE_ATEST: - case BIFROST_MESSAGE_JOB: - return true; + case BIFROST_MESSAGE_STORE: + case BIFROST_MESSAGE_ATOMIC: + case BIFROST_MESSAGE_BARRIER: + case BIFROST_MESSAGE_BLEND: + case BIFROST_MESSAGE_Z_STENCIL: + 
case BIFROST_MESSAGE_ATEST: + case BIFROST_MESSAGE_JOB: + return true; - case BIFROST_MESSAGE_TILE: - return (I->op != BI_OPCODE_LD_TILE); - } + case BIFROST_MESSAGE_TILE: + return (I->op != BI_OPCODE_LD_TILE); + } - unreachable("Invalid message type"); + unreachable("Invalid message type"); } /* Branch reconvergence is required when the execution mask may change @@ -230,10 +230,10 @@ bi_side_effects(const bi_instr *I) bool bi_reconverge_branches(bi_block *block) { - if (bi_num_successors(block) == 1) - return bi_num_predecessors(block->successors[0]) > 1; - else - return true; + if (bi_num_successors(block) == 1) + return bi_num_predecessors(block->successors[0]) > 1; + else + return true; } /* @@ -252,42 +252,41 @@ bi_reconverge_branches(bi_block *block) bool bi_can_replace_with_csel(bi_instr *I) { - return ((I->op == BI_OPCODE_MUX_I32) || (I->op == BI_OPCODE_MUX_V2I16)) && - (I->mux != BI_MUX_BIT) && - (I->src[0].swizzle == BI_SWIZZLE_H01) && - (I->src[1].swizzle == BI_SWIZZLE_H01) && - (I->src[2].swizzle == BI_SWIZZLE_H01); + return ((I->op == BI_OPCODE_MUX_I32) || (I->op == BI_OPCODE_MUX_V2I16)) && + (I->mux != BI_MUX_BIT) && (I->src[0].swizzle == BI_SWIZZLE_H01) && + (I->src[1].swizzle == BI_SWIZZLE_H01) && + (I->src[2].swizzle == BI_SWIZZLE_H01); } static enum bi_opcode bi_csel_for_mux(bool must_sign, bool b32, enum bi_mux mux) { - switch (mux) { - case BI_MUX_INT_ZERO: - if (must_sign) - return b32 ? BI_OPCODE_CSEL_U32 : BI_OPCODE_CSEL_V2U16; - else - return b32 ? BI_OPCODE_CSEL_I32 : BI_OPCODE_CSEL_V2I16; - case BI_MUX_NEG: - return b32 ? BI_OPCODE_CSEL_S32 : BI_OPCODE_CSEL_V2S16; - case BI_MUX_FP_ZERO: - return b32 ? BI_OPCODE_CSEL_F32 : BI_OPCODE_CSEL_V2F16; - default: - unreachable("No CSEL for MUX.bit"); - } + switch (mux) { + case BI_MUX_INT_ZERO: + if (must_sign) + return b32 ? BI_OPCODE_CSEL_U32 : BI_OPCODE_CSEL_V2U16; + else + return b32 ? BI_OPCODE_CSEL_I32 : BI_OPCODE_CSEL_V2I16; + case BI_MUX_NEG: + return b32 ? BI_OPCODE_CSEL_S32 : BI_OPCODE_CSEL_V2S16; + case BI_MUX_FP_ZERO: + return b32 ? BI_OPCODE_CSEL_F32 : BI_OPCODE_CSEL_V2F16; + default: + unreachable("No CSEL for MUX.bit"); + } } bi_instr * bi_csel_from_mux(bi_builder *b, const bi_instr *I, bool must_sign) { - assert(I->op == BI_OPCODE_MUX_I32 || I->op == BI_OPCODE_MUX_V2I16); + assert(I->op == BI_OPCODE_MUX_I32 || I->op == BI_OPCODE_MUX_V2I16); - /* Build a new CSEL */ - enum bi_cmpf cmpf = (I->mux == BI_MUX_NEG) ? BI_CMPF_LT : BI_CMPF_EQ; - bi_instr *csel = bi_csel_u32_to(b, I->dest[0], I->src[2], bi_zero(), - I->src[0], I->src[1], cmpf); + /* Build a new CSEL */ + enum bi_cmpf cmpf = (I->mux == BI_MUX_NEG) ? 
BI_CMPF_LT : BI_CMPF_EQ; + bi_instr *csel = bi_csel_u32_to(b, I->dest[0], I->src[2], bi_zero(), + I->src[0], I->src[1], cmpf); - /* Fixup the opcode and use it */ - csel->op = bi_csel_for_mux(must_sign, I->op == BI_OPCODE_MUX_I32, I->mux); - return csel; + /* Fixup the opcode and use it */ + csel->op = bi_csel_for_mux(must_sign, I->op == BI_OPCODE_MUX_I32, I->mux); + return csel; } diff --git a/src/panfrost/bifrost/cmdline.c b/src/panfrost/bifrost/cmdline.c index 2a11486cbed..5dc5b73eab8 100644 --- a/src/panfrost/bifrost/cmdline.c +++ b/src/panfrost/bifrost/cmdline.c @@ -26,15 +26,15 @@ #include #include -#include "disassemble.h" #include "valhall/disassemble.h" #include "compiler.h" +#include "disassemble.h" -#include "main/mtypes.h" -#include "compiler/glsl/standalone.h" -#include "compiler/glsl/glsl_to_nir.h" #include "compiler/glsl/gl_nir.h" +#include "compiler/glsl/glsl_to_nir.h" +#include "compiler/glsl/standalone.h" #include "compiler/nir_types.h" +#include "main/mtypes.h" #include "util/u_dynarray.h" #include "bifrost_compile.h" @@ -44,25 +44,25 @@ int verbose = 0; static gl_shader_stage filename_to_stage(const char *stage) { - const char *ext = strrchr(stage, '.'); + const char *ext = strrchr(stage, '.'); - if (ext == NULL) { - fprintf(stderr, "No extension found in %s\n", stage); - exit(1); - } + if (ext == NULL) { + fprintf(stderr, "No extension found in %s\n", stage); + exit(1); + } - if (!strcmp(ext, ".cs") || !strcmp(ext, ".comp")) - return MESA_SHADER_COMPUTE; - else if (!strcmp(ext, ".vs") || !strcmp(ext, ".vert")) - return MESA_SHADER_VERTEX; - else if (!strcmp(ext, ".fs") || !strcmp(ext, ".frag")) - return MESA_SHADER_FRAGMENT; - else { - fprintf(stderr, "Invalid extension %s\n", ext); - exit(1); - } + if (!strcmp(ext, ".cs") || !strcmp(ext, ".comp")) + return MESA_SHADER_COMPUTE; + else if (!strcmp(ext, ".vs") || !strcmp(ext, ".vert")) + return MESA_SHADER_VERTEX; + else if (!strcmp(ext, ".fs") || !strcmp(ext, ".frag")) + return MESA_SHADER_FRAGMENT; + else { + fprintf(stderr, "Invalid extension %s\n", ext); + exit(1); + } - unreachable("Should've returned or bailed"); + unreachable("Should've returned or bailed"); } static int @@ -80,7 +80,7 @@ glsl_type_size(const struct glsl_type *type, bool bindless) static void insert_sorted(struct exec_list *var_list, nir_variable *new_var) { - nir_foreach_variable_in_list (var, var_list) { + nir_foreach_variable_in_list(var, var_list) { if (var->data.location > new_var->data.location) { exec_node_insert_node_before(&var->node, &new_var->node); return; @@ -94,7 +94,7 @@ sort_varyings(nir_shader *nir, nir_variable_mode mode) { struct exec_list new_list; exec_list_make_empty(&new_list); - nir_foreach_variable_with_modes_safe (var, nir, mode) { + nir_foreach_variable_with_modes_safe(var, nir, mode) { exec_node_remove(&var->node); insert_sorted(&new_list, var); } @@ -104,7 +104,7 @@ sort_varyings(nir_shader *nir, nir_variable_mode mode) static void fixup_varying_slots(nir_shader *nir, nir_variable_mode mode) { - nir_foreach_variable_with_modes (var, nir, mode) { + nir_foreach_variable_with_modes(var, nir, mode) { if (var->data.location >= VARYING_SLOT_VAR0) { var->data.location += 9; } else if ((var->data.location >= VARYING_SLOT_TEX0) && @@ -117,228 +117,219 @@ fixup_varying_slots(nir_shader *nir, nir_variable_mode mode) static void compile_shader(int stages, char **files) { - struct gl_shader_program *prog; - nir_shader *nir[MESA_SHADER_COMPUTE + 1]; - unsigned shader_types[MESA_SHADER_COMPUTE + 1]; + struct gl_shader_program *prog; 
+ nir_shader *nir[MESA_SHADER_COMPUTE + 1]; + unsigned shader_types[MESA_SHADER_COMPUTE + 1]; - if (stages > MESA_SHADER_COMPUTE) { - fprintf(stderr, "Too many stages"); - exit(1); - } + if (stages > MESA_SHADER_COMPUTE) { + fprintf(stderr, "Too many stages"); + exit(1); + } - for (unsigned i = 0; i < stages; ++i) - shader_types[i] = filename_to_stage(files[i]); + for (unsigned i = 0; i < stages; ++i) + shader_types[i] = filename_to_stage(files[i]); - struct standalone_options options = { - .glsl_version = 300, /* ES - needed for precision */ - .do_link = true, - .lower_precision = true - }; + struct standalone_options options = { + .glsl_version = 300, /* ES - needed for precision */ + .do_link = true, + .lower_precision = true}; - static struct gl_context local_ctx; + static struct gl_context local_ctx; - prog = standalone_compile_shader(&options, stages, files, &local_ctx); + prog = standalone_compile_shader(&options, stages, files, &local_ctx); - for (unsigned i = 0; i < stages; ++i) { - gl_shader_stage stage = shader_types[i]; - prog->_LinkedShaders[stage]->Program->info.stage = stage; - } + for (unsigned i = 0; i < stages; ++i) { + gl_shader_stage stage = shader_types[i]; + prog->_LinkedShaders[stage]->Program->info.stage = stage; + } - struct util_dynarray binary; + struct util_dynarray binary; - util_dynarray_init(&binary, NULL); + util_dynarray_init(&binary, NULL); - for (unsigned i = 0; i < stages; ++i) { - nir[i] = glsl_to_nir(&local_ctx.Const, prog, shader_types[i], &bifrost_nir_options); + for (unsigned i = 0; i < stages; ++i) { + nir[i] = glsl_to_nir(&local_ctx.Const, prog, shader_types[i], + &bifrost_nir_options); - if (shader_types[i] == MESA_SHADER_VERTEX) { - nir_assign_var_locations(nir[i], nir_var_shader_in, &nir[i]->num_inputs, - glsl_type_size); - sort_varyings(nir[i], nir_var_shader_out); - nir_assign_var_locations(nir[i], nir_var_shader_out, &nir[i]->num_outputs, - glsl_type_size); - fixup_varying_slots(nir[i], nir_var_shader_out); - } else if (shader_types[i] == MESA_SHADER_FRAGMENT) { - sort_varyings(nir[i], nir_var_shader_in); - nir_assign_var_locations(nir[i], nir_var_shader_in, &nir[i]->num_inputs, - glsl_type_size); - fixup_varying_slots(nir[i], nir_var_shader_in); - nir_assign_var_locations(nir[i], nir_var_shader_out, &nir[i]->num_outputs, - glsl_type_size); - } + if (shader_types[i] == MESA_SHADER_VERTEX) { + nir_assign_var_locations(nir[i], nir_var_shader_in, + &nir[i]->num_inputs, glsl_type_size); + sort_varyings(nir[i], nir_var_shader_out); + nir_assign_var_locations(nir[i], nir_var_shader_out, + &nir[i]->num_outputs, glsl_type_size); + fixup_varying_slots(nir[i], nir_var_shader_out); + } else if (shader_types[i] == MESA_SHADER_FRAGMENT) { + sort_varyings(nir[i], nir_var_shader_in); + nir_assign_var_locations(nir[i], nir_var_shader_in, + &nir[i]->num_inputs, glsl_type_size); + fixup_varying_slots(nir[i], nir_var_shader_in); + nir_assign_var_locations(nir[i], nir_var_shader_out, + &nir[i]->num_outputs, glsl_type_size); + } - nir_assign_var_locations(nir[i], nir_var_uniform, &nir[i]->num_uniforms, - glsl_type_size); + nir_assign_var_locations(nir[i], nir_var_uniform, &nir[i]->num_uniforms, + glsl_type_size); - NIR_PASS_V(nir[i], nir_lower_global_vars_to_local); - NIR_PASS_V(nir[i], nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir[i]), true, i == 0); - NIR_PASS_V(nir[i], nir_opt_copy_prop_vars); - NIR_PASS_V(nir[i], nir_opt_combine_stores, nir_var_all); + NIR_PASS_V(nir[i], nir_lower_global_vars_to_local); + NIR_PASS_V(nir[i], 
nir_lower_io_to_temporaries, + nir_shader_get_entrypoint(nir[i]), true, i == 0); + NIR_PASS_V(nir[i], nir_opt_copy_prop_vars); + NIR_PASS_V(nir[i], nir_opt_combine_stores, nir_var_all); - NIR_PASS_V(nir[i], nir_lower_system_values); - NIR_PASS_V(nir[i], gl_nir_lower_samplers, prog); - NIR_PASS_V(nir[i], nir_split_var_copies); - NIR_PASS_V(nir[i], nir_lower_var_copies); + NIR_PASS_V(nir[i], nir_lower_system_values); + NIR_PASS_V(nir[i], gl_nir_lower_samplers, prog); + NIR_PASS_V(nir[i], nir_split_var_copies); + NIR_PASS_V(nir[i], nir_lower_var_copies); - NIR_PASS_V(nir[i], nir_lower_io, nir_var_uniform, - st_packed_uniforms_type_size, - (nir_lower_io_options)0); - NIR_PASS_V(nir[i], nir_lower_uniforms_to_ubo, true, false); + NIR_PASS_V(nir[i], nir_lower_io, nir_var_uniform, + st_packed_uniforms_type_size, (nir_lower_io_options)0); + NIR_PASS_V(nir[i], nir_lower_uniforms_to_ubo, true, false); - /* before buffers and vars_to_ssa */ - NIR_PASS_V(nir[i], gl_nir_lower_images, true); + /* before buffers and vars_to_ssa */ + NIR_PASS_V(nir[i], gl_nir_lower_images, true); - NIR_PASS_V(nir[i], gl_nir_lower_buffers, prog); - NIR_PASS_V(nir[i], nir_opt_constant_folding); + NIR_PASS_V(nir[i], gl_nir_lower_buffers, prog); + NIR_PASS_V(nir[i], nir_opt_constant_folding); - struct panfrost_compile_inputs inputs = { - .gpu_id = gpu_id, - .fixed_sysval_ubo = -1, - }; - struct pan_shader_info info = { 0 }; + struct panfrost_compile_inputs inputs = { + .gpu_id = gpu_id, + .fixed_sysval_ubo = -1, + }; + struct pan_shader_info info = {0}; - util_dynarray_clear(&binary); - bifrost_compile_shader_nir(nir[i], &inputs, &binary, &info); + util_dynarray_clear(&binary); + bifrost_compile_shader_nir(nir[i], &inputs, &binary, &info); - char *fn = NULL; - asprintf(&fn, "shader_%u.bin", i); - assert(fn != NULL); - FILE *fp = fopen(fn, "wb"); - fwrite(binary.data, 1, binary.size, fp); - fclose(fp); - free(fn); - } + char *fn = NULL; + asprintf(&fn, "shader_%u.bin", i); + assert(fn != NULL); + FILE *fp = fopen(fn, "wb"); + fwrite(binary.data, 1, binary.size, fp); + fclose(fp); + free(fn); + } - util_dynarray_fini(&binary); + util_dynarray_fini(&binary); } -#define BI_FOURCC(ch0, ch1, ch2, ch3) ( \ - (uint32_t)(ch0) | (uint32_t)(ch1) << 8 | \ - (uint32_t)(ch2) << 16 | (uint32_t)(ch3) << 24) +#define BI_FOURCC(ch0, ch1, ch2, ch3) \ + ((uint32_t)(ch0) | (uint32_t)(ch1) << 8 | (uint32_t)(ch2) << 16 | \ + (uint32_t)(ch3) << 24) static void disassemble(const char *filename) { - FILE *fp = fopen(filename, "rb"); - assert(fp); + FILE *fp = fopen(filename, "rb"); + assert(fp); - fseek(fp, 0, SEEK_END); - unsigned filesize = ftell(fp); - rewind(fp); + fseek(fp, 0, SEEK_END); + unsigned filesize = ftell(fp); + rewind(fp); - uint32_t *code = malloc(filesize); - unsigned res = fread(code, 1, filesize, fp); - if (res != filesize) { - printf("Couldn't read full file\n"); - } + uint32_t *code = malloc(filesize); + unsigned res = fread(code, 1, filesize, fp); + if (res != filesize) { + printf("Couldn't read full file\n"); + } - fclose(fp); + fclose(fp); - void *entrypoint = code; + void *entrypoint = code; - if (filesize && code[0] == BI_FOURCC('M', 'B', 'S', '2')) { - for (int i = 0; i < filesize / 4; ++i) { - if (code[i] != BI_FOURCC('O', 'B', 'J', 'C')) - continue; + if (filesize && code[0] == BI_FOURCC('M', 'B', 'S', '2')) { + for (int i = 0; i < filesize / 4; ++i) { + if (code[i] != BI_FOURCC('O', 'B', 'J', 'C')) + continue; - unsigned size = code[i + 1]; - unsigned offset = i + 2; + unsigned size = code[i + 1]; + unsigned offset = i + 
2; - entrypoint = code + offset; - filesize = size; - } - } + entrypoint = code + offset; + filesize = size; + } + } - if ((gpu_id >> 12) >= 9) - disassemble_valhall(stdout, entrypoint, filesize, verbose); - else - disassemble_bifrost(stdout, entrypoint, filesize, verbose); + if ((gpu_id >> 12) >= 9) + disassemble_valhall(stdout, entrypoint, filesize, verbose); + else + disassemble_bifrost(stdout, entrypoint, filesize, verbose); - free(code); + free(code); } int main(int argc, char **argv) { - int c; + int c; - if (argc < 2) { - printf("Pass a command\n"); - exit(1); - } + if (argc < 2) { + printf("Pass a command\n"); + exit(1); + } - static struct option longopts[] = { - { "id", optional_argument, NULL, 'i' }, - { "gpu", optional_argument, NULL, 'g' }, - { "verbose", no_argument, &verbose, 'v' }, - { NULL, 0, NULL, 0 } - }; + static struct option longopts[] = {{"id", optional_argument, NULL, 'i'}, + {"gpu", optional_argument, NULL, 'g'}, + {"verbose", no_argument, &verbose, 'v'}, + {NULL, 0, NULL, 0}}; - static struct { - const char *name; - unsigned major, minor; - } gpus[] = { - { "G71", 6, 0 }, - { "G72", 6, 2 }, - { "G51", 7, 0 }, - { "G76", 7, 1 }, - { "G52", 7, 2 }, - { "G31", 7, 3 }, - { "G77", 9, 0 }, - { "G57", 9, 1 }, - { "G78", 9, 2 }, - { "G57", 9, 3 }, - { "G68", 9, 4 }, - { "G78AE", 9, 5 }, - }; + static struct { + const char *name; + unsigned major, minor; + } gpus[] = { + {"G71", 6, 0}, {"G72", 6, 2}, {"G51", 7, 0}, {"G76", 7, 1}, + {"G52", 7, 2}, {"G31", 7, 3}, {"G77", 9, 0}, {"G57", 9, 1}, + {"G78", 9, 2}, {"G57", 9, 3}, {"G68", 9, 4}, {"G78AE", 9, 5}, + }; - while ((c = getopt_long(argc, argv, "v:", longopts, NULL)) != -1) { + while ((c = getopt_long(argc, argv, "v:", longopts, NULL)) != -1) { - switch (c) { - case 'i': - gpu_id = atoi(optarg); + switch (c) { + case 'i': + gpu_id = atoi(optarg); - if (!gpu_id) { - fprintf(stderr, "Expected GPU ID, got %s\n", optarg); - return 1; - } + if (!gpu_id) { + fprintf(stderr, "Expected GPU ID, got %s\n", optarg); + return 1; + } - break; - case 'g': - gpu_id = 0; + break; + case 'g': + gpu_id = 0; - /* Compatibility with the Arm compiler */ - if (strncmp(optarg, "Mali-", 5) == 0) optarg += 5; + /* Compatibility with the Arm compiler */ + if (strncmp(optarg, "Mali-", 5) == 0) + optarg += 5; - for (unsigned i = 0; i < ARRAY_SIZE(gpus); ++i) { - if (strcmp(gpus[i].name, optarg)) continue; + for (unsigned i = 0; i < ARRAY_SIZE(gpus); ++i) { + if (strcmp(gpus[i].name, optarg)) + continue; - unsigned major = gpus[i].major; - unsigned minor = gpus[i].minor; + unsigned major = gpus[i].major; + unsigned minor = gpus[i].minor; - gpu_id = (major << 12) | (minor << 8); - break; - } + gpu_id = (major << 12) | (minor << 8); + break; + } - if (!gpu_id) { - fprintf(stderr, "Unknown GPU %s\n", optarg); - return 1; - } + if (!gpu_id) { + fprintf(stderr, "Unknown GPU %s\n", optarg); + return 1; + } - break; - default: - break; - } - } + break; + default: + break; + } + } - if (strcmp(argv[optind], "compile") == 0) - compile_shader(argc - optind - 1, &argv[optind + 1]); - else if (strcmp(argv[optind], "disasm") == 0) - disassemble(argv[optind + 1]); - else { - fprintf(stderr, "Unknown command. Valid: compile/disasm\n"); - return 1; - } + if (strcmp(argv[optind], "compile") == 0) + compile_shader(argc - optind - 1, &argv[optind + 1]); + else if (strcmp(argv[optind], "disasm") == 0) + disassemble(argv[optind + 1]); + else { + fprintf(stderr, "Unknown command. 
Valid: compile/disasm\n"); + return 1; + } - return 0; + return 0; } diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h index 1502560bd18..cae4dd2e351 100644 --- a/src/panfrost/bifrost/compiler.h +++ b/src/panfrost/bifrost/compiler.h @@ -27,13 +27,13 @@ #ifndef __BIFROST_COMPILER_H #define __BIFROST_COMPILER_H -#include "bifrost.h" -#include "bi_opcodes.h" #include "compiler/nir/nir.h" #include "panfrost/util/pan_ir.h" -#include "util/u_math.h" #include "util/half_float.h" +#include "util/u_math.h" #include "util/u_worklist.h" +#include "bi_opcodes.h" +#include "bifrost.h" #ifdef __cplusplus extern "C" { @@ -49,25 +49,25 @@ extern "C" { */ enum bi_swizzle { - /* 16-bit swizzle ordering deliberate for fast compute */ - BI_SWIZZLE_H00 = 0, /* = B0101 */ - BI_SWIZZLE_H01 = 1, /* = B0123 = W0 */ - BI_SWIZZLE_H10 = 2, /* = B2301 */ - BI_SWIZZLE_H11 = 3, /* = B2323 */ + /* 16-bit swizzle ordering deliberate for fast compute */ + BI_SWIZZLE_H00 = 0, /* = B0101 */ + BI_SWIZZLE_H01 = 1, /* = B0123 = W0 */ + BI_SWIZZLE_H10 = 2, /* = B2301 */ + BI_SWIZZLE_H11 = 3, /* = B2323 */ - /* replication order should be maintained for fast compute */ - BI_SWIZZLE_B0000 = 4, /* single channel (replicate) */ - BI_SWIZZLE_B1111 = 5, - BI_SWIZZLE_B2222 = 6, - BI_SWIZZLE_B3333 = 7, + /* replication order should be maintained for fast compute */ + BI_SWIZZLE_B0000 = 4, /* single channel (replicate) */ + BI_SWIZZLE_B1111 = 5, + BI_SWIZZLE_B2222 = 6, + BI_SWIZZLE_B3333 = 7, - /* totally special for explicit pattern matching */ - BI_SWIZZLE_B0011 = 8, /* +SWZ.v4i8 */ - BI_SWIZZLE_B2233 = 9, /* +SWZ.v4i8 */ - BI_SWIZZLE_B1032 = 10, /* +SWZ.v4i8 */ - BI_SWIZZLE_B3210 = 11, /* +SWZ.v4i8 */ + /* totally special for explicit pattern matching */ + BI_SWIZZLE_B0011 = 8, /* +SWZ.v4i8 */ + BI_SWIZZLE_B2233 = 9, /* +SWZ.v4i8 */ + BI_SWIZZLE_B1032 = 10, /* +SWZ.v4i8 */ + BI_SWIZZLE_B3210 = 11, /* +SWZ.v4i8 */ - BI_SWIZZLE_B0022 = 12, /* for b02 lanes */ + BI_SWIZZLE_B0022 = 12, /* for b02 lanes */ }; /* Given a packed i16vec2/i8vec4 constant, apply a swizzle. 
Useful for constant @@ -76,26 +76,39 @@ enum bi_swizzle { static inline uint32_t bi_apply_swizzle(uint32_t value, enum bi_swizzle swz) { - const uint16_t *h = (const uint16_t *) &value; - const uint8_t *b = (const uint8_t *) &value; + const uint16_t *h = (const uint16_t *)&value; + const uint8_t *b = (const uint8_t *)&value; -#define H(h0, h1) (h[h0] | (h[h1] << 16)) +#define H(h0, h1) (h[h0] | (h[h1] << 16)) #define B(b0, b1, b2, b3) (b[b0] | (b[b1] << 8) | (b[b2] << 16) | (b[b3] << 24)) switch (swz) { - case BI_SWIZZLE_H00: return H(0, 0); - case BI_SWIZZLE_H01: return H(0, 1); - case BI_SWIZZLE_H10: return H(1, 0); - case BI_SWIZZLE_H11: return H(1, 1); - case BI_SWIZZLE_B0000: return B(0, 0, 0, 0); - case BI_SWIZZLE_B1111: return B(1, 1, 1, 1); - case BI_SWIZZLE_B2222: return B(2, 2, 2, 2); - case BI_SWIZZLE_B3333: return B(3, 3, 3, 3); - case BI_SWIZZLE_B0011: return B(0, 0, 1, 1); - case BI_SWIZZLE_B2233: return B(2, 2, 3, 3); - case BI_SWIZZLE_B1032: return B(1, 0, 3, 2); - case BI_SWIZZLE_B3210: return B(3, 2, 1, 0); - case BI_SWIZZLE_B0022: return B(0, 0, 2, 2); + case BI_SWIZZLE_H00: + return H(0, 0); + case BI_SWIZZLE_H01: + return H(0, 1); + case BI_SWIZZLE_H10: + return H(1, 0); + case BI_SWIZZLE_H11: + return H(1, 1); + case BI_SWIZZLE_B0000: + return B(0, 0, 0, 0); + case BI_SWIZZLE_B1111: + return B(1, 1, 1, 1); + case BI_SWIZZLE_B2222: + return B(2, 2, 2, 2); + case BI_SWIZZLE_B3333: + return B(3, 3, 3, 3); + case BI_SWIZZLE_B0011: + return B(0, 0, 1, 1); + case BI_SWIZZLE_B2233: + return B(2, 2, 3, 3); + case BI_SWIZZLE_B1032: + return B(1, 0, 3, 2); + case BI_SWIZZLE_B3210: + return B(3, 2, 1, 0); + case BI_SWIZZLE_B0022: + return B(0, 0, 2, 2); } #undef H @@ -105,148 +118,148 @@ bi_apply_swizzle(uint32_t value, enum bi_swizzle swz) } enum bi_index_type { - BI_INDEX_NULL = 0, - BI_INDEX_NORMAL = 1, - BI_INDEX_REGISTER = 2, - BI_INDEX_CONSTANT = 3, - BI_INDEX_PASS = 4, - BI_INDEX_FAU = 5 + BI_INDEX_NULL = 0, + BI_INDEX_NORMAL = 1, + BI_INDEX_REGISTER = 2, + BI_INDEX_CONSTANT = 3, + BI_INDEX_PASS = 4, + BI_INDEX_FAU = 5 }; typedef struct { - uint32_t value; + uint32_t value; - /* modifiers, should only be set if applicable for a given instruction. - * For *IDP.v4i8, abs plays the role of sign. For bitwise ops where - * applicable, neg plays the role of not */ - bool abs : 1; - bool neg : 1; + /* modifiers, should only be set if applicable for a given instruction. + * For *IDP.v4i8, abs plays the role of sign. For bitwise ops where + * applicable, neg plays the role of not */ + bool abs : 1; + bool neg : 1; - /* The last use of a value, should be purged from the register cache. - * Set by liveness analysis. */ - bool discard : 1; + /* The last use of a value, should be purged from the register cache. + * Set by liveness analysis. */ + bool discard : 1; - /* For a source, the swizzle. For a destination, acts a bit like a - * write mask. Identity for the full 32-bit, H00 for only caring about - * the lower half, other values unused. */ - enum bi_swizzle swizzle : 4; - uint32_t offset : 3; - enum bi_index_type type : 3; + /* For a source, the swizzle. For a destination, acts a bit like a + * write mask. Identity for the full 32-bit, H00 for only caring about + * the lower half, other values unused. 
*/ + enum bi_swizzle swizzle : 4; + uint32_t offset : 3; + enum bi_index_type type : 3; - /* Must be zeroed so we can hash the whole 64-bits at a time */ - unsigned padding : (32 - 13); + /* Must be zeroed so we can hash the whole 64-bits at a time */ + unsigned padding : (32 - 13); } bi_index; static inline bi_index bi_get_index(unsigned value) { - return (bi_index) { - .value = value, - .swizzle = BI_SWIZZLE_H01, - .type = BI_INDEX_NORMAL, - }; + return (bi_index){ + .value = value, + .swizzle = BI_SWIZZLE_H01, + .type = BI_INDEX_NORMAL, + }; } static inline bi_index bi_register(unsigned reg) { - assert(reg < 64); + assert(reg < 64); - return (bi_index) { - .value = reg, - .swizzle = BI_SWIZZLE_H01, - .type = BI_INDEX_REGISTER, - }; + return (bi_index){ + .value = reg, + .swizzle = BI_SWIZZLE_H01, + .type = BI_INDEX_REGISTER, + }; } static inline bi_index bi_imm_u32(uint32_t imm) { - return (bi_index) { - .value = imm, - .swizzle = BI_SWIZZLE_H01, - .type = BI_INDEX_CONSTANT, - }; + return (bi_index){ + .value = imm, + .swizzle = BI_SWIZZLE_H01, + .type = BI_INDEX_CONSTANT, + }; } static inline bi_index bi_imm_f32(float imm) { - return bi_imm_u32(fui(imm)); + return bi_imm_u32(fui(imm)); } static inline bi_index bi_null() { - return (bi_index) { .type = BI_INDEX_NULL }; + return (bi_index){.type = BI_INDEX_NULL}; } static inline bi_index bi_zero() { - return bi_imm_u32(0); + return bi_imm_u32(0); } static inline bi_index bi_passthrough(enum bifrost_packed_src value) { - return (bi_index) { - .value = value, - .swizzle = BI_SWIZZLE_H01, - .type = BI_INDEX_PASS, - }; + return (bi_index){ + .value = value, + .swizzle = BI_SWIZZLE_H01, + .type = BI_INDEX_PASS, + }; } /* Helps construct swizzles */ static inline bi_index bi_swz_16(bi_index idx, bool x, bool y) { - assert(idx.swizzle == BI_SWIZZLE_H01); - idx.swizzle = (enum bi_swizzle)(BI_SWIZZLE_H00 | (x << 1) | y); - return idx; + assert(idx.swizzle == BI_SWIZZLE_H01); + idx.swizzle = (enum bi_swizzle)(BI_SWIZZLE_H00 | (x << 1) | y); + return idx; } static inline bi_index bi_half(bi_index idx, bool upper) { - return bi_swz_16(idx, upper, upper); + return bi_swz_16(idx, upper, upper); } static inline bi_index bi_byte(bi_index idx, unsigned lane) { - assert(idx.swizzle == BI_SWIZZLE_H01); - assert(lane < 4); - idx.swizzle = (enum bi_swizzle)(BI_SWIZZLE_B0000 + lane); - return idx; + assert(idx.swizzle == BI_SWIZZLE_H01); + assert(lane < 4); + idx.swizzle = (enum bi_swizzle)(BI_SWIZZLE_B0000 + lane); + return idx; } static inline bi_index bi_abs(bi_index idx) { - idx.abs = true; - return idx; + idx.abs = true; + return idx; } static inline bi_index bi_neg(bi_index idx) { - idx.neg ^= true; - return idx; + idx.neg ^= true; + return idx; } static inline bi_index bi_discard(bi_index idx) { - idx.discard = true; - return idx; + idx.discard = true; + return idx; } /* Additive identity in IEEE 754 arithmetic */ static inline bi_index bi_negzero() { - return bi_neg(bi_zero()); + return bi_neg(bi_zero()); } /* Replaces an index, preserving any modifiers */ @@ -254,11 +267,11 @@ bi_negzero() static inline bi_index bi_replace_index(bi_index old, bi_index replacement) { - replacement.abs = old.abs; - replacement.neg = old.neg; - replacement.swizzle = old.swizzle; - replacement.discard = false; /* needs liveness analysis to set */ - return replacement; + replacement.abs = old.abs; + replacement.neg = old.neg; + replacement.swizzle = old.swizzle; + replacement.discard = false; /* needs liveness analysis to set */ + return replacement; } /* Remove any 
modifiers. This has the property: @@ -270,9 +283,9 @@ bi_replace_index(bi_index old, bi_index replacement) static inline bi_index bi_strip_index(bi_index index) { - index.abs = index.neg = false; - index.swizzle = BI_SWIZZLE_H01; - return index; + index.abs = index.neg = false; + index.swizzle = BI_SWIZZLE_H01; + return index; } /* For bitwise instructions */ @@ -281,40 +294,40 @@ bi_strip_index(bi_index index) static inline bi_index bi_imm_u8(uint8_t imm) { - return bi_byte(bi_imm_u32(imm), 0); + return bi_byte(bi_imm_u32(imm), 0); } static inline bi_index bi_imm_u16(uint16_t imm) { - return bi_half(bi_imm_u32(imm), false); + return bi_half(bi_imm_u32(imm), false); } static inline bi_index bi_imm_uintN(uint32_t imm, unsigned sz) { - assert(sz == 8 || sz == 16 || sz == 32); - return (sz == 8) ? bi_imm_u8(imm) : - (sz == 16) ? bi_imm_u16(imm) : - bi_imm_u32(imm); + assert(sz == 8 || sz == 16 || sz == 32); + return (sz == 8) ? bi_imm_u8(imm) + : (sz == 16) ? bi_imm_u16(imm) + : bi_imm_u32(imm); } static inline bi_index bi_imm_f16(float imm) { - return bi_imm_u16(_mesa_float_to_half(imm)); + return bi_imm_u16(_mesa_float_to_half(imm)); } static inline bool bi_is_null(bi_index idx) { - return idx.type == BI_INDEX_NULL; + return idx.type == BI_INDEX_NULL; } static inline bool bi_is_ssa(bi_index idx) { - return idx.type == BI_INDEX_NORMAL; + return idx.type == BI_INDEX_NORMAL; } /* Compares equivalence as references. Does not compare offsets, swizzles, or @@ -324,8 +337,7 @@ bi_is_ssa(bi_index idx) static inline bool bi_is_equiv(bi_index left, bi_index right) { - return (left.type == right.type) && - (left.value == right.value); + return (left.type == right.type) && (left.value == right.value); } /* A stronger equivalence relation that requires the indices access the @@ -335,7 +347,7 @@ bi_is_equiv(bi_index left, bi_index right) static inline bool bi_is_word_equiv(bi_index left, bi_index right) { - return bi_is_equiv(left, right) && left.offset == right.offset; + return bi_is_equiv(left, right) && left.offset == right.offset; } /* An even stronger equivalence that checks if indices correspond to the @@ -344,207 +356,203 @@ bi_is_word_equiv(bi_index left, bi_index right) static inline bool bi_is_value_equiv(bi_index left, bi_index right) { - if (left.type == BI_INDEX_CONSTANT && right.type == BI_INDEX_CONSTANT) { - return (bi_apply_swizzle(left.value, left.swizzle) == - bi_apply_swizzle(right.value, right.swizzle)) && - (left.abs == right.abs) && - (left.neg == right.neg); - } else { - return (left.value == right.value) && - (left.abs == right.abs) && - (left.neg == right.neg) && - (left.swizzle == right.swizzle) && - (left.offset == right.offset) && - (left.type == right.type); - } + if (left.type == BI_INDEX_CONSTANT && right.type == BI_INDEX_CONSTANT) { + return (bi_apply_swizzle(left.value, left.swizzle) == + bi_apply_swizzle(right.value, right.swizzle)) && + (left.abs == right.abs) && (left.neg == right.neg); + } else { + return (left.value == right.value) && (left.abs == right.abs) && + (left.neg == right.neg) && (left.swizzle == right.swizzle) && + (left.offset == right.offset) && (left.type == right.type); + } } -#define BI_MAX_VEC 8 +#define BI_MAX_VEC 8 #define BI_MAX_DESTS 4 -#define BI_MAX_SRCS 6 +#define BI_MAX_SRCS 6 typedef struct { - /* Must be first */ - struct list_head link; - bi_index *dest; - bi_index *src; + /* Must be first */ + struct list_head link; + bi_index *dest; + bi_index *src; - enum bi_opcode op; - uint8_t nr_srcs; - uint8_t nr_dests; + enum bi_opcode op; + 
uint8_t nr_srcs; + uint8_t nr_dests; - union { - /* For a branch */ - struct bi_block *branch_target; + union { + /* For a branch */ + struct bi_block *branch_target; - /* For a phi node that hasn't been translated yet. This is only - * used during NIR->BIR - */ - nir_phi_instr *phi; - }; + /* For a phi node that hasn't been translated yet. This is only + * used during NIR->BIR + */ + nir_phi_instr *phi; + }; - /* These don't fit neatly with anything else.. */ - enum bi_register_format register_format; - enum bi_vecsize vecsize; + /* These don't fit neatly with anything else.. */ + enum bi_register_format register_format; + enum bi_vecsize vecsize; - /* Flow control associated with a Valhall instruction */ - uint8_t flow; + /* Flow control associated with a Valhall instruction */ + uint8_t flow; - /* Slot associated with a message-passing instruction */ - uint8_t slot; + /* Slot associated with a message-passing instruction */ + uint8_t slot; - /* Can we spill the value written here? Used to prevent - * useless double fills */ - bool no_spill; + /* Can we spill the value written here? Used to prevent + * useless double fills */ + bool no_spill; - /* On Bifrost: A value of bi_table to override the table, inducing a - * DTSEL_IMM pair if nonzero. - * - * On Valhall: the table index to use for resource instructions. - * - * These two interpretations are equivalent if you squint a bit. - */ - unsigned table; + /* On Bifrost: A value of bi_table to override the table, inducing a + * DTSEL_IMM pair if nonzero. + * + * On Valhall: the table index to use for resource instructions. + * + * These two interpretations are equivalent if you squint a bit. + */ + unsigned table; - /* Everything after this MUST NOT be accessed directly, since - * interpretation depends on opcodes */ + /* Everything after this MUST NOT be accessed directly, since + * interpretation depends on opcodes */ - /* Destination modifiers */ - union { - enum bi_clamp clamp; - bool saturate; - bool not_result; - unsigned dest_mod; - }; + /* Destination modifiers */ + union { + enum bi_clamp clamp; + bool saturate; + bool not_result; + unsigned dest_mod; + }; - /* Immediates. All seen alone in an instruction, except for varying/texture - * which are specified jointly for VARTEX */ - union { - uint32_t shift; - uint32_t fill; - uint32_t index; - uint32_t attribute_index; + /* Immediates. 
All seen alone in an instruction, except for varying/texture + * which are specified jointly for VARTEX */ + union { + uint32_t shift; + uint32_t fill; + uint32_t index; + uint32_t attribute_index; - struct { - uint32_t varying_index; - uint32_t sampler_index; - uint32_t texture_index; - }; + struct { + uint32_t varying_index; + uint32_t sampler_index; + uint32_t texture_index; + }; - /* TEXC, ATOM_CX: # of staging registers used */ - struct { - uint32_t sr_count; - uint32_t sr_count_2; + /* TEXC, ATOM_CX: # of staging registers used */ + struct { + uint32_t sr_count; + uint32_t sr_count_2; - union { - /* Atomics effectively require all three */ - int32_t byte_offset; + union { + /* Atomics effectively require all three */ + int32_t byte_offset; - /* BLEND requires all three */ - int32_t branch_offset; - }; - }; - }; + /* BLEND requires all three */ + int32_t branch_offset; + }; + }; + }; - /* Modifiers specific to particular instructions are thrown in a union */ - union { - enum bi_adj adj; /* FEXP_TABLE.u4 */ - enum bi_atom_opc atom_opc; /* atomics */ - enum bi_func func; /* FPOW_SC_DET */ - enum bi_function function; /* LD_VAR_FLAT */ - enum bi_mux mux; /* MUX */ - enum bi_sem sem; /* FMAX, FMIN */ - enum bi_source source; /* LD_GCLK */ - bool scale; /* VN_ASST2, FSINCOS_OFFSET */ - bool offset; /* FSIN_TABLE, FOCS_TABLE */ - bool mask; /* CLZ */ - bool threads; /* IMULD, IMOV_FMA */ - bool combine; /* BRANCHC */ - bool format; /* LEA_TEX */ + /* Modifiers specific to particular instructions are thrown in a union */ + union { + enum bi_adj adj; /* FEXP_TABLE.u4 */ + enum bi_atom_opc atom_opc; /* atomics */ + enum bi_func func; /* FPOW_SC_DET */ + enum bi_function function; /* LD_VAR_FLAT */ + enum bi_mux mux; /* MUX */ + enum bi_sem sem; /* FMAX, FMIN */ + enum bi_source source; /* LD_GCLK */ + bool scale; /* VN_ASST2, FSINCOS_OFFSET */ + bool offset; /* FSIN_TABLE, FOCS_TABLE */ + bool mask; /* CLZ */ + bool threads; /* IMULD, IMOV_FMA */ + bool combine; /* BRANCHC */ + bool format; /* LEA_TEX */ - struct { - enum bi_special special; /* FADD_RSCALE, FMA_RSCALE */ - enum bi_round round; /* FMA, converts, FADD, _RSCALE, etc */ - bool ftz; /* Flush-to-zero for F16_TO_F32 */ - }; + struct { + enum bi_special special; /* FADD_RSCALE, FMA_RSCALE */ + enum bi_round round; /* FMA, converts, FADD, _RSCALE, etc */ + bool ftz; /* Flush-to-zero for F16_TO_F32 */ + }; - struct { - enum bi_result_type result_type; /* FCMP, ICMP */ - enum bi_cmpf cmpf; /* CSEL, FCMP, ICMP, BRANCH */ - }; + struct { + enum bi_result_type result_type; /* FCMP, ICMP */ + enum bi_cmpf cmpf; /* CSEL, FCMP, ICMP, BRANCH */ + }; - struct { - enum bi_stack_mode stack_mode; /* JUMP_EX */ - bool test_mode; - }; + struct { + enum bi_stack_mode stack_mode; /* JUMP_EX */ + bool test_mode; + }; - struct { - enum bi_seg seg; /* LOAD, STORE, SEG_ADD, SEG_SUB */ - bool preserve_null; /* SEG_ADD, SEG_SUB */ - enum bi_extend extend; /* LOAD, IMUL */ - }; + struct { + enum bi_seg seg; /* LOAD, STORE, SEG_ADD, SEG_SUB */ + bool preserve_null; /* SEG_ADD, SEG_SUB */ + enum bi_extend extend; /* LOAD, IMUL */ + }; - struct { - enum bi_sample sample; /* VAR_TEX, LD_VAR */ - enum bi_update update; /* VAR_TEX, LD_VAR */ - enum bi_varying_name varying_name; /* LD_VAR_SPECIAL */ - bool skip; /* VAR_TEX, TEXS, TEXC */ - bool lod_mode; /* VAR_TEX, TEXS, implicitly for TEXC */ - enum bi_source_format source_format; /* LD_VAR_BUF */ + struct { + enum bi_sample sample; /* VAR_TEX, LD_VAR */ + enum bi_update update; /* VAR_TEX, LD_VAR */ + enum 
bi_varying_name varying_name; /* LD_VAR_SPECIAL */ + bool skip; /* VAR_TEX, TEXS, TEXC */ + bool lod_mode; /* VAR_TEX, TEXS, implicitly for TEXC */ + enum bi_source_format source_format; /* LD_VAR_BUF */ - /* Used for valhall texturing */ - bool shadow; - bool texel_offset; - bool array_enable; - bool integer_coordinates; - enum bi_fetch_component fetch_component; - enum bi_va_lod_mode va_lod_mode; - enum bi_dimension dimension; - enum bi_write_mask write_mask; - }; + /* Used for valhall texturing */ + bool shadow; + bool texel_offset; + bool array_enable; + bool integer_coordinates; + enum bi_fetch_component fetch_component; + enum bi_va_lod_mode va_lod_mode; + enum bi_dimension dimension; + enum bi_write_mask write_mask; + }; - /* Maximum size, for hashing */ - unsigned flags[14]; + /* Maximum size, for hashing */ + unsigned flags[14]; - struct { - enum bi_subgroup subgroup; /* WMASK, CLPER */ - enum bi_inactive_result inactive_result; /* CLPER */ - enum bi_lane_op lane_op; /* CLPER */ - }; + struct { + enum bi_subgroup subgroup; /* WMASK, CLPER */ + enum bi_inactive_result inactive_result; /* CLPER */ + enum bi_lane_op lane_op; /* CLPER */ + }; - struct { - bool z; /* ZS_EMIT */ - bool stencil; /* ZS_EMIT */ - }; + struct { + bool z; /* ZS_EMIT */ + bool stencil; /* ZS_EMIT */ + }; - struct { - bool h; /* VN_ASST1.f16 */ - bool l; /* VN_ASST1.f16 */ - }; + struct { + bool h; /* VN_ASST1.f16 */ + bool l; /* VN_ASST1.f16 */ + }; - struct { - bool bytes2; /* RROT_DOUBLE, FRSHIFT_DOUBLE */ - bool result_word; - bool arithmetic; /* ARSHIFT_OR */ - }; + struct { + bool bytes2; /* RROT_DOUBLE, FRSHIFT_DOUBLE */ + bool result_word; + bool arithmetic; /* ARSHIFT_OR */ + }; - struct { - bool sqrt; /* FREXPM */ - bool log; /* FREXPM */ - }; + struct { + bool sqrt; /* FREXPM */ + bool log; /* FREXPM */ + }; - struct { - enum bi_mode mode; /* FLOG_TABLE */ - enum bi_precision precision; /* FLOG_TABLE */ - bool divzero; /* FRSQ_APPROX, FRSQ */ - }; - }; + struct { + enum bi_mode mode; /* FLOG_TABLE */ + enum bi_precision precision; /* FLOG_TABLE */ + bool divzero; /* FRSQ_APPROX, FRSQ */ + }; + }; } bi_instr; static inline bool bi_is_staging_src(const bi_instr *I, unsigned s) { - return (s == 0 || s == 4) && bi_opcode_props[I->op].sr_read; + return (s == 0 || s == 4) && bi_opcode_props[I->op].sr_read; } /* @@ -555,48 +563,48 @@ bi_is_staging_src(const bi_instr *I, unsigned s) static inline void bi_drop_dests(bi_instr *I, unsigned new_count) { - assert(new_count < I->nr_dests); + assert(new_count < I->nr_dests); - for (unsigned i = new_count; i < I->nr_dests; ++i) - I->dest[i] = bi_null(); + for (unsigned i = new_count; i < I->nr_dests; ++i) + I->dest[i] = bi_null(); - I->nr_dests = new_count; + I->nr_dests = new_count; } static inline void bi_drop_srcs(bi_instr *I, unsigned new_count) { - assert(new_count < I->nr_srcs); + assert(new_count < I->nr_srcs); - for (unsigned i = new_count; i < I->nr_srcs; ++i) - I->src[i] = bi_null(); + for (unsigned i = new_count; i < I->nr_srcs; ++i) + I->src[i] = bi_null(); - I->nr_srcs = new_count; + I->nr_srcs = new_count; } static inline void bi_replace_src(bi_instr *I, unsigned src_index, bi_index replacement) { - I->src[src_index] = bi_replace_index(I->src[src_index], replacement); + I->src[src_index] = bi_replace_index(I->src[src_index], replacement); } /* Represents the assignment of slots for a given bi_tuple */ typedef struct { - /* Register to assign to each slot */ - unsigned slot[4]; + /* Register to assign to each slot */ + unsigned slot[4]; - /* Read slots 
can be disabled */ - bool enabled[2]; + /* Read slots can be disabled */ + bool enabled[2]; - /* Configuration for slots 2/3 */ - struct bifrost_reg_ctrl_23 slot23; + /* Configuration for slots 2/3 */ + struct bifrost_reg_ctrl_23 slot23; - /* Fast-Access-Uniform RAM index */ - uint8_t fau_idx; + /* Fast-Access-Uniform RAM index */ + uint8_t fau_idx; - /* Whether writes are actually for the last instruction */ - bool first_instruction; + /* Whether writes are actually for the last instruction */ + bool first_instruction; } bi_registers; /* A bi_tuple contains two paired instruction pointers. If a slot is unfilled, @@ -605,307 +613,307 @@ typedef struct { */ typedef struct { - uint8_t fau_idx; - bi_registers regs; - bi_instr *fma; - bi_instr *add; + uint8_t fau_idx; + bi_registers regs; + bi_instr *fma; + bi_instr *add; } bi_tuple; struct bi_block; typedef struct { - struct list_head link; + struct list_head link; - /* Link back up for branch calculations */ - struct bi_block *block; + /* Link back up for branch calculations */ + struct bi_block *block; - /* Architectural limit of 8 tuples/clause */ - unsigned tuple_count; - bi_tuple tuples[8]; + /* Architectural limit of 8 tuples/clause */ + unsigned tuple_count; + bi_tuple tuples[8]; - /* For scoreboarding -- the clause ID (this is not globally unique!) - * and its dependencies in terms of other clauses, computed during - * scheduling and used when emitting code. Dependencies expressed as a - * bitfield matching the hardware, except shifted by a clause (the - * shift back to the ISA's off-by-one encoding is worked out when - * emitting clauses) */ - unsigned scoreboard_id; - uint8_t dependencies; + /* For scoreboarding -- the clause ID (this is not globally unique!) + * and its dependencies in terms of other clauses, computed during + * scheduling and used when emitting code. Dependencies expressed as a + * bitfield matching the hardware, except shifted by a clause (the + * shift back to the ISA's off-by-one encoding is worked out when + * emitting clauses) */ + unsigned scoreboard_id; + uint8_t dependencies; - /* See ISA header for description */ - enum bifrost_flow flow_control; + /* See ISA header for description */ + enum bifrost_flow flow_control; - /* Can we prefetch the next clause? Usually it makes sense, except for - * clauses ending in unconditional branches */ - bool next_clause_prefetch; + /* Can we prefetch the next clause? Usually it makes sense, except for + * clauses ending in unconditional branches */ + bool next_clause_prefetch; - /* Assigned data register */ - unsigned staging_register; + /* Assigned data register */ + unsigned staging_register; - /* Corresponds to the usual bit but shifted by a clause */ - bool staging_barrier; + /* Corresponds to the usual bit but shifted by a clause */ + bool staging_barrier; - /* Constants read by this clause. ISA limit. Must satisfy: - * - * constant_count + tuple_count <= 13 - * - * Also implicitly constant_count <= tuple_count since a tuple only - * reads a single constant. - */ - uint64_t constants[8]; - unsigned constant_count; + /* Constants read by this clause. ISA limit. Must satisfy: + * + * constant_count + tuple_count <= 13 + * + * Also implicitly constant_count <= tuple_count since a tuple only + * reads a single constant. 
+ */ + uint64_t constants[8]; + unsigned constant_count; - /* Index of a constant to be PC-relative */ - unsigned pcrel_idx; + /* Index of a constant to be PC-relative */ + unsigned pcrel_idx; - /* Branches encode a constant offset relative to the program counter - * with some magic flags. By convention, if there is a branch, its - * constant will be last. Set this flag to indicate this is required. - */ - bool branch_constant; + /* Branches encode a constant offset relative to the program counter + * with some magic flags. By convention, if there is a branch, its + * constant will be last. Set this flag to indicate this is required. + */ + bool branch_constant; - /* Unique in a clause */ - enum bifrost_message_type message_type; - bi_instr *message; + /* Unique in a clause */ + enum bifrost_message_type message_type; + bi_instr *message; - /* Discard helper threads */ - bool td; + /* Discard helper threads */ + bool td; - /* Should flush-to-zero mode be enabled for this clause? */ - bool ftz; + /* Should flush-to-zero mode be enabled for this clause? */ + bool ftz; } bi_clause; #define BI_NUM_SLOTS 8 /* A model for the state of the scoreboard */ struct bi_scoreboard_state { - /** Bitmap of registers read/written by a slot */ - uint64_t read[BI_NUM_SLOTS]; - uint64_t write[BI_NUM_SLOTS]; + /** Bitmap of registers read/written by a slot */ + uint64_t read[BI_NUM_SLOTS]; + uint64_t write[BI_NUM_SLOTS]; - /* Nonregister dependencies present by a slot */ - uint8_t varying : BI_NUM_SLOTS; - uint8_t memory : BI_NUM_SLOTS; + /* Nonregister dependencies present by a slot */ + uint8_t varying : BI_NUM_SLOTS; + uint8_t memory : BI_NUM_SLOTS; }; typedef struct bi_block { - /* Link to next block. Must be first for mir_get_block */ - struct list_head link; + /* Link to next block. Must be first for mir_get_block */ + struct list_head link; - /* List of instructions emitted for the current block */ - struct list_head instructions; + /* List of instructions emitted for the current block */ + struct list_head instructions; - /* Index of the block in source order */ - unsigned index; + /* Index of the block in source order */ + unsigned index; - /* Control flow graph */ - struct bi_block *successors[2]; - struct util_dynarray predecessors; - bool unconditional_jumps; + /* Control flow graph */ + struct bi_block *successors[2]; + struct util_dynarray predecessors; + bool unconditional_jumps; - /* Per 32-bit word live masks for the block indexed by node */ - uint8_t *live_in; - uint8_t *live_out; + /* Per 32-bit word live masks for the block indexed by node */ + uint8_t *live_in; + uint8_t *live_out; - /* Scalar liveness indexed by SSA index */ - BITSET_WORD *ssa_live_in; - BITSET_WORD *ssa_live_out; + /* Scalar liveness indexed by SSA index */ + BITSET_WORD *ssa_live_in; + BITSET_WORD *ssa_live_out; - /* If true, uses clauses; if false, uses instructions */ - bool scheduled; - struct list_head clauses; /* list of bi_clause */ + /* If true, uses clauses; if false, uses instructions */ + bool scheduled; + struct list_head clauses; /* list of bi_clause */ - /* Post-RA liveness */ - uint64_t reg_live_in, reg_live_out; + /* Post-RA liveness */ + uint64_t reg_live_in, reg_live_out; - /* Scoreboard state at the start/end of block */ - struct bi_scoreboard_state scoreboard_in, scoreboard_out; + /* Scoreboard state at the start/end of block */ + struct bi_scoreboard_state scoreboard_in, scoreboard_out; - /* On Valhall, indicates we need a terminal NOP to implement jumps to - * the end of the shader. 
- */ - bool needs_nop; + /* On Valhall, indicates we need a terminal NOP to implement jumps to + * the end of the shader. + */ + bool needs_nop; - /* Flags available for pass-internal use */ - uint8_t pass_flags; + /* Flags available for pass-internal use */ + uint8_t pass_flags; } bi_block; static inline unsigned bi_num_successors(bi_block *block) { - STATIC_ASSERT(ARRAY_SIZE(block->successors) == 2); - assert(block->successors[0] || !block->successors[1]); + STATIC_ASSERT(ARRAY_SIZE(block->successors) == 2); + assert(block->successors[0] || !block->successors[1]); - if (block->successors[1]) - return 2; - else if (block->successors[0]) - return 1; - else - return 0; + if (block->successors[1]) + return 2; + else if (block->successors[0]) + return 1; + else + return 0; } static inline unsigned bi_num_predecessors(bi_block *block) { - return util_dynarray_num_elements(&block->predecessors, bi_block *); + return util_dynarray_num_elements(&block->predecessors, bi_block *); } static inline bi_block * bi_start_block(struct list_head *blocks) { - bi_block *first = list_first_entry(blocks, bi_block, link); - assert(bi_num_predecessors(first) == 0); - return first; + bi_block *first = list_first_entry(blocks, bi_block, link); + assert(bi_num_predecessors(first) == 0); + return first; } static inline bi_block * bi_exit_block(struct list_head *blocks) { - bi_block *last = list_last_entry(blocks, bi_block, link); - assert(bi_num_successors(last) == 0); - return last; + bi_block *last = list_last_entry(blocks, bi_block, link); + assert(bi_num_successors(last) == 0); + return last; } static inline void bi_block_add_successor(bi_block *block, bi_block *successor) { - assert(block != NULL && successor != NULL); + assert(block != NULL && successor != NULL); - /* Cull impossible edges */ - if (block->unconditional_jumps) - return; + /* Cull impossible edges */ + if (block->unconditional_jumps) + return; - for (unsigned i = 0; i < ARRAY_SIZE(block->successors); ++i) { - if (block->successors[i]) { - if (block->successors[i] == successor) - return; - else - continue; - } + for (unsigned i = 0; i < ARRAY_SIZE(block->successors); ++i) { + if (block->successors[i]) { + if (block->successors[i] == successor) + return; + else + continue; + } - block->successors[i] = successor; - util_dynarray_append(&successor->predecessors, bi_block *, block); - return; - } + block->successors[i] = successor; + util_dynarray_append(&successor->predecessors, bi_block *, block); + return; + } - unreachable("Too many successors"); + unreachable("Too many successors"); } /* Subset of pan_shader_info needed per-variant, in order to support IDVS */ struct bi_shader_info { - struct panfrost_ubo_push *push; - struct bifrost_shader_info *bifrost; - struct panfrost_sysvals *sysvals; - unsigned tls_size; - unsigned work_reg_count; - unsigned push_offset; + struct panfrost_ubo_push *push; + struct bifrost_shader_info *bifrost; + struct panfrost_sysvals *sysvals; + unsigned tls_size; + unsigned work_reg_count; + unsigned push_offset; }; /* State of index-driven vertex shading for current shader */ enum bi_idvs_mode { - /* IDVS not in use */ - BI_IDVS_NONE = 0, + /* IDVS not in use */ + BI_IDVS_NONE = 0, - /* IDVS in use. Compiling a position shader */ - BI_IDVS_POSITION = 1, + /* IDVS in use. Compiling a position shader */ + BI_IDVS_POSITION = 1, - /* IDVS in use. Compiling a varying shader */ - BI_IDVS_VARYING = 2, + /* IDVS in use. 
Compiling a varying shader */ + BI_IDVS_VARYING = 2, }; typedef struct { - const struct panfrost_compile_inputs *inputs; - nir_shader *nir; - struct bi_shader_info info; - gl_shader_stage stage; - struct list_head blocks; /* list of bi_block */ - struct hash_table_u64 *sysval_to_id; - uint32_t quirks; - unsigned arch; - enum bi_idvs_mode idvs; - unsigned num_blocks; + const struct panfrost_compile_inputs *inputs; + nir_shader *nir; + struct bi_shader_info info; + gl_shader_stage stage; + struct list_head blocks; /* list of bi_block */ + struct hash_table_u64 *sysval_to_id; + uint32_t quirks; + unsigned arch; + enum bi_idvs_mode idvs; + unsigned num_blocks; - /* In any graphics shader, whether the "IDVS with memory - * allocation" flow is used. This affects how varyings are loaded and - * stored. Ignore for compute. - */ - bool malloc_idvs; + /* In any graphics shader, whether the "IDVS with memory + * allocation" flow is used. This affects how varyings are loaded and + * stored. Ignore for compute. + */ + bool malloc_idvs; - /* During NIR->BIR */ - bi_block *current_block; - bi_block *after_block; - bi_block *break_block; - bi_block *continue_block; - bi_block **indexed_nir_blocks; - bool emitted_atest; + /* During NIR->BIR */ + bi_block *current_block; + bi_block *after_block; + bi_block *break_block; + bi_block *continue_block; + bi_block **indexed_nir_blocks; + bool emitted_atest; - /* During NIR->BIR, the coverage bitmap. If this is NULL, the default - * coverage bitmap should be source from preloaded register r60. This is - * written by ATEST and ZS_EMIT - */ - bi_index coverage; + /* During NIR->BIR, the coverage bitmap. If this is NULL, the default + * coverage bitmap should be source from preloaded register r60. This is + * written by ATEST and ZS_EMIT + */ + bi_index coverage; - /* During NIR->BIR, table of preloaded registers, or NULL if never - * preloaded. - */ - bi_index preloaded[64]; + /* During NIR->BIR, table of preloaded registers, or NULL if never + * preloaded. + */ + bi_index preloaded[64]; - /* For creating temporaries */ - unsigned ssa_alloc; - unsigned reg_alloc; + /* For creating temporaries */ + unsigned ssa_alloc; + unsigned reg_alloc; - /* Mask of UBOs that need to be uploaded */ - uint32_t ubo_mask; + /* Mask of UBOs that need to be uploaded */ + uint32_t ubo_mask; - /* During instruction selection, map from vector bi_index to its scalar - * components, populated by a split. - */ - struct hash_table_u64 *allocated_vec; + /* During instruction selection, map from vector bi_index to its scalar + * components, populated by a split. 
+ */ + struct hash_table_u64 *allocated_vec; - /* Stats for shader-db */ - unsigned loop_count; - unsigned spills; - unsigned fills; + /* Stats for shader-db */ + unsigned loop_count; + unsigned spills; + unsigned fills; } bi_context; static inline void bi_remove_instruction(bi_instr *ins) { - list_del(&ins->link); + list_del(&ins->link); } enum bir_fau { - BIR_FAU_ZERO = 0, - BIR_FAU_LANE_ID = 1, - BIR_FAU_WARP_ID = 2, - BIR_FAU_CORE_ID = 3, - BIR_FAU_FB_EXTENT = 4, - BIR_FAU_ATEST_PARAM = 5, - BIR_FAU_SAMPLE_POS_ARRAY = 6, - BIR_FAU_BLEND_0 = 8, - /* blend descs 1 - 7 */ - BIR_FAU_TYPE_MASK = 15, + BIR_FAU_ZERO = 0, + BIR_FAU_LANE_ID = 1, + BIR_FAU_WARP_ID = 2, + BIR_FAU_CORE_ID = 3, + BIR_FAU_FB_EXTENT = 4, + BIR_FAU_ATEST_PARAM = 5, + BIR_FAU_SAMPLE_POS_ARRAY = 6, + BIR_FAU_BLEND_0 = 8, + /* blend descs 1 - 7 */ + BIR_FAU_TYPE_MASK = 15, - /* Valhall only */ - BIR_FAU_TLS_PTR = 16, - BIR_FAU_WLS_PTR = 17, - BIR_FAU_PROGRAM_COUNTER = 18, + /* Valhall only */ + BIR_FAU_TLS_PTR = 16, + BIR_FAU_WLS_PTR = 17, + BIR_FAU_PROGRAM_COUNTER = 18, - BIR_FAU_UNIFORM = (1 << 7), - /* Look up table on Valhall */ - BIR_FAU_IMMEDIATE = (1 << 8), + BIR_FAU_UNIFORM = (1 << 7), + /* Look up table on Valhall */ + BIR_FAU_IMMEDIATE = (1 << 8), }; static inline bi_index bi_fau(enum bir_fau value, bool hi) { - return (bi_index) { - .value = value, - .swizzle = BI_SWIZZLE_H01, - .offset = hi ? 1u : 0u, - .type = BI_INDEX_FAU, - }; + return (bi_index){ + .value = value, + .swizzle = BI_SWIZZLE_H01, + .offset = hi ? 1u : 0u, + .type = BI_INDEX_FAU, + }; } /* @@ -918,8 +926,7 @@ bi_fau(enum bir_fau value, bool hi) static inline bi_index va_lut(unsigned index) { - return bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | (index >> 1)), - index & 1); + return bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | (index >> 1)), index & 1); } /* @@ -930,13 +937,13 @@ va_lut(unsigned index) static inline bi_index va_zero_lut() { - return va_lut(0); + return va_lut(0); } static inline bi_index bi_temp(bi_context *ctx) { - return bi_get_index(ctx->ssa_alloc++); + return bi_get_index(ctx->ssa_alloc++); } /* Inline constants automatically, will be lowered out by bi_lower_fau where a @@ -946,113 +953,108 @@ bi_temp(bi_context *ctx) static inline bi_index bi_src_index(nir_src *src) { - if (nir_src_is_const(*src) && nir_src_bit_size(*src) <= 32) { - return bi_imm_u32(nir_src_as_uint(*src)); - } else { - assert(src->is_ssa); - return bi_get_index(src->ssa->index); - } + if (nir_src_is_const(*src) && nir_src_bit_size(*src) <= 32) { + return bi_imm_u32(nir_src_as_uint(*src)); + } else { + assert(src->is_ssa); + return bi_get_index(src->ssa->index); + } } static inline bi_index bi_dest_index(nir_dest *dst) { - assert(dst->is_ssa); - return bi_get_index(dst->ssa.index); + assert(dst->is_ssa); + return bi_get_index(dst->ssa.index); } /* Iterators for Bifrost IR */ -#define bi_foreach_block(ctx, v) \ - list_for_each_entry(bi_block, v, &ctx->blocks, link) +#define bi_foreach_block(ctx, v) \ + list_for_each_entry(bi_block, v, &ctx->blocks, link) -#define bi_foreach_block_rev(ctx, v) \ - list_for_each_entry_rev(bi_block, v, &ctx->blocks, link) +#define bi_foreach_block_rev(ctx, v) \ + list_for_each_entry_rev(bi_block, v, &ctx->blocks, link) -#define bi_foreach_block_from(ctx, from, v) \ - list_for_each_entry_from(bi_block, v, from, &ctx->blocks, link) +#define bi_foreach_block_from(ctx, from, v) \ + list_for_each_entry_from(bi_block, v, from, &ctx->blocks, link) -#define bi_foreach_block_from_rev(ctx, from, v) \ - list_for_each_entry_from_rev(bi_block, 
v, from, &ctx->blocks, link) +#define bi_foreach_block_from_rev(ctx, from, v) \ + list_for_each_entry_from_rev(bi_block, v, from, &ctx->blocks, link) -#define bi_foreach_instr_in_block(block, v) \ - list_for_each_entry(bi_instr, v, &(block)->instructions, link) +#define bi_foreach_instr_in_block(block, v) \ + list_for_each_entry(bi_instr, v, &(block)->instructions, link) -#define bi_foreach_instr_in_block_rev(block, v) \ - list_for_each_entry_rev(bi_instr, v, &(block)->instructions, link) +#define bi_foreach_instr_in_block_rev(block, v) \ + list_for_each_entry_rev(bi_instr, v, &(block)->instructions, link) -#define bi_foreach_instr_in_block_safe(block, v) \ - list_for_each_entry_safe(bi_instr, v, &(block)->instructions, link) +#define bi_foreach_instr_in_block_safe(block, v) \ + list_for_each_entry_safe(bi_instr, v, &(block)->instructions, link) -#define bi_foreach_instr_in_block_safe_rev(block, v) \ - list_for_each_entry_safe_rev(bi_instr, v, &(block)->instructions, link) +#define bi_foreach_instr_in_block_safe_rev(block, v) \ + list_for_each_entry_safe_rev(bi_instr, v, &(block)->instructions, link) -#define bi_foreach_instr_in_block_from(block, v, from) \ - list_for_each_entry_from(bi_instr, v, from, &(block)->instructions, link) +#define bi_foreach_instr_in_block_from(block, v, from) \ + list_for_each_entry_from(bi_instr, v, from, &(block)->instructions, link) -#define bi_foreach_instr_in_block_from_rev(block, v, from) \ - list_for_each_entry_from_rev(bi_instr, v, from, &(block)->instructions, link) +#define bi_foreach_instr_in_block_from_rev(block, v, from) \ + list_for_each_entry_from_rev(bi_instr, v, from, &(block)->instructions, link) -#define bi_foreach_clause_in_block(block, v) \ - list_for_each_entry(bi_clause, v, &(block)->clauses, link) +#define bi_foreach_clause_in_block(block, v) \ + list_for_each_entry(bi_clause, v, &(block)->clauses, link) -#define bi_foreach_clause_in_block_rev(block, v) \ - list_for_each_entry_rev(bi_clause, v, &(block)->clauses, link) +#define bi_foreach_clause_in_block_rev(block, v) \ + list_for_each_entry_rev(bi_clause, v, &(block)->clauses, link) -#define bi_foreach_clause_in_block_safe(block, v) \ - list_for_each_entry_safe(bi_clause, v, &(block)->clauses, link) +#define bi_foreach_clause_in_block_safe(block, v) \ + list_for_each_entry_safe(bi_clause, v, &(block)->clauses, link) -#define bi_foreach_clause_in_block_from(block, v, from) \ - list_for_each_entry_from(bi_clause, v, from, &(block)->clauses, link) +#define bi_foreach_clause_in_block_from(block, v, from) \ + list_for_each_entry_from(bi_clause, v, from, &(block)->clauses, link) -#define bi_foreach_clause_in_block_from_rev(block, v, from) \ - list_for_each_entry_from_rev(bi_clause, v, from, &(block)->clauses, link) +#define bi_foreach_clause_in_block_from_rev(block, v, from) \ + list_for_each_entry_from_rev(bi_clause, v, from, &(block)->clauses, link) -#define bi_foreach_instr_global(ctx, v) \ - bi_foreach_block(ctx, v_block) \ - bi_foreach_instr_in_block(v_block, v) +#define bi_foreach_instr_global(ctx, v) \ + bi_foreach_block(ctx, v_block) \ + bi_foreach_instr_in_block(v_block, v) -#define bi_foreach_instr_global_rev(ctx, v) \ - bi_foreach_block_rev(ctx, v_block) \ - bi_foreach_instr_in_block_rev(v_block, v) +#define bi_foreach_instr_global_rev(ctx, v) \ + bi_foreach_block_rev(ctx, v_block) \ + bi_foreach_instr_in_block_rev(v_block, v) -#define bi_foreach_instr_global_safe(ctx, v) \ - bi_foreach_block(ctx, v_block) \ - bi_foreach_instr_in_block_safe(v_block, v) +#define 
bi_foreach_instr_global_safe(ctx, v) \ + bi_foreach_block(ctx, v_block) \ + bi_foreach_instr_in_block_safe(v_block, v) -#define bi_foreach_instr_global_rev_safe(ctx, v) \ - bi_foreach_block_rev(ctx, v_block) \ - bi_foreach_instr_in_block_rev_safe(v_block, v) +#define bi_foreach_instr_global_rev_safe(ctx, v) \ + bi_foreach_block_rev(ctx, v_block) \ + bi_foreach_instr_in_block_rev_safe(v_block, v) -#define bi_foreach_instr_in_tuple(tuple, v) \ - for (bi_instr *v = (tuple)->fma ?: (tuple)->add; \ - v != NULL; \ - v = (v == (tuple)->add) ? NULL : (tuple)->add) +#define bi_foreach_instr_in_tuple(tuple, v) \ + for (bi_instr *v = (tuple)->fma ?: (tuple)->add; v != NULL; \ + v = (v == (tuple)->add) ? NULL : (tuple)->add) -#define bi_foreach_successor(blk, v) \ - bi_block *v; \ - bi_block **_v; \ - for (_v = &blk->successors[0], \ - v = *_v; \ - v != NULL && _v < &blk->successors[2]; \ - _v++, v = *_v) \ +#define bi_foreach_successor(blk, v) \ + bi_block *v; \ + bi_block **_v; \ + for (_v = &blk->successors[0], v = *_v; \ + v != NULL && _v < &blk->successors[2]; _v++, v = *_v) -#define bi_foreach_predecessor(blk, v) \ - util_dynarray_foreach(&(blk)->predecessors, bi_block *, v) +#define bi_foreach_predecessor(blk, v) \ + util_dynarray_foreach(&(blk)->predecessors, bi_block *, v) -#define bi_foreach_src(ins, v) \ - for (unsigned v = 0; v < ins->nr_srcs; ++v) +#define bi_foreach_src(ins, v) for (unsigned v = 0; v < ins->nr_srcs; ++v) -#define bi_foreach_dest(ins, v) \ - for (unsigned v = 0; v < ins->nr_dests; ++v) +#define bi_foreach_dest(ins, v) for (unsigned v = 0; v < ins->nr_dests; ++v) -#define bi_foreach_ssa_src(ins, v) \ - for (unsigned v = 0; v < ins->nr_srcs; ++v) \ - if (ins->src[v].type == BI_INDEX_NORMAL) +#define bi_foreach_ssa_src(ins, v) \ + for (unsigned v = 0; v < ins->nr_srcs; ++v) \ + if (ins->src[v].type == BI_INDEX_NORMAL) -#define bi_foreach_instr_and_src_in_tuple(tuple, ins, s) \ - bi_foreach_instr_in_tuple(tuple, ins) \ - bi_foreach_src(ins, s) +#define bi_foreach_instr_and_src_in_tuple(tuple, ins, s) \ + bi_foreach_instr_in_tuple(tuple, ins) \ + bi_foreach_src(ins, s) /* * Find the index of a predecessor, used as the implicit order of phi sources. 
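/*
 * Editorial illustration, not part of this diff: a minimal sketch of how the
 * reindented iterator macros above are typically used by an IR pass. The
 * helper name bi_count_ssa_srcs is hypothetical and assumed only for this
 * example; bi_foreach_instr_global and bi_foreach_ssa_src are the macros
 * defined in the hunk above, and the sketch assumes it sits in code that
 * already includes the Bifrost compiler.h context shown here.
 */
static unsigned
bi_count_ssa_srcs(bi_context *ctx)
{
   unsigned count = 0;

   /* Walk every instruction of every block, then each SSA (NORMAL) source */
   bi_foreach_instr_global(ctx, I) {
      bi_foreach_ssa_src(I, s)
         count++;
   }

   return count;
}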
@@ -1060,39 +1062,40 @@ bi_dest_index(nir_dest *dst) static inline unsigned bi_predecessor_index(bi_block *succ, bi_block *pred) { - unsigned index = 0; + unsigned index = 0; - bi_foreach_predecessor(succ, x) { - if (*x == pred) return index; + bi_foreach_predecessor(succ, x) { + if (*x == pred) + return index; - index++; - } + index++; + } - unreachable("Invalid predecessor"); + unreachable("Invalid predecessor"); } static inline bi_instr * bi_prev_op(bi_instr *ins) { - return list_last_entry(&(ins->link), bi_instr, link); + return list_last_entry(&(ins->link), bi_instr, link); } static inline bi_instr * bi_next_op(bi_instr *ins) { - return list_first_entry(&(ins->link), bi_instr, link); + return list_first_entry(&(ins->link), bi_instr, link); } static inline bi_block * bi_next_block(bi_block *block) { - return list_first_entry(&(block->link), bi_block, link); + return list_first_entry(&(block->link), bi_block, link); } static inline bi_block * bi_entry_block(bi_context *ctx) { - return list_first_entry(&ctx->blocks, bi_block, link); + return list_first_entry(&ctx->blocks, bi_block, link); } /* BIR manipulation */ @@ -1102,7 +1105,7 @@ unsigned bi_count_read_registers(const bi_instr *ins, unsigned src); unsigned bi_count_write_registers(const bi_instr *ins, unsigned dest); bool bi_is_regfmt_16(enum bi_register_format fmt); unsigned bi_writemask(const bi_instr *ins, unsigned dest); -bi_clause * bi_next_clause(bi_context *ctx, bi_block *block, bi_clause *clause); +bi_clause *bi_next_clause(bi_context *ctx, bi_block *block, bi_clause *clause); bool bi_side_effects(const bi_instr *I); bool bi_reconverge_branches(bi_block *block); @@ -1155,8 +1158,16 @@ bool bi_reads_t(bi_instr *ins, unsigned src); bool bi_validate_initialization(bi_context *ctx); void bi_validate(bi_context *ctx, const char *after_str); #else -static inline bool bi_validate_initialization(UNUSED bi_context *ctx) { return true; } -static inline void bi_validate(UNUSED bi_context *ctx, UNUSED const char *after_str) { return; } +static inline bool +bi_validate_initialization(UNUSED bi_context *ctx) +{ + return true; +} +static inline void +bi_validate(UNUSED bi_context *ctx, UNUSED const char *after_str) +{ + return; +} #endif uint32_t bi_fold_constant(bi_instr *I, bool *unsupported); @@ -1181,10 +1192,9 @@ bool bi_ec0_packed(unsigned tuple_count); static inline bool bi_is_terminal_block(bi_block *block) { - return (block == NULL) || - (list_is_empty(&block->instructions) && - bi_is_terminal_block(block->successors[0]) && - bi_is_terminal_block(block->successors[1])); + return (block == NULL) || (list_is_empty(&block->instructions) && + bi_is_terminal_block(block->successors[0]) && + bi_is_terminal_block(block->successors[1])); } /* Code emit */ @@ -1194,124 +1204,102 @@ unsigned bi_pack(bi_context *ctx, struct util_dynarray *emission); void bi_pack_valhall(bi_context *ctx, struct util_dynarray *emission); struct bi_packed_tuple { - uint64_t lo; - uint64_t hi; + uint64_t lo; + uint64_t hi; }; uint8_t bi_pack_literal(enum bi_clause_subword literal); -uint8_t -bi_pack_upper(enum bi_clause_subword upper, - struct bi_packed_tuple *tuples, - ASSERTED unsigned tuple_count); -uint64_t -bi_pack_tuple_bits(enum bi_clause_subword idx, - struct bi_packed_tuple *tuples, - ASSERTED unsigned tuple_count, - unsigned offset, unsigned nbits); +uint8_t bi_pack_upper(enum bi_clause_subword upper, + struct bi_packed_tuple *tuples, + ASSERTED unsigned tuple_count); +uint64_t bi_pack_tuple_bits(enum bi_clause_subword idx, + struct bi_packed_tuple 
*tuples, + ASSERTED unsigned tuple_count, unsigned offset, + unsigned nbits); -uint8_t -bi_pack_sync(enum bi_clause_subword t1, - enum bi_clause_subword t2, - enum bi_clause_subword t3, - struct bi_packed_tuple *tuples, - ASSERTED unsigned tuple_count, - bool z); +uint8_t bi_pack_sync(enum bi_clause_subword t1, enum bi_clause_subword t2, + enum bi_clause_subword t3, struct bi_packed_tuple *tuples, + ASSERTED unsigned tuple_count, bool z); -void -bi_pack_format(struct util_dynarray *emission, - unsigned index, - struct bi_packed_tuple *tuples, - ASSERTED unsigned tuple_count, - uint64_t header, uint64_t ec0, - unsigned m0, bool z); +void bi_pack_format(struct util_dynarray *emission, unsigned index, + struct bi_packed_tuple *tuples, + ASSERTED unsigned tuple_count, uint64_t header, + uint64_t ec0, unsigned m0, bool z); -unsigned bi_pack_fma(bi_instr *I, - enum bifrost_packed_src src0, - enum bifrost_packed_src src1, - enum bifrost_packed_src src2, - enum bifrost_packed_src src3); -unsigned bi_pack_add(bi_instr *I, - enum bifrost_packed_src src0, - enum bifrost_packed_src src1, - enum bifrost_packed_src src2, - enum bifrost_packed_src src3); +unsigned bi_pack_fma(bi_instr *I, enum bifrost_packed_src src0, + enum bifrost_packed_src src1, enum bifrost_packed_src src2, + enum bifrost_packed_src src3); +unsigned bi_pack_add(bi_instr *I, enum bifrost_packed_src src0, + enum bifrost_packed_src src1, enum bifrost_packed_src src2, + enum bifrost_packed_src src3); /* Like in NIR, for use with the builder */ enum bi_cursor_option { - bi_cursor_after_block, - bi_cursor_before_instr, - bi_cursor_after_instr + bi_cursor_after_block, + bi_cursor_before_instr, + bi_cursor_after_instr }; typedef struct { - enum bi_cursor_option option; + enum bi_cursor_option option; - union { - bi_block *block; - bi_instr *instr; - }; + union { + bi_block *block; + bi_instr *instr; + }; } bi_cursor; static inline bi_cursor bi_after_block(bi_block *block) { - return (bi_cursor) { - .option = bi_cursor_after_block, - .block = block - }; + return (bi_cursor){.option = bi_cursor_after_block, .block = block}; } static inline bi_cursor bi_before_instr(bi_instr *instr) { - return (bi_cursor) { - .option = bi_cursor_before_instr, - .instr = instr - }; + return (bi_cursor){.option = bi_cursor_before_instr, .instr = instr}; } static inline bi_cursor bi_after_instr(bi_instr *instr) { - return (bi_cursor) { - .option = bi_cursor_after_instr, - .instr = instr - }; + return (bi_cursor){.option = bi_cursor_after_instr, .instr = instr}; } static inline bi_cursor bi_after_block_logical(bi_block *block) { - if (list_is_empty(&block->instructions)) - return bi_after_block(block); + if (list_is_empty(&block->instructions)) + return bi_after_block(block); - bi_instr *last = list_last_entry(&block->instructions, bi_instr, link); - assert(last != NULL); + bi_instr *last = list_last_entry(&block->instructions, bi_instr, link); + assert(last != NULL); - if (last->branch_target) - return bi_before_instr(last); - else - return bi_after_block(block); + if (last->branch_target) + return bi_before_instr(last); + else + return bi_after_block(block); } static inline bi_cursor bi_before_nonempty_block(bi_block *block) { - bi_instr *I = list_first_entry(&block->instructions, bi_instr, link); - assert(I != NULL); + bi_instr *I = list_first_entry(&block->instructions, bi_instr, link); + assert(I != NULL); - return bi_before_instr(I); + return bi_before_instr(I); } static inline bi_cursor bi_before_block(bi_block *block) { - if 
(list_is_empty(&block->instructions)) - return bi_after_block(block); - else - return bi_before_nonempty_block(block); + if (list_is_empty(&block->instructions)) + return bi_after_block(block); + else + return bi_before_nonempty_block(block); } /* Invariant: a tuple must be nonempty UNLESS it is the last tuple of a clause, @@ -1320,80 +1308,79 @@ bi_before_block(bi_block *block) ATTRIBUTE_RETURNS_NONNULL static inline bi_instr * bi_first_instr_in_tuple(bi_tuple *tuple) { - bi_instr *instr = tuple->fma ?: tuple->add; - assert(instr != NULL); - return instr; + bi_instr *instr = tuple->fma ?: tuple->add; + assert(instr != NULL); + return instr; } ATTRIBUTE_RETURNS_NONNULL static inline bi_instr * bi_first_instr_in_clause(bi_clause *clause) { - return bi_first_instr_in_tuple(&clause->tuples[0]); + return bi_first_instr_in_tuple(&clause->tuples[0]); } ATTRIBUTE_RETURNS_NONNULL static inline bi_instr * bi_last_instr_in_clause(bi_clause *clause) { - bi_tuple tuple = clause->tuples[clause->tuple_count - 1]; - bi_instr *instr = tuple.add ?: tuple.fma; + bi_tuple tuple = clause->tuples[clause->tuple_count - 1]; + bi_instr *instr = tuple.add ?: tuple.fma; - if (!instr) { - assert(clause->tuple_count >= 2); - tuple = clause->tuples[clause->tuple_count - 2]; - instr = tuple.add ?: tuple.fma; - } + if (!instr) { + assert(clause->tuple_count >= 2); + tuple = clause->tuples[clause->tuple_count - 2]; + instr = tuple.add ?: tuple.fma; + } - assert(instr != NULL); - return instr; + assert(instr != NULL); + return instr; } /* Implemented by expanding bi_foreach_instr_in_block_from(_rev) with the start * (end) of the clause and adding a condition for the clause boundary */ -#define bi_foreach_instr_in_clause(block, clause, pos) \ - for (bi_instr *pos = list_entry(bi_first_instr_in_clause(clause), bi_instr, link); \ - (&pos->link != &(block)->instructions) \ - && (pos != bi_next_op(bi_last_instr_in_clause(clause))); \ - pos = list_entry(pos->link.next, bi_instr, link)) +#define bi_foreach_instr_in_clause(block, clause, pos) \ + for (bi_instr *pos = \ + list_entry(bi_first_instr_in_clause(clause), bi_instr, link); \ + (&pos->link != &(block)->instructions) && \ + (pos != bi_next_op(bi_last_instr_in_clause(clause))); \ + pos = list_entry(pos->link.next, bi_instr, link)) -#define bi_foreach_instr_in_clause_rev(block, clause, pos) \ - for (bi_instr *pos = list_entry(bi_last_instr_in_clause(clause), bi_instr, link); \ - (&pos->link != &(block)->instructions) \ - && pos != bi_prev_op(bi_first_instr_in_clause(clause)); \ - pos = list_entry(pos->link.prev, bi_instr, link)) +#define bi_foreach_instr_in_clause_rev(block, clause, pos) \ + for (bi_instr *pos = \ + list_entry(bi_last_instr_in_clause(clause), bi_instr, link); \ + (&pos->link != &(block)->instructions) && \ + pos != bi_prev_op(bi_first_instr_in_clause(clause)); \ + pos = list_entry(pos->link.prev, bi_instr, link)) static inline bi_cursor bi_before_clause(bi_clause *clause) { - return bi_before_instr(bi_first_instr_in_clause(clause)); + return bi_before_instr(bi_first_instr_in_clause(clause)); } static inline bi_cursor bi_before_tuple(bi_tuple *tuple) { - return bi_before_instr(bi_first_instr_in_tuple(tuple)); + return bi_before_instr(bi_first_instr_in_tuple(tuple)); } static inline bi_cursor bi_after_clause(bi_clause *clause) { - return bi_after_instr(bi_last_instr_in_clause(clause)); + return bi_after_instr(bi_last_instr_in_clause(clause)); } /* IR builder in terms of cursor infrastructure */ typedef struct { - bi_context *shader; - bi_cursor cursor; + 
bi_context *shader; + bi_cursor cursor; } bi_builder; static inline bi_builder bi_init_builder(bi_context *ctx, bi_cursor cursor) { - return (bi_builder) { - .shader = ctx, - .cursor = cursor - }; + return (bi_builder){.shader = ctx, .cursor = cursor}; } /* Insert an instruction at the cursor and move the cursor */ @@ -1401,26 +1388,26 @@ bi_init_builder(bi_context *ctx, bi_cursor cursor) static inline void bi_builder_insert(bi_cursor *cursor, bi_instr *I) { - switch (cursor->option) { - case bi_cursor_after_instr: - list_add(&I->link, &cursor->instr->link); - cursor->instr = I; - return; + switch (cursor->option) { + case bi_cursor_after_instr: + list_add(&I->link, &cursor->instr->link); + cursor->instr = I; + return; - case bi_cursor_after_block: - list_addtail(&I->link, &cursor->block->instructions); - cursor->option = bi_cursor_after_instr; - cursor->instr = I; - return; + case bi_cursor_after_block: + list_addtail(&I->link, &cursor->block->instructions); + cursor->option = bi_cursor_after_instr; + cursor->instr = I; + return; - case bi_cursor_before_instr: - list_addtail(&I->link, &cursor->instr->link); - cursor->option = bi_cursor_after_instr; - cursor->instr = I; - return; - } + case bi_cursor_before_instr: + list_addtail(&I->link, &cursor->instr->link); + cursor->option = bi_cursor_after_instr; + cursor->instr = I; + return; + } - unreachable("Invalid cursor option"); + unreachable("Invalid cursor option"); } bi_instr *bi_csel_from_mux(bi_builder *b, const bi_instr *I, bool must_sign); @@ -1429,19 +1416,19 @@ bi_instr *bi_csel_from_mux(bi_builder *b, const bi_instr *I, bool must_sign); static inline bi_index bi_dontcare(bi_builder *b) { - if (b->shader->arch >= 9) - return bi_zero(); - else - return bi_passthrough(BIFROST_SRC_FAU_HI); + if (b->shader->arch >= 9) + return bi_zero(); + else + return bi_passthrough(BIFROST_SRC_FAU_HI); } -#define bi_worklist_init(ctx, w) u_worklist_init(w, ctx->num_blocks, ctx) +#define bi_worklist_init(ctx, w) u_worklist_init(w, ctx->num_blocks, ctx) #define bi_worklist_push_head(w, block) u_worklist_push_head(w, block, index) #define bi_worklist_push_tail(w, block) u_worklist_push_tail(w, block, index) -#define bi_worklist_peek_head(w) u_worklist_peek_head(w, bi_block, index) -#define bi_worklist_pop_head(w) u_worklist_pop_head( w, bi_block, index) -#define bi_worklist_peek_tail(w) u_worklist_peek_tail(w, bi_block, index) -#define bi_worklist_pop_tail(w) u_worklist_pop_tail( w, bi_block, index) +#define bi_worklist_peek_head(w) u_worklist_peek_head(w, bi_block, index) +#define bi_worklist_pop_head(w) u_worklist_pop_head(w, bi_block, index) +#define bi_worklist_peek_tail(w) u_worklist_peek_tail(w, bi_block, index) +#define bi_worklist_pop_tail(w) u_worklist_pop_tail(w, bi_block, index) /* NIR passes */ diff --git a/src/panfrost/bifrost/disassemble.c b/src/panfrost/bifrost/disassemble.c index 1bc98e40596..5a3791efad0 100644 --- a/src/panfrost/bifrost/disassemble.c +++ b/src/panfrost/bifrost/disassemble.c @@ -23,268 +23,276 @@ * SOFTWARE. 
*/ -#include -#include -#include #include #include +#include +#include +#include #include -#include "bifrost.h" -#include "disassemble.h" -#include "bi_print_common.h" #include "util/compiler.h" #include "util/macros.h" +#include "bi_print_common.h" +#include "bifrost.h" +#include "disassemble.h" // return bits (high, lo] -static uint64_t bits(uint32_t word, unsigned lo, unsigned high) +static uint64_t +bits(uint32_t word, unsigned lo, unsigned high) { - if (high == 32) - return word >> lo; - return (word & ((1 << high) - 1)) >> lo; + if (high == 32) + return word >> lo; + return (word & ((1 << high) - 1)) >> lo; } // each of these structs represents an instruction that's dispatched in one // cycle. Note that these instructions are packed in funny ways within the // clause, hence the need for a separate struct. struct bifrost_alu_inst { - uint32_t fma_bits; - uint32_t add_bits; - uint64_t reg_bits; + uint32_t fma_bits; + uint32_t add_bits; + uint64_t reg_bits; }; -static unsigned get_reg0(struct bifrost_regs regs) +static unsigned +get_reg0(struct bifrost_regs regs) { - if (regs.ctrl == 0) - return regs.reg0 | ((regs.reg1 & 0x1) << 5); + if (regs.ctrl == 0) + return regs.reg0 | ((regs.reg1 & 0x1) << 5); - return regs.reg0 <= regs.reg1 ? regs.reg0 : 63 - regs.reg0; + return regs.reg0 <= regs.reg1 ? regs.reg0 : 63 - regs.reg0; } -static unsigned get_reg1(struct bifrost_regs regs) +static unsigned +get_reg1(struct bifrost_regs regs) { - return regs.reg0 <= regs.reg1 ? regs.reg1 : 63 - regs.reg1; + return regs.reg0 <= regs.reg1 ? regs.reg1 : 63 - regs.reg1; } // this represents the decoded version of the ctrl register field. struct bifrost_reg_ctrl { - bool read_reg0; - bool read_reg1; - struct bifrost_reg_ctrl_23 slot23; + bool read_reg0; + bool read_reg1; + struct bifrost_reg_ctrl_23 slot23; }; -static void dump_header(FILE *fp, struct bifrost_header header, bool verbose) +static void +dump_header(FILE *fp, struct bifrost_header header, bool verbose) { - fprintf(fp, "ds(%u) ", header.dependency_slot); + fprintf(fp, "ds(%u) ", header.dependency_slot); - if (header.staging_barrier) - fprintf(fp, "osrb "); + if (header.staging_barrier) + fprintf(fp, "osrb "); - fprintf(fp, "%s ", bi_flow_control_name(header.flow_control)); + fprintf(fp, "%s ", bi_flow_control_name(header.flow_control)); - if (header.suppress_inf) - fprintf(fp, "inf_suppress "); - if (header.suppress_nan) - fprintf(fp, "nan_suppress "); + if (header.suppress_inf) + fprintf(fp, "inf_suppress "); + if (header.suppress_nan) + fprintf(fp, "nan_suppress "); - if (header.flush_to_zero == BIFROST_FTZ_DX11) - fprintf(fp, "ftz_dx11 "); - else if (header.flush_to_zero == BIFROST_FTZ_ALWAYS) - fprintf(fp, "ftz_hsa "); - if (header.flush_to_zero == BIFROST_FTZ_ABRUPT) - fprintf(fp, "ftz_au "); + if (header.flush_to_zero == BIFROST_FTZ_DX11) + fprintf(fp, "ftz_dx11 "); + else if (header.flush_to_zero == BIFROST_FTZ_ALWAYS) + fprintf(fp, "ftz_hsa "); + if (header.flush_to_zero == BIFROST_FTZ_ABRUPT) + fprintf(fp, "ftz_au "); - assert(!header.zero1); - assert(!header.zero2); + assert(!header.zero1); + assert(!header.zero2); - if (header.float_exceptions == BIFROST_EXCEPTIONS_DISABLED) - fprintf(fp, "fpe_ts "); - else if (header.float_exceptions == BIFROST_EXCEPTIONS_PRECISE_DIVISION) - fprintf(fp, "fpe_pd "); - else if (header.float_exceptions == BIFROST_EXCEPTIONS_PRECISE_SQRT) - fprintf(fp, "fpe_psqr "); + if (header.float_exceptions == BIFROST_EXCEPTIONS_DISABLED) + fprintf(fp, "fpe_ts "); + else if (header.float_exceptions == 
BIFROST_EXCEPTIONS_PRECISE_DIVISION) + fprintf(fp, "fpe_pd "); + else if (header.float_exceptions == BIFROST_EXCEPTIONS_PRECISE_SQRT) + fprintf(fp, "fpe_psqr "); - if (header.message_type) - fprintf(fp, "%s ", bi_message_type_name(header.message_type)); + if (header.message_type) + fprintf(fp, "%s ", bi_message_type_name(header.message_type)); - if (header.terminate_discarded_threads) - fprintf(fp, "td "); + if (header.terminate_discarded_threads) + fprintf(fp, "td "); - if (header.next_clause_prefetch) - fprintf(fp, "ncph "); + if (header.next_clause_prefetch) + fprintf(fp, "ncph "); - if (header.next_message_type) - fprintf(fp, "next_%s ", bi_message_type_name(header.next_message_type)); - if (header.dependency_wait != 0) { - fprintf(fp, "dwb("); - bool first = true; - for (unsigned i = 0; i < 8; i++) { - if (header.dependency_wait & (1 << i)) { - if (!first) { - fprintf(fp, ", "); - } - fprintf(fp, "%u", i); - first = false; - } - } - fprintf(fp, ") "); - } + if (header.next_message_type) + fprintf(fp, "next_%s ", bi_message_type_name(header.next_message_type)); + if (header.dependency_wait != 0) { + fprintf(fp, "dwb("); + bool first = true; + for (unsigned i = 0; i < 8; i++) { + if (header.dependency_wait & (1 << i)) { + if (!first) { + fprintf(fp, ", "); + } + fprintf(fp, "%u", i); + first = false; + } + } + fprintf(fp, ") "); + } - fprintf(fp, "\n"); + fprintf(fp, "\n"); } -static struct bifrost_reg_ctrl DecodeRegCtrl(FILE *fp, struct bifrost_regs regs, bool first) +static struct bifrost_reg_ctrl +DecodeRegCtrl(FILE *fp, struct bifrost_regs regs, bool first) { - struct bifrost_reg_ctrl decoded = {}; - unsigned ctrl; - if (regs.ctrl == 0) { - ctrl = regs.reg1 >> 2; - decoded.read_reg0 = !(regs.reg1 & 0x2); - decoded.read_reg1 = false; - } else { - ctrl = regs.ctrl; - decoded.read_reg0 = decoded.read_reg1 = true; - } + struct bifrost_reg_ctrl decoded = {}; + unsigned ctrl; + if (regs.ctrl == 0) { + ctrl = regs.reg1 >> 2; + decoded.read_reg0 = !(regs.reg1 & 0x2); + decoded.read_reg1 = false; + } else { + ctrl = regs.ctrl; + decoded.read_reg0 = decoded.read_reg1 = true; + } - /* Modify control based on state */ - if (first) - ctrl = (ctrl & 0x7) | ((ctrl & 0x8) << 1); - else if (regs.reg2 == regs.reg3) - ctrl += 16; + /* Modify control based on state */ + if (first) + ctrl = (ctrl & 0x7) | ((ctrl & 0x8) << 1); + else if (regs.reg2 == regs.reg3) + ctrl += 16; - decoded.slot23 = bifrost_reg_ctrl_lut[ctrl]; - ASSERTED struct bifrost_reg_ctrl_23 reserved = { 0 }; - assert(memcmp(&decoded.slot23, &reserved, sizeof(reserved))); + decoded.slot23 = bifrost_reg_ctrl_lut[ctrl]; + ASSERTED struct bifrost_reg_ctrl_23 reserved = {0}; + assert(memcmp(&decoded.slot23, &reserved, sizeof(reserved))); - return decoded; + return decoded; } -static void dump_regs(FILE *fp, struct bifrost_regs srcs, bool first) +static void +dump_regs(FILE *fp, struct bifrost_regs srcs, bool first) { - struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, srcs, first); - fprintf(fp, " # "); - if (ctrl.read_reg0) - fprintf(fp, "slot 0: r%u ", get_reg0(srcs)); - if (ctrl.read_reg1) - fprintf(fp, "slot 1: r%u ", get_reg1(srcs)); + struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, srcs, first); + fprintf(fp, " # "); + if (ctrl.read_reg0) + fprintf(fp, "slot 0: r%u ", get_reg0(srcs)); + if (ctrl.read_reg1) + fprintf(fp, "slot 1: r%u ", get_reg1(srcs)); - const char *slot3_fma = ctrl.slot23.slot3_fma ? "FMA" : "ADD"; + const char *slot3_fma = ctrl.slot23.slot3_fma ? 
"FMA" : "ADD"; - if (ctrl.slot23.slot2 == BIFROST_OP_WRITE) - fprintf(fp, "slot 2: r%u (write FMA) ", srcs.reg2); - else if (ctrl.slot23.slot2 == BIFROST_OP_WRITE_LO) - fprintf(fp, "slot 2: r%u (write lo FMA) ", srcs.reg2); - else if (ctrl.slot23.slot2 == BIFROST_OP_WRITE_HI) - fprintf(fp, "slot 2: r%u (write hi FMA) ", srcs.reg2); - else if (ctrl.slot23.slot2 == BIFROST_OP_READ) - fprintf(fp, "slot 2: r%u (read) ", srcs.reg2); + if (ctrl.slot23.slot2 == BIFROST_OP_WRITE) + fprintf(fp, "slot 2: r%u (write FMA) ", srcs.reg2); + else if (ctrl.slot23.slot2 == BIFROST_OP_WRITE_LO) + fprintf(fp, "slot 2: r%u (write lo FMA) ", srcs.reg2); + else if (ctrl.slot23.slot2 == BIFROST_OP_WRITE_HI) + fprintf(fp, "slot 2: r%u (write hi FMA) ", srcs.reg2); + else if (ctrl.slot23.slot2 == BIFROST_OP_READ) + fprintf(fp, "slot 2: r%u (read) ", srcs.reg2); - if (ctrl.slot23.slot3 == BIFROST_OP_WRITE) - fprintf(fp, "slot 3: r%u (write %s) ", srcs.reg3, slot3_fma); - else if (ctrl.slot23.slot3 == BIFROST_OP_WRITE_LO) - fprintf(fp, "slot 3: r%u (write lo %s) ", srcs.reg3, slot3_fma); - else if (ctrl.slot23.slot3 == BIFROST_OP_WRITE_HI) - fprintf(fp, "slot 3: r%u (write hi %s) ", srcs.reg3, slot3_fma); + if (ctrl.slot23.slot3 == BIFROST_OP_WRITE) + fprintf(fp, "slot 3: r%u (write %s) ", srcs.reg3, slot3_fma); + else if (ctrl.slot23.slot3 == BIFROST_OP_WRITE_LO) + fprintf(fp, "slot 3: r%u (write lo %s) ", srcs.reg3, slot3_fma); + else if (ctrl.slot23.slot3 == BIFROST_OP_WRITE_HI) + fprintf(fp, "slot 3: r%u (write hi %s) ", srcs.reg3, slot3_fma); - if (srcs.fau_idx) - fprintf(fp, "fau %X ", srcs.fau_idx); + if (srcs.fau_idx) + fprintf(fp, "fau %X ", srcs.fau_idx); - fprintf(fp, "\n"); + fprintf(fp, "\n"); } static void bi_disasm_dest_mask(FILE *fp, enum bifrost_reg_op op) { - if (op == BIFROST_OP_WRITE_LO) - fprintf(fp, ".h0"); - else if (op == BIFROST_OP_WRITE_HI) - fprintf(fp, ".h1"); + if (op == BIFROST_OP_WRITE_LO) + fprintf(fp, ".h0"); + else if (op == BIFROST_OP_WRITE_HI) + fprintf(fp, ".h1"); } void bi_disasm_dest_fma(FILE *fp, struct bifrost_regs *next_regs, bool last) { - /* If this is the last instruction, next_regs points to the first reg entry. */ - struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, *next_regs, last); - if (ctrl.slot23.slot2 >= BIFROST_OP_WRITE) { - fprintf(fp, "r%u:t0", next_regs->reg2); - bi_disasm_dest_mask(fp, ctrl.slot23.slot2); - } else if (ctrl.slot23.slot3 >= BIFROST_OP_WRITE && ctrl.slot23.slot3_fma) { - fprintf(fp, "r%u:t0", next_regs->reg3); - bi_disasm_dest_mask(fp, ctrl.slot23.slot3); - } else - fprintf(fp, "t0"); + /* If this is the last instruction, next_regs points to the first reg entry. */ + struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, *next_regs, last); + if (ctrl.slot23.slot2 >= BIFROST_OP_WRITE) { + fprintf(fp, "r%u:t0", next_regs->reg2); + bi_disasm_dest_mask(fp, ctrl.slot23.slot2); + } else if (ctrl.slot23.slot3 >= BIFROST_OP_WRITE && ctrl.slot23.slot3_fma) { + fprintf(fp, "r%u:t0", next_regs->reg3); + bi_disasm_dest_mask(fp, ctrl.slot23.slot3); + } else + fprintf(fp, "t0"); } void bi_disasm_dest_add(FILE *fp, struct bifrost_regs *next_regs, bool last) { - /* If this is the last instruction, next_regs points to the first reg entry. */ - struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, *next_regs, last); + /* If this is the last instruction, next_regs points to the first reg entry. 
*/ + struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, *next_regs, last); - if (ctrl.slot23.slot3 >= BIFROST_OP_WRITE && !ctrl.slot23.slot3_fma) { - fprintf(fp, "r%u:t1", next_regs->reg3); - bi_disasm_dest_mask(fp, ctrl.slot23.slot3); - } else - fprintf(fp, "t1"); -} - -static void dump_const_imm(FILE *fp, uint32_t imm) -{ - union { - float f; - uint32_t i; - } fi; - fi.i = imm; - fprintf(fp, "0x%08x /* %f */", imm, fi.f); + if (ctrl.slot23.slot3 >= BIFROST_OP_WRITE && !ctrl.slot23.slot3_fma) { + fprintf(fp, "r%u:t1", next_regs->reg3); + bi_disasm_dest_mask(fp, ctrl.slot23.slot3); + } else + fprintf(fp, "t1"); } static void -dump_pc_imm(FILE *fp, uint64_t imm, unsigned branch_offset, enum bi_constmod mod, bool high32) +dump_const_imm(FILE *fp, uint32_t imm) { - if (mod == BI_CONSTMOD_PC_HI && !high32) { - dump_const_imm(fp, imm); - return; - } + union { + float f; + uint32_t i; + } fi; + fi.i = imm; + fprintf(fp, "0x%08x /* %f */", imm, fi.f); +} - /* 60-bit sign-extend */ - uint64_t zx64 = (imm << 4); - int64_t sx64 = zx64; - sx64 >>= 4; +static void +dump_pc_imm(FILE *fp, uint64_t imm, unsigned branch_offset, + enum bi_constmod mod, bool high32) +{ + if (mod == BI_CONSTMOD_PC_HI && !high32) { + dump_const_imm(fp, imm); + return; + } - /* 28-bit sign extend x 2 */ - uint32_t imm32[2] = { (uint32_t) imm, (uint32_t) (imm >> 32) }; - uint32_t zx32[2] = { imm32[0] << 4, imm32[1] << 4 }; - int32_t sx32[2] = { zx32[0], zx32[1] }; - sx32[0] >>= 4; - sx32[1] >>= 4; + /* 60-bit sign-extend */ + uint64_t zx64 = (imm << 4); + int64_t sx64 = zx64; + sx64 >>= 4; - int64_t offs = 0; + /* 28-bit sign extend x 2 */ + uint32_t imm32[2] = {(uint32_t)imm, (uint32_t)(imm >> 32)}; + uint32_t zx32[2] = {imm32[0] << 4, imm32[1] << 4}; + int32_t sx32[2] = {zx32[0], zx32[1]}; + sx32[0] >>= 4; + sx32[1] >>= 4; - switch (mod) { - case BI_CONSTMOD_PC_LO: - offs = sx64; - break; - case BI_CONSTMOD_PC_HI: - offs = sx32[1]; - break; - case BI_CONSTMOD_PC_LO_HI: - offs = sx32[high32]; - break; - default: - unreachable("Invalid PC modifier"); - } + int64_t offs = 0; - assert((offs & 15) == 0); - fprintf(fp, "clause_%" PRId64, branch_offset + (offs / 16)); + switch (mod) { + case BI_CONSTMOD_PC_LO: + offs = sx64; + break; + case BI_CONSTMOD_PC_HI: + offs = sx32[1]; + break; + case BI_CONSTMOD_PC_LO_HI: + offs = sx32[high32]; + break; + default: + unreachable("Invalid PC modifier"); + } - if (mod == BI_CONSTMOD_PC_LO && high32) - fprintf(fp, " >> 32"); + assert((offs & 15) == 0); + fprintf(fp, "clause_%" PRId64, branch_offset + (offs / 16)); - /* While technically in spec, referencing the current clause as (pc + - * 0) likely indicates an unintended infinite loop */ - if (offs == 0) - fprintf(fp, " /* XXX: likely an infinite loop */"); + if (mod == BI_CONSTMOD_PC_LO && high32) + fprintf(fp, " >> 32"); + + /* While technically in spec, referencing the current clause as (pc + + * 0) likely indicates an unintended infinite loop */ + if (offs == 0) + fprintf(fp, " /* XXX: likely an infinite loop */"); } /* Convert an index to an embedded constant in FAU-RAM to the index of the @@ -293,106 +301,107 @@ dump_pc_imm(FILE *fp, uint64_t imm, unsigned branch_offset, enum bi_constmod mod static unsigned const_fau_to_idx(unsigned fau_value) { - unsigned map[8] = { - ~0, ~0, 4, 5, 0, 1, 2, 3 - }; + unsigned map[8] = {~0, ~0, 4, 5, 0, 1, 2, 3}; - assert(map[fau_value] < 6); - return map[fau_value]; + assert(map[fau_value] < 6); + return map[fau_value]; } -static void dump_fau_src(FILE *fp, struct bifrost_regs srcs, unsigned branch_offset, 
struct bi_constants *consts, bool high32) +static void +dump_fau_src(FILE *fp, struct bifrost_regs srcs, unsigned branch_offset, + struct bi_constants *consts, bool high32) { - if (srcs.fau_idx & 0x80) { - unsigned uniform = (srcs.fau_idx & 0x7f); - fprintf(fp, "u%u.w%u", uniform, high32); - } else if (srcs.fau_idx >= 0x20) { - unsigned idx = const_fau_to_idx(srcs.fau_idx >> 4); - uint64_t imm = consts->raw[idx]; - imm |= (srcs.fau_idx & 0xf); - if (consts->mods[idx] != BI_CONSTMOD_NONE) - dump_pc_imm(fp, imm, branch_offset, consts->mods[idx], high32); - else if (high32) - dump_const_imm(fp, imm >> 32); - else - dump_const_imm(fp, imm); - } else { - switch (srcs.fau_idx) { - case 0: - fprintf(fp, "#0"); - break; - case 1: - fprintf(fp, "lane_id"); - break; - case 2: - fprintf(fp, "warp_id"); - break; - case 3: - fprintf(fp, "core_id"); - break; - case 4: - fprintf(fp, "framebuffer_size"); - break; - case 5: - fprintf(fp, "atest_datum"); - break; - case 6: - fprintf(fp, "sample"); - break; - case 8: - case 9: - case 10: - case 11: - case 12: - case 13: - case 14: - case 15: - fprintf(fp, "blend_descriptor_%u", (unsigned) srcs.fau_idx - 8); - break; - default: - fprintf(fp, "XXX - reserved%u", (unsigned) srcs.fau_idx); - break; - } + if (srcs.fau_idx & 0x80) { + unsigned uniform = (srcs.fau_idx & 0x7f); + fprintf(fp, "u%u.w%u", uniform, high32); + } else if (srcs.fau_idx >= 0x20) { + unsigned idx = const_fau_to_idx(srcs.fau_idx >> 4); + uint64_t imm = consts->raw[idx]; + imm |= (srcs.fau_idx & 0xf); + if (consts->mods[idx] != BI_CONSTMOD_NONE) + dump_pc_imm(fp, imm, branch_offset, consts->mods[idx], high32); + else if (high32) + dump_const_imm(fp, imm >> 32); + else + dump_const_imm(fp, imm); + } else { + switch (srcs.fau_idx) { + case 0: + fprintf(fp, "#0"); + break; + case 1: + fprintf(fp, "lane_id"); + break; + case 2: + fprintf(fp, "warp_id"); + break; + case 3: + fprintf(fp, "core_id"); + break; + case 4: + fprintf(fp, "framebuffer_size"); + break; + case 5: + fprintf(fp, "atest_datum"); + break; + case 6: + fprintf(fp, "sample"); + break; + case 8: + case 9: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + fprintf(fp, "blend_descriptor_%u", (unsigned)srcs.fau_idx - 8); + break; + default: + fprintf(fp, "XXX - reserved%u", (unsigned)srcs.fau_idx); + break; + } - if (high32) - fprintf(fp, ".y"); - else - fprintf(fp, ".x"); - } + if (high32) + fprintf(fp, ".y"); + else + fprintf(fp, ".x"); + } } void -dump_src(FILE *fp, unsigned src, struct bifrost_regs srcs, unsigned branch_offset, struct bi_constants *consts, bool isFMA) +dump_src(FILE *fp, unsigned src, struct bifrost_regs srcs, + unsigned branch_offset, struct bi_constants *consts, bool isFMA) { - switch (src) { - case 0: - fprintf(fp, "r%u", get_reg0(srcs)); - break; - case 1: - fprintf(fp, "r%u", get_reg1(srcs)); - break; - case 2: - fprintf(fp, "r%u", srcs.reg2); - break; - case 3: - if (isFMA) - fprintf(fp, "#0"); - else - fprintf(fp, "t"); // i.e. the output of FMA this cycle - break; - case 4: - dump_fau_src(fp, srcs, branch_offset, consts, false); - break; - case 5: - dump_fau_src(fp, srcs, branch_offset, consts, true); - break; - case 6: - fprintf(fp, "t0"); - break; - case 7: - fprintf(fp, "t1"); - break; - } + switch (src) { + case 0: + fprintf(fp, "r%u", get_reg0(srcs)); + break; + case 1: + fprintf(fp, "r%u", get_reg1(srcs)); + break; + case 2: + fprintf(fp, "r%u", srcs.reg2); + break; + case 3: + if (isFMA) + fprintf(fp, "#0"); + else + fprintf(fp, "t"); // i.e. 
the output of FMA this cycle + break; + case 4: + dump_fau_src(fp, srcs, branch_offset, consts, false); + break; + case 5: + dump_fau_src(fp, srcs, branch_offset, consts, true); + break; + case 6: + fprintf(fp, "t0"); + break; + case 7: + fprintf(fp, "t1"); + break; + } } /* Tables for decoding M0, or if M0 == 7, M1 respectively. @@ -403,308 +412,311 @@ dump_src(FILE *fp, unsigned src, struct bifrost_regs srcs, unsigned branch_offse */ static const enum bi_constmod M1_table[7][2] = { - { BI_CONSTMOD_NONE, BI_CONSTMOD_NONE }, - { BI_CONSTMOD_PC_LO, BI_CONSTMOD_NONE }, - { BI_CONSTMOD_PC_LO, BI_CONSTMOD_PC_LO }, - { ~0, ~0 }, - { BI_CONSTMOD_PC_HI, BI_CONSTMOD_NONE }, - { BI_CONSTMOD_PC_HI, BI_CONSTMOD_PC_HI }, - { BI_CONSTMOD_PC_LO, BI_CONSTMOD_NONE }, + {BI_CONSTMOD_NONE, BI_CONSTMOD_NONE}, + {BI_CONSTMOD_PC_LO, BI_CONSTMOD_NONE}, + {BI_CONSTMOD_PC_LO, BI_CONSTMOD_PC_LO}, + {~0, ~0}, + {BI_CONSTMOD_PC_HI, BI_CONSTMOD_NONE}, + {BI_CONSTMOD_PC_HI, BI_CONSTMOD_PC_HI}, + {BI_CONSTMOD_PC_LO, BI_CONSTMOD_NONE}, }; static const enum bi_constmod M2_table[4][2] = { - { BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_NONE }, - { BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_HI }, - { BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_LO_HI }, - { BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_HI }, + {BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_NONE}, + {BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_HI}, + {BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_LO_HI}, + {BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_HI}, }; static void decode_M(enum bi_constmod *mod, unsigned M1, unsigned M2, bool single) { - if (M1 >= 8) { - mod[0] = BI_CONSTMOD_NONE; + if (M1 >= 8) { + mod[0] = BI_CONSTMOD_NONE; - if (!single) - mod[1] = BI_CONSTMOD_NONE; + if (!single) + mod[1] = BI_CONSTMOD_NONE; - return; - } else if (M1 == 7) { - assert(M2 < 4); - memcpy(mod, M2_table[M2], sizeof(*mod) * (single ? 1 : 2)); - } else { - assert(M1 != 3); - memcpy(mod, M1_table[M1], sizeof(*mod) * (single ? 1 : 2)); - } + return; + } else if (M1 == 7) { + assert(M2 < 4); + memcpy(mod, M2_table[M2], sizeof(*mod) * (single ? 1 : 2)); + } else { + assert(M1 != 3); + memcpy(mod, M1_table[M1], sizeof(*mod) * (single ? 
1 : 2)); + } } -static void dump_clause(FILE *fp, uint32_t *words, unsigned *size, unsigned offset, bool verbose) +static void +dump_clause(FILE *fp, uint32_t *words, unsigned *size, unsigned offset, + bool verbose) { - // State for a decoded clause - struct bifrost_alu_inst instrs[8] = {}; - struct bi_constants consts = {}; - unsigned num_instrs = 0; - unsigned num_consts = 0; - uint64_t header_bits = 0; + // State for a decoded clause + struct bifrost_alu_inst instrs[8] = {}; + struct bi_constants consts = {}; + unsigned num_instrs = 0; + unsigned num_consts = 0; + uint64_t header_bits = 0; - unsigned i; - for (i = 0; ; i++, words += 4) { - if (verbose) { - fprintf(fp, "# "); - for (int j = 0; j < 4; j++) - fprintf(fp, "%08x ", words[3 - j]); // low bit on the right - fprintf(fp, "\n"); - } - unsigned tag = bits(words[0], 0, 8); + unsigned i; + for (i = 0;; i++, words += 4) { + if (verbose) { + fprintf(fp, "# "); + for (int j = 0; j < 4; j++) + fprintf(fp, "%08x ", words[3 - j]); // low bit on the right + fprintf(fp, "\n"); + } + unsigned tag = bits(words[0], 0, 8); - // speculatively decode some things that are common between many formats, so we can share some code - struct bifrost_alu_inst main_instr = {}; - // 20 bits - main_instr.add_bits = bits(words[2], 2, 32 - 13); - // 23 bits - main_instr.fma_bits = bits(words[1], 11, 32) | bits(words[2], 0, 2) << (32 - 11); - // 35 bits - main_instr.reg_bits = ((uint64_t) bits(words[1], 0, 11)) << 24 | (uint64_t) bits(words[0], 8, 32); + // speculatively decode some things that are common between many formats, + // so we can share some code + struct bifrost_alu_inst main_instr = {}; + // 20 bits + main_instr.add_bits = bits(words[2], 2, 32 - 13); + // 23 bits + main_instr.fma_bits = bits(words[1], 11, 32) | bits(words[2], 0, 2) + << (32 - 11); + // 35 bits + main_instr.reg_bits = ((uint64_t)bits(words[1], 0, 11)) << 24 | + (uint64_t)bits(words[0], 8, 32); - uint64_t const0 = bits(words[0], 8, 32) << 4 | (uint64_t) words[1] << 28 | bits(words[2], 0, 4) << 60; - uint64_t const1 = bits(words[2], 4, 32) << 4 | (uint64_t) words[3] << 32; + uint64_t const0 = bits(words[0], 8, 32) << 4 | (uint64_t)words[1] << 28 | + bits(words[2], 0, 4) << 60; + uint64_t const1 = bits(words[2], 4, 32) << 4 | (uint64_t)words[3] << 32; - /* Z-bit */ - bool stop = tag & 0x40; + /* Z-bit */ + bool stop = tag & 0x40; - if (verbose) { - fprintf(fp, "# tag: 0x%02x\n", tag); - } - if (tag & 0x80) { - /* Format 5 or 10 */ - unsigned idx = stop ? 
5 : 2; - main_instr.add_bits |= ((tag >> 3) & 0x7) << 17; - instrs[idx + 1] = main_instr; - instrs[idx].add_bits = bits(words[3], 0, 17) | ((tag & 0x7) << 17); - instrs[idx].fma_bits |= bits(words[2], 19, 32) << 10; - consts.raw[0] = bits(words[3], 17, 32) << 4; - } else { - bool done = false; - switch ((tag >> 3) & 0x7) { - case 0x0: - switch (tag & 0x7) { - case 0x3: - /* Format 1 */ - main_instr.add_bits |= bits(words[3], 29, 32) << 17; - instrs[1] = main_instr; - num_instrs = 2; - done = stop; - break; - case 0x4: - /* Format 3 */ - instrs[2].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17; - instrs[2].fma_bits |= bits(words[2], 19, 32) << 10; - consts.raw[0] = const0; - decode_M(&consts.mods[0], bits(words[2], 4, 8), bits(words[2], 8, 12), true); - num_instrs = 3; - num_consts = 1; - done = stop; - break; - case 0x1: - case 0x5: - /* Format 4 */ - instrs[2].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17; - instrs[2].fma_bits |= bits(words[2], 19, 32) << 10; - main_instr.add_bits |= bits(words[3], 26, 29) << 17; - instrs[3] = main_instr; - if ((tag & 0x7) == 0x5) { - num_instrs = 4; - done = stop; - } - break; - case 0x6: - /* Format 8 */ - instrs[5].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17; - instrs[5].fma_bits |= bits(words[2], 19, 32) << 10; - consts.raw[0] = const0; - decode_M(&consts.mods[0], bits(words[2], 4, 8), bits(words[2], 8, 12), true); - num_instrs = 6; - num_consts = 1; - done = stop; - break; - case 0x7: - /* Format 9 */ - instrs[5].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17; - instrs[5].fma_bits |= bits(words[2], 19, 32) << 10; - main_instr.add_bits |= bits(words[3], 26, 29) << 17; - instrs[6] = main_instr; - num_instrs = 7; - done = stop; - break; - default: - unreachable("[INSTR_INVALID_ENC] Invalid tag bits"); - } - break; - case 0x2: - case 0x3: { - /* Format 6 or 11 */ - unsigned idx = ((tag >> 3) & 0x7) == 2 ? 4 : 7; - main_instr.add_bits |= (tag & 0x7) << 17; - instrs[idx] = main_instr; - consts.raw[0] |= (bits(words[2], 19, 32) | ((uint64_t) words[3] << 13)) << 19; - num_consts = 1; - num_instrs = idx + 1; - done = stop; - break; - } - case 0x4: { - /* Format 2 */ - unsigned idx = stop ? 4 : 1; - main_instr.add_bits |= (tag & 0x7) << 17; - instrs[idx] = main_instr; - instrs[idx + 1].fma_bits |= bits(words[3], 22, 32); - instrs[idx + 1].reg_bits = bits(words[2], 19, 32) | (bits(words[3], 0, 22) << (32 - 19)); - break; - } - case 0x1: - /* Format 0 - followed by constants */ - num_instrs = 1; - done = stop; - FALLTHROUGH; - case 0x5: - /* Format 0 - followed by instructions */ - header_bits = bits(words[2], 19, 32) | ((uint64_t) words[3] << (32 - 19)); - main_instr.add_bits |= (tag & 0x7) << 17; - instrs[0] = main_instr; - break; - case 0x6: - case 0x7: { - /* Format 12 */ - unsigned pos = tag & 0xf; + if (verbose) { + fprintf(fp, "# tag: 0x%02x\n", tag); + } + if (tag & 0x80) { + /* Format 5 or 10 */ + unsigned idx = stop ? 
5 : 2; + main_instr.add_bits |= ((tag >> 3) & 0x7) << 17; + instrs[idx + 1] = main_instr; + instrs[idx].add_bits = bits(words[3], 0, 17) | ((tag & 0x7) << 17); + instrs[idx].fma_bits |= bits(words[2], 19, 32) << 10; + consts.raw[0] = bits(words[3], 17, 32) << 4; + } else { + bool done = false; + switch ((tag >> 3) & 0x7) { + case 0x0: + switch (tag & 0x7) { + case 0x3: + /* Format 1 */ + main_instr.add_bits |= bits(words[3], 29, 32) << 17; + instrs[1] = main_instr; + num_instrs = 2; + done = stop; + break; + case 0x4: + /* Format 3 */ + instrs[2].add_bits = + bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17; + instrs[2].fma_bits |= bits(words[2], 19, 32) << 10; + consts.raw[0] = const0; + decode_M(&consts.mods[0], bits(words[2], 4, 8), + bits(words[2], 8, 12), true); + num_instrs = 3; + num_consts = 1; + done = stop; + break; + case 0x1: + case 0x5: + /* Format 4 */ + instrs[2].add_bits = + bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17; + instrs[2].fma_bits |= bits(words[2], 19, 32) << 10; + main_instr.add_bits |= bits(words[3], 26, 29) << 17; + instrs[3] = main_instr; + if ((tag & 0x7) == 0x5) { + num_instrs = 4; + done = stop; + } + break; + case 0x6: + /* Format 8 */ + instrs[5].add_bits = + bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17; + instrs[5].fma_bits |= bits(words[2], 19, 32) << 10; + consts.raw[0] = const0; + decode_M(&consts.mods[0], bits(words[2], 4, 8), + bits(words[2], 8, 12), true); + num_instrs = 6; + num_consts = 1; + done = stop; + break; + case 0x7: + /* Format 9 */ + instrs[5].add_bits = + bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17; + instrs[5].fma_bits |= bits(words[2], 19, 32) << 10; + main_instr.add_bits |= bits(words[3], 26, 29) << 17; + instrs[6] = main_instr; + num_instrs = 7; + done = stop; + break; + default: + unreachable("[INSTR_INVALID_ENC] Invalid tag bits"); + } + break; + case 0x2: + case 0x3: { + /* Format 6 or 11 */ + unsigned idx = ((tag >> 3) & 0x7) == 2 ? 4 : 7; + main_instr.add_bits |= (tag & 0x7) << 17; + instrs[idx] = main_instr; + consts.raw[0] |= + (bits(words[2], 19, 32) | ((uint64_t)words[3] << 13)) << 19; + num_consts = 1; + num_instrs = idx + 1; + done = stop; + break; + } + case 0x4: { + /* Format 2 */ + unsigned idx = stop ? 
4 : 1; + main_instr.add_bits |= (tag & 0x7) << 17; + instrs[idx] = main_instr; + instrs[idx + 1].fma_bits |= bits(words[3], 22, 32); + instrs[idx + 1].reg_bits = + bits(words[2], 19, 32) | (bits(words[3], 0, 22) << (32 - 19)); + break; + } + case 0x1: + /* Format 0 - followed by constants */ + num_instrs = 1; + done = stop; + FALLTHROUGH; + case 0x5: + /* Format 0 - followed by instructions */ + header_bits = + bits(words[2], 19, 32) | ((uint64_t)words[3] << (32 - 19)); + main_instr.add_bits |= (tag & 0x7) << 17; + instrs[0] = main_instr; + break; + case 0x6: + case 0x7: { + /* Format 12 */ + unsigned pos = tag & 0xf; - struct { - unsigned const_idx; - unsigned nr_tuples; - } pos_table[0x10] = { - { 0, 1 }, - { 0, 2 }, - { 0, 4 }, - { 1, 3 }, - { 1, 5 }, - { 2, 4 }, - { 0, 7 }, - { 1, 6 }, - { 3, 5 }, - { 1, 8 }, - { 2, 7 }, - { 3, 6 }, - { 3, 8 }, - { 4, 7 }, - { 5, 6 }, - { ~0, ~0 } - }; + struct { + unsigned const_idx; + unsigned nr_tuples; + } pos_table[0x10] = {{0, 1}, {0, 2}, {0, 4}, {1, 3}, + {1, 5}, {2, 4}, {0, 7}, {1, 6}, + {3, 5}, {1, 8}, {2, 7}, {3, 6}, + {3, 8}, {4, 7}, {5, 6}, {~0, ~0}}; - ASSERTED bool valid_count = pos_table[pos].nr_tuples == num_instrs; - assert(valid_count && "INSTR_INVALID_ENC"); + ASSERTED bool valid_count = pos_table[pos].nr_tuples == num_instrs; + assert(valid_count && "INSTR_INVALID_ENC"); - unsigned const_idx = pos_table[pos].const_idx; + unsigned const_idx = pos_table[pos].const_idx; - if (num_consts < const_idx + 2) - num_consts = const_idx + 2; + if (num_consts < const_idx + 2) + num_consts = const_idx + 2; - consts.raw[const_idx] = const0; - consts.raw[const_idx + 1] = const1; + consts.raw[const_idx] = const0; + consts.raw[const_idx + 1] = const1; - /* Calculate M values from A, B and 4-bit - * unsigned arithmetic. Mathematically it - * should be (A - B) % 16 but we use this - * alternate form to avoid sign issues */ + /* Calculate M values from A, B and 4-bit + * unsigned arithmetic. 
Mathematically it + * should be (A - B) % 16 but we use this + * alternate form to avoid sign issues */ - unsigned A1 = bits(words[2], 0, 4); - unsigned B1 = bits(words[3], 28, 32); - unsigned A2 = bits(words[1], 0, 4); - unsigned B2 = bits(words[2], 28, 32); + unsigned A1 = bits(words[2], 0, 4); + unsigned B1 = bits(words[3], 28, 32); + unsigned A2 = bits(words[1], 0, 4); + unsigned B2 = bits(words[2], 28, 32); - unsigned M1 = (16 + A1 - B1) & 0xF; - unsigned M2 = (16 + A2 - B2) & 0xF; + unsigned M1 = (16 + A1 - B1) & 0xF; + unsigned M2 = (16 + A2 - B2) & 0xF; - decode_M(&consts.mods[const_idx], M1, M2, false); + decode_M(&consts.mods[const_idx], M1, M2, false); - done = stop; - break; - } - default: - break; - } + done = stop; + break; + } + default: + break; + } - if (done) - break; - } - } + if (done) + break; + } + } - *size = i + 1; + *size = i + 1; - if (verbose) { - fprintf(fp, "# header: %012" PRIx64 "\n", header_bits); - } + if (verbose) { + fprintf(fp, "# header: %012" PRIx64 "\n", header_bits); + } - struct bifrost_header header; - memcpy((char *) &header, (char *) &header_bits, sizeof(struct bifrost_header)); - dump_header(fp, header, verbose); + struct bifrost_header header; + memcpy((char *)&header, (char *)&header_bits, sizeof(struct bifrost_header)); + dump_header(fp, header, verbose); - fprintf(fp, "{\n"); - for (i = 0; i < num_instrs; i++) { - struct bifrost_regs regs, next_regs; - if (i + 1 == num_instrs) { - memcpy((char *) &next_regs, (char *) &instrs[0].reg_bits, - sizeof(next_regs)); - } else { - memcpy((char *) &next_regs, (char *) &instrs[i + 1].reg_bits, - sizeof(next_regs)); - } + fprintf(fp, "{\n"); + for (i = 0; i < num_instrs; i++) { + struct bifrost_regs regs, next_regs; + if (i + 1 == num_instrs) { + memcpy((char *)&next_regs, (char *)&instrs[0].reg_bits, + sizeof(next_regs)); + } else { + memcpy((char *)&next_regs, (char *)&instrs[i + 1].reg_bits, + sizeof(next_regs)); + } - memcpy((char *) ®s, (char *) &instrs[i].reg_bits, sizeof(regs)); + memcpy((char *)®s, (char *)&instrs[i].reg_bits, sizeof(regs)); - if (verbose) { - fprintf(fp, " # regs: %016" PRIx64 "\n", instrs[i].reg_bits); - dump_regs(fp, regs, i == 0); - } + if (verbose) { + fprintf(fp, " # regs: %016" PRIx64 "\n", instrs[i].reg_bits); + dump_regs(fp, regs, i == 0); + } - bi_disasm_fma(fp, instrs[i].fma_bits, ®s, &next_regs, - header.staging_register, offset, &consts, - i + 1 == num_instrs); + bi_disasm_fma(fp, instrs[i].fma_bits, ®s, &next_regs, + header.staging_register, offset, &consts, + i + 1 == num_instrs); - bi_disasm_add(fp, instrs[i].add_bits, ®s, &next_regs, - header.staging_register, offset, &consts, - i + 1 == num_instrs); - } - fprintf(fp, "}\n"); + bi_disasm_add(fp, instrs[i].add_bits, ®s, &next_regs, + header.staging_register, offset, &consts, + i + 1 == num_instrs); + } + fprintf(fp, "}\n"); - if (verbose) { - for (unsigned i = 0; i < num_consts; i++) { - fprintf(fp, "# const%d: %08" PRIx64 "\n", 2 * i, consts.raw[i] & 0xffffffff); - fprintf(fp, "# const%d: %08" PRIx64 "\n", 2 * i + 1, consts.raw[i] >> 32); - } - } + if (verbose) { + for (unsigned i = 0; i < num_consts; i++) { + fprintf(fp, "# const%d: %08" PRIx64 "\n", 2 * i, + consts.raw[i] & 0xffffffff); + fprintf(fp, "# const%d: %08" PRIx64 "\n", 2 * i + 1, + consts.raw[i] >> 32); + } + } - fprintf(fp, "\n"); - return; + fprintf(fp, "\n"); + return; } -void disassemble_bifrost(FILE *fp, uint8_t *code, size_t size, bool verbose) +void +disassemble_bifrost(FILE *fp, uint8_t *code, size_t size, bool verbose) { - uint32_t *words = 
(uint32_t *) code; - uint32_t *words_end = words + (size / 4); - // used for displaying branch targets - unsigned offset = 0; - while (words != words_end) { - /* Shaders have zero bytes at the end for padding; stop - * disassembling when we hit them. */ - if (*words == 0) - break; + uint32_t *words = (uint32_t *)code; + uint32_t *words_end = words + (size / 4); + // used for displaying branch targets + unsigned offset = 0; + while (words != words_end) { + /* Shaders have zero bytes at the end for padding; stop + * disassembling when we hit them. */ + if (*words == 0) + break; - fprintf(fp, "clause_%u:\n", offset); + fprintf(fp, "clause_%u:\n", offset); - unsigned size; - dump_clause(fp, words, &size, offset, verbose); + unsigned size; + dump_clause(fp, words, &size, offset, verbose); - words += size * 4; - offset += size; - } + words += size * 4; + offset += size; + } } - diff --git a/src/panfrost/bifrost/disassemble.h b/src/panfrost/bifrost/disassemble.h index 1e39c20d658..bf023a732a4 100644 --- a/src/panfrost/bifrost/disassemble.h +++ b/src/panfrost/bifrost/disassemble.h @@ -34,14 +34,20 @@ void disassemble_bifrost(FILE *fp, uint8_t *code, size_t size, bool verbose); -void -bi_disasm_fma(FILE *fp, unsigned bits, struct bifrost_regs *srcs, struct bifrost_regs *next_regs, unsigned staging_register, unsigned branch_offset, struct bi_constants *consts, bool first); +void bi_disasm_fma(FILE *fp, unsigned bits, struct bifrost_regs *srcs, + struct bifrost_regs *next_regs, unsigned staging_register, + unsigned branch_offset, struct bi_constants *consts, + bool first); -void bi_disasm_add(FILE *fp, unsigned bits, struct bifrost_regs *srcs, struct bifrost_regs *next_regs, unsigned staging_register, unsigned branch_offset, struct bi_constants *consts, bool first); +void bi_disasm_add(FILE *fp, unsigned bits, struct bifrost_regs *srcs, + struct bifrost_regs *next_regs, unsigned staging_register, + unsigned branch_offset, struct bi_constants *consts, + bool first); void bi_disasm_dest_fma(FILE *fp, struct bifrost_regs *next_regs, bool first); void bi_disasm_dest_add(FILE *fp, struct bifrost_regs *next_regs, bool first); -void dump_src(FILE *fp, unsigned src, struct bifrost_regs srcs, unsigned branch_offset, struct bi_constants *consts, bool isFMA); +void dump_src(FILE *fp, unsigned src, struct bifrost_regs srcs, + unsigned branch_offset, struct bi_constants *consts, bool isFMA); #endif diff --git a/src/panfrost/bifrost/nodearray.h b/src/panfrost/bifrost/nodearray.h index ed852e0c56d..40ca43def60 100644 --- a/src/panfrost/bifrost/nodearray.h +++ b/src/panfrost/bifrost/nodearray.h @@ -62,182 +62,187 @@ typedef uint16_t nodearray_value; typedef uint64_t nodearray_sparse; typedef struct { - union { - nodearray_sparse *sparse; - nodearray_value *dense; - }; - unsigned size; - unsigned sparse_capacity; + union { + nodearray_sparse *sparse; + nodearray_value *dense; + }; + unsigned size; + unsigned sparse_capacity; } nodearray; /* Align sizes to 16-bytes for SIMD purposes */ #define NODEARRAY_DENSE_ALIGN(x) ALIGN_POT(x, 16) -#define nodearray_sparse_foreach(buf, elem) \ - for (nodearray_sparse *elem = (buf)->sparse; \ +#define nodearray_sparse_foreach(buf, elem) \ + for (nodearray_sparse *elem = (buf)->sparse; \ elem < (buf)->sparse + (buf)->size; elem++) -#define nodearray_dense_foreach(buf, elem) \ - for (nodearray_value *elem = (buf)->dense; \ +#define nodearray_dense_foreach(buf, elem) \ + for (nodearray_value *elem = (buf)->dense; \ elem < (buf)->dense + (buf)->size; elem++) -#define 
nodearray_dense_foreach_64(buf, elem) \ - for (uint64_t *elem = (uint64_t *)(buf)->dense; \ +#define nodearray_dense_foreach_64(buf, elem) \ + for (uint64_t *elem = (uint64_t *)(buf)->dense; \ (nodearray_value *)elem < (buf)->dense + (buf)->size; elem++) static inline bool nodearray_is_sparse(const nodearray *a) { - return a->sparse_capacity != ~0U; + return a->sparse_capacity != ~0U; } static inline void nodearray_init(nodearray *a) { - memset(a, 0, sizeof(nodearray)); + memset(a, 0, sizeof(nodearray)); } static inline void nodearray_reset(nodearray *a) { - free(a->sparse); - nodearray_init(a); + free(a->sparse); + nodearray_init(a); } static inline nodearray_sparse nodearray_encode(unsigned key, nodearray_value value) { - static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch"); - return ((nodearray_sparse) key << 16) | value; + static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch"); + return ((nodearray_sparse)key << 16) | value; } static inline unsigned nodearray_sparse_key(const nodearray_sparse *elem) { - static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch"); - return *elem >> 16; + static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch"); + return *elem >> 16; } static inline nodearray_value nodearray_sparse_value(const nodearray_sparse *elem) { - return *elem & NODEARRAY_MAX_VALUE; + return *elem & NODEARRAY_MAX_VALUE; } static inline unsigned -nodearray_sparse_search(const nodearray *a, nodearray_sparse key, nodearray_sparse **elem) +nodearray_sparse_search(const nodearray *a, nodearray_sparse key, + nodearray_sparse **elem) { - assert(nodearray_is_sparse(a) && a->size); + assert(nodearray_is_sparse(a) && a->size); - nodearray_sparse *data = a->sparse; + nodearray_sparse *data = a->sparse; - /* Encode the key using the highest possible value, so that the - * matching node must be encoded lower than this - */ - nodearray_sparse skey = nodearray_encode(key, NODEARRAY_MAX_VALUE); + /* Encode the key using the highest possible value, so that the + * matching node must be encoded lower than this + */ + nodearray_sparse skey = nodearray_encode(key, NODEARRAY_MAX_VALUE); - unsigned left = 0; - unsigned right = a->size - 1; + unsigned left = 0; + unsigned right = a->size - 1; - if (data[right] <= skey) - left = right; + if (data[right] <= skey) + left = right; - while (left != right) { - /* No need to worry about overflow, we couldn't have more than - * 2^24 elements */ - unsigned probe = (left + right + 1) / 2; + while (left != right) { + /* No need to worry about overflow, we couldn't have more than + * 2^24 elements */ + unsigned probe = (left + right + 1) / 2; - if (data[probe] > skey) - right = probe - 1; - else - left = probe; - } + if (data[probe] > skey) + right = probe - 1; + else + left = probe; + } - *elem = data + left; - return left; + *elem = data + left; + return left; } static inline void nodearray_orr(nodearray *a, unsigned key, nodearray_value value, unsigned max_sparse, unsigned max) { - assert(key < (1 << 24)); - assert(key < max); + assert(key < (1 << 24)); + assert(key < max); - if (!value) - return; + if (!value) + return; - if (nodearray_is_sparse(a)) { - unsigned size = a->size; - unsigned left = 0; + if (nodearray_is_sparse(a)) { + unsigned size = a->size; + unsigned left = 0; - if (size) { - /* First, binary search for key */ - nodearray_sparse *elem; - left = nodearray_sparse_search(a, key, &elem); + if (size) { + /* First, binary search for key */ + nodearray_sparse *elem; + 
left = nodearray_sparse_search(a, key, &elem); - if (nodearray_sparse_key(elem) == key) { - *elem |= value; - return; - } + if (nodearray_sparse_key(elem) == key) { + *elem |= value; + return; + } - /* We insert before `left`, so increment it if it's - * out of order */ - if (nodearray_sparse_key(elem) < key) - ++left; - } + /* We insert before `left`, so increment it if it's + * out of order */ + if (nodearray_sparse_key(elem) < key) + ++left; + } - if (size < max_sparse && (size + 1) < max / 4) { - /* We didn't find it, but we know where to insert it. */ + if (size < max_sparse && (size + 1) < max / 4) { + /* We didn't find it, but we know where to insert it. */ - nodearray_sparse *data = a->sparse; - nodearray_sparse *data_move = data + left; + nodearray_sparse *data = a->sparse; + nodearray_sparse *data_move = data + left; - bool realloc = (++a->size) > a->sparse_capacity; + bool realloc = (++a->size) > a->sparse_capacity; - if (realloc) { - a->sparse_capacity = MIN2(MAX2(a->sparse_capacity * 2, 64), max / 4); + if (realloc) { + a->sparse_capacity = + MIN2(MAX2(a->sparse_capacity * 2, 64), max / 4); - a->sparse = (nodearray_sparse *)malloc(a->sparse_capacity * sizeof(nodearray_sparse)); + a->sparse = (nodearray_sparse *)malloc(a->sparse_capacity * + sizeof(nodearray_sparse)); - if (left) - memcpy(a->sparse, data, left * sizeof(nodearray_sparse)); - } + if (left) + memcpy(a->sparse, data, left * sizeof(nodearray_sparse)); + } - nodearray_sparse *elem = a->sparse + left; + nodearray_sparse *elem = a->sparse + left; - if (left != size) - memmove(elem + 1, data_move, (size - left) * sizeof(nodearray_sparse)); + if (left != size) + memmove(elem + 1, data_move, + (size - left) * sizeof(nodearray_sparse)); - *elem = nodearray_encode(key, value); + *elem = nodearray_encode(key, value); - if (realloc) - free(data); + if (realloc) + free(data); - return; - } + return; + } - /* There are too many elements, so convert to a dense array */ - nodearray old = *a; + /* There are too many elements, so convert to a dense array */ + nodearray old = *a; - a->dense = (nodearray_value *)calloc(NODEARRAY_DENSE_ALIGN(max), sizeof(nodearray_value)); - a->size = max; - a->sparse_capacity = ~0U; + a->dense = (nodearray_value *)calloc(NODEARRAY_DENSE_ALIGN(max), + sizeof(nodearray_value)); + a->size = max; + a->sparse_capacity = ~0U; - nodearray_value *data = a->dense; + nodearray_value *data = a->dense; - nodearray_sparse_foreach(&old, x) { - unsigned key = nodearray_sparse_key(x); - nodearray_value value = nodearray_sparse_value(x); + nodearray_sparse_foreach(&old, x) { + unsigned key = nodearray_sparse_key(x); + nodearray_value value = nodearray_sparse_value(x); - assert(key < max); - data[key] = value; - } + assert(key < max); + data[key] = value; + } - free(old.sparse); - } + free(old.sparse); + } - a->dense[key] |= value; + a->dense[key] |= value; } #ifdef __cplusplus diff --git a/src/panfrost/bifrost/test/test-constant-fold.cpp b/src/panfrost/bifrost/test/test-constant-fold.cpp index 90a63862ee1..1e7034f6864 100644 --- a/src/panfrost/bifrost/test/test-constant-fold.cpp +++ b/src/panfrost/bifrost/test/test-constant-fold.cpp @@ -21,14 +21,15 @@ * SOFTWARE. 
*/ -#include "compiler.h" -#include "bi_test.h" #include "bi_builder.h" +#include "bi_test.h" +#include "compiler.h" #include static std::string -to_string(const bi_instr *I) { +to_string(const bi_instr *I) +{ char *cstr = NULL; size_t size = 0; FILE *f = open_memstream(&cstr, &size); @@ -40,23 +41,21 @@ to_string(const bi_instr *I) { } static testing::AssertionResult -constant_fold_pred(const char *I_expr, - const char *expected_expr, - bi_instr *I, +constant_fold_pred(const char *I_expr, const char *expected_expr, bi_instr *I, uint32_t expected) { bool unsupported = false; uint32_t v = bi_fold_constant(I, &unsupported); if (unsupported) { return testing::AssertionFailure() - << "Constant fold unsupported for instruction \n\n" - << " " << to_string(I); + << "Constant fold unsupported for instruction \n\n" + << " " << to_string(I); } else if (v != expected) { return testing::AssertionFailure() - << "Unexpected result when constant folding instruction\n\n" - << " " << to_string(I) << "\n" - << " Actual: " << v << "\n" - << "Expected: " << expected << "\n"; + << "Unexpected result when constant folding instruction\n\n" + << " " << to_string(I) << "\n" + << " Actual: " << v << "\n" + << "Expected: " << expected << "\n"; } else { return testing::AssertionSuccess(); } @@ -64,7 +63,6 @@ constant_fold_pred(const char *I_expr, #define EXPECT_FOLD(i, e) EXPECT_PRED_FORMAT2(constant_fold_pred, i, e) - static testing::AssertionResult not_constant_fold_pred(const char *I_expr, bi_instr *I) { @@ -74,22 +72,23 @@ not_constant_fold_pred(const char *I_expr, bi_instr *I) return testing::AssertionSuccess(); } else { return testing::AssertionFailure() - << "Instruction\n\n" - << " " << to_string(I) << "\n" - << "shouldn't have constant folded, but folded to: " << v; + << "Instruction\n\n" + << " " << to_string(I) << "\n" + << "shouldn't have constant folded, but folded to: " << v; } } #define EXPECT_NOT_FOLD(i) EXPECT_PRED_FORMAT1(not_constant_fold_pred, i) - class ConstantFold : public testing::Test { -protected: - ConstantFold() { + protected: + ConstantFold() + { mem_ctx = ralloc_context(NULL); b = bit_builder(mem_ctx); } - ~ConstantFold() { + ~ConstantFold() + { ralloc_free(mem_ctx); } @@ -101,9 +100,7 @@ TEST_F(ConstantFold, Swizzles) { bi_index reg = bi_register(0); - EXPECT_FOLD( - bi_swz_v2i16_to(b, reg, bi_imm_u32(0xCAFEBABE)), - 0xCAFEBABE); + EXPECT_FOLD(bi_swz_v2i16_to(b, reg, bi_imm_u32(0xCAFEBABE)), 0xCAFEBABE); EXPECT_FOLD( bi_swz_v2i16_to(b, reg, bi_swz_16(bi_imm_u32(0xCAFEBABE), false, false)), @@ -123,18 +120,17 @@ TEST_F(ConstantFold, VectorConstructions2i16) bi_index reg = bi_register(0); EXPECT_FOLD( - bi_mkvec_v2i16_to(b, reg, bi_imm_u16(0xCAFE), - bi_imm_u16(0xBABE)), + bi_mkvec_v2i16_to(b, reg, bi_imm_u16(0xCAFE), bi_imm_u16(0xBABE)), 0xBABECAFE); EXPECT_FOLD( bi_mkvec_v2i16_to(b, reg, bi_swz_16(bi_imm_u32(0xCAFEBABE), true, true), - bi_imm_u16(0xBABE)), + bi_imm_u16(0xBABE)), 0xBABECAFE); EXPECT_FOLD( bi_mkvec_v2i16_to(b, reg, bi_swz_16(bi_imm_u32(0xCAFEBABE), true, true), - bi_swz_16(bi_imm_u32(0xCAFEBABE), false, false)), + bi_swz_16(bi_imm_u32(0xCAFEBABE), false, false)), 0xBABECAFE); } @@ -173,17 +169,18 @@ TEST_F(ConstantFold, LimitedShiftsForTexturing) { bi_index reg = bi_register(0); - EXPECT_FOLD( - bi_lshift_or_i32_to(b, reg, bi_imm_u32(0xCAFE), bi_imm_u32(0xA0000), bi_imm_u8(4)), - (0xCAFE << 4) | 0xA0000); + EXPECT_FOLD(bi_lshift_or_i32_to(b, reg, bi_imm_u32(0xCAFE), + bi_imm_u32(0xA0000), bi_imm_u8(4)), + (0xCAFE << 4) | 0xA0000); - EXPECT_NOT_FOLD( - 
bi_lshift_or_i32_to(b, reg, bi_imm_u32(0xCAFE), bi_not(bi_imm_u32(0xA0000)), bi_imm_u8(4))); + EXPECT_NOT_FOLD(bi_lshift_or_i32_to( + b, reg, bi_imm_u32(0xCAFE), bi_not(bi_imm_u32(0xA0000)), bi_imm_u8(4))); - EXPECT_NOT_FOLD( - bi_lshift_or_i32_to(b, reg, bi_not(bi_imm_u32(0xCAFE)), bi_imm_u32(0xA0000), bi_imm_u8(4))); + EXPECT_NOT_FOLD(bi_lshift_or_i32_to(b, reg, bi_not(bi_imm_u32(0xCAFE)), + bi_imm_u32(0xA0000), bi_imm_u8(4))); - bi_instr *I = bi_lshift_or_i32_to(b, reg, bi_imm_u32(0xCAFE), bi_imm_u32(0xA0000), bi_imm_u8(4)); + bi_instr *I = bi_lshift_or_i32_to(b, reg, bi_imm_u32(0xCAFE), + bi_imm_u32(0xA0000), bi_imm_u8(4)); I->not_result = true; EXPECT_NOT_FOLD(I); } @@ -193,9 +190,12 @@ TEST_F(ConstantFold, NonConstantSourcesCannotBeFolded) bi_index reg = bi_register(0); EXPECT_NOT_FOLD(bi_swz_v2i16_to(b, reg, bi_temp(b->shader))); - EXPECT_NOT_FOLD(bi_mkvec_v2i16_to(b, reg, bi_temp(b->shader), bi_temp(b->shader))); - EXPECT_NOT_FOLD(bi_mkvec_v2i16_to(b, reg, bi_temp(b->shader), bi_imm_u32(0xDEADBEEF))); - EXPECT_NOT_FOLD(bi_mkvec_v2i16_to(b, reg, bi_imm_u32(0xDEADBEEF), bi_temp(b->shader))); + EXPECT_NOT_FOLD( + bi_mkvec_v2i16_to(b, reg, bi_temp(b->shader), bi_temp(b->shader))); + EXPECT_NOT_FOLD( + bi_mkvec_v2i16_to(b, reg, bi_temp(b->shader), bi_imm_u32(0xDEADBEEF))); + EXPECT_NOT_FOLD( + bi_mkvec_v2i16_to(b, reg, bi_imm_u32(0xDEADBEEF), bi_temp(b->shader))); } TEST_F(ConstantFold, OtherOperationsShouldNotFold) diff --git a/src/panfrost/bifrost/test/test-dual-texture.cpp b/src/panfrost/bifrost/test/test-dual-texture.cpp index aa364aa5bcf..25f22e02889 100644 --- a/src/panfrost/bifrost/test/test-dual-texture.cpp +++ b/src/panfrost/bifrost/test/test-dual-texture.cpp @@ -21,55 +21,57 @@ * SOFTWARE. */ -#include "compiler.h" -#include "bi_test.h" #include "bi_builder.h" +#include "bi_test.h" +#include "compiler.h" #include -#define CASE(shader_stage, instr, expected) do { \ - bi_builder *A = bit_builder(mem_ctx); \ - bi_builder *B = bit_builder(mem_ctx); \ - { \ - bi_builder *b = A; \ - bi_index u = bi_temp(b->shader); \ - bi_index v = bi_temp(b->shader); \ - A->shader->stage = MESA_SHADER_ ## shader_stage; \ - instr; \ - } \ - { \ - bi_builder *b = B; \ - bi_index u = bi_temp(b->shader); \ - bi_index v = bi_temp(b->shader); \ - B->shader->stage = MESA_SHADER_ ## shader_stage; \ - expected; \ - } \ - bi_opt_fuse_dual_texture(A->shader); \ - if (!bit_shader_equal(A->shader, B->shader)) { \ - ADD_FAILURE(); \ - fprintf(stderr, "Optimization produce unexpected result"); \ - fprintf(stderr, " Actual:\n"); \ - bi_print_shader(A->shader, stderr); \ - fprintf(stderr, "Expected:\n"); \ - bi_print_shader(B->shader, stderr); \ - fprintf(stderr, "\n"); \ - } \ -} while(0) +#define CASE(shader_stage, instr, expected) \ + do { \ + bi_builder *A = bit_builder(mem_ctx); \ + bi_builder *B = bit_builder(mem_ctx); \ + { \ + bi_builder *b = A; \ + bi_index u = bi_temp(b->shader); \ + bi_index v = bi_temp(b->shader); \ + A->shader->stage = MESA_SHADER_##shader_stage; \ + instr; \ + } \ + { \ + bi_builder *b = B; \ + bi_index u = bi_temp(b->shader); \ + bi_index v = bi_temp(b->shader); \ + B->shader->stage = MESA_SHADER_##shader_stage; \ + expected; \ + } \ + bi_opt_fuse_dual_texture(A->shader); \ + if (!bit_shader_equal(A->shader, B->shader)) { \ + ADD_FAILURE(); \ + fprintf(stderr, "Optimization produce unexpected result"); \ + fprintf(stderr, " Actual:\n"); \ + bi_print_shader(A->shader, stderr); \ + fprintf(stderr, "Expected:\n"); \ + bi_print_shader(B->shader, stderr); \ + fprintf(stderr, "\n"); \ + } 
\ + } while (0) #define NEGCASE(stage, instr) CASE(stage, instr, instr) class DualTexture : public testing::Test { -protected: - DualTexture() { + protected: + DualTexture() + { mem_ctx = ralloc_context(NULL); - reg = bi_register(0); - x = bi_register(4); - y = bi_register(8); - + reg = bi_register(0); + x = bi_register(4); + y = bi_register(8); } - ~DualTexture() { + ~DualTexture() + { ralloc_free(mem_ctx); } @@ -78,134 +80,165 @@ protected: bi_index reg, x, y; }; - TEST_F(DualTexture, FuseDualTexFragment) { - CASE(FRAGMENT, { + CASE( + FRAGMENT, + { bi_texs_2d_f32_to(b, x, u, v, false, 0, 0); bi_texs_2d_f32_to(b, y, u, v, false, 1, 1); - }, { - bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F00144), false, 4, 4); - }); + }, + { + bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F00144), + false, 4, 4); + }); } TEST_F(DualTexture, FuseDualTexKernel) { - CASE(KERNEL, { + CASE( + KERNEL, + { bi_texs_2d_f32_to(b, x, u, v, true, 0, 0); bi_texs_2d_f32_to(b, y, u, v, true, 1, 1); - }, { - bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F00144), true, 4, 4); - }); + }, + { + bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F00144), true, + 4, 4); + }); } TEST_F(DualTexture, FuseDualTexVertex) { - CASE(VERTEX, { + CASE( + VERTEX, + { bi_texs_2d_f32_to(b, x, u, v, true, 0, 0); bi_texs_2d_f32_to(b, y, u, v, true, 1, 1); - }, { - bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F00144), true, 4, 4); - }); + }, + { + bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F00144), true, + 4, 4); + }); } TEST_F(DualTexture, DontFuseDualTexWrongStage) { NEGCASE(FRAGMENT, { - bi_texs_2d_f32_to(b, x, u, v, true, 0, 0); - bi_texs_2d_f32_to(b, y, u, v, true, 1, 1); + bi_texs_2d_f32_to(b, x, u, v, true, 0, 0); + bi_texs_2d_f32_to(b, y, u, v, true, 1, 1); }); NEGCASE(KERNEL, { - bi_texs_2d_f32_to(b, x, u, v, false, 0, 0); - bi_texs_2d_f32_to(b, y, u, v, false, 1, 1); + bi_texs_2d_f32_to(b, x, u, v, false, 0, 0); + bi_texs_2d_f32_to(b, y, u, v, false, 1, 1); }); NEGCASE(VERTEX, { - bi_texs_2d_f32_to(b, x, u, v, false, 0, 0); - bi_texs_2d_f32_to(b, y, u, v, false, 1, 1); + bi_texs_2d_f32_to(b, x, u, v, false, 0, 0); + bi_texs_2d_f32_to(b, y, u, v, false, 1, 1); }); } TEST_F(DualTexture, FuseDualTexMaximumIndex) { - CASE(FRAGMENT, { + CASE( + FRAGMENT, + { bi_texs_2d_f32_to(b, x, u, v, false, 2, 2); bi_texs_2d_f32_to(b, y, u, v, false, 3, 3); - }, { - bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F003E6), false, 4, 4); - }); + }, + { + bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F003E6), + false, 4, 4); + }); } TEST_F(DualTexture, FuseDualTexMixedIndex) { - CASE(FRAGMENT, { + CASE( + FRAGMENT, + { bi_texs_2d_f32_to(b, x, u, v, false, 3, 2); bi_texs_2d_f32_to(b, y, u, v, false, 2, 3); - }, { - bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F003A7), false, 4, 4); - }); + }, + { + bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F003A7), + false, 4, 4); + }); } TEST_F(DualTexture, DontFuseDualTexOutOfBounds) { NEGCASE(FRAGMENT, { - bi_texs_2d_f32_to(b, x, u, v, false, 4, 0); - bi_texs_2d_f32_to(b, y, u, v, false, 1, 1); + bi_texs_2d_f32_to(b, x, u, v, false, 4, 0); + bi_texs_2d_f32_to(b, y, u, v, false, 1, 1); }); NEGCASE(FRAGMENT, { - bi_texs_2d_f32_to(b, x, u, v, false, 0, 4); - bi_texs_2d_f32_to(b, y, u, v, false, 1, 1); + bi_texs_2d_f32_to(b, x, u, v, false, 0, 4); + bi_texs_2d_f32_to(b, y, u, v, false, 1, 1); }); NEGCASE(FRAGMENT, { - bi_texs_2d_f32_to(b, x, u, v, false, 0, 0); - bi_texs_2d_f32_to(b, y, u, v, false, 4, 1); + 
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0); + bi_texs_2d_f32_to(b, y, u, v, false, 4, 1); }); NEGCASE(FRAGMENT, { - bi_texs_2d_f32_to(b, x, u, v, false, 0, 0); - bi_texs_2d_f32_to(b, y, u, v, false, 1, 4); + bi_texs_2d_f32_to(b, x, u, v, false, 0, 0); + bi_texs_2d_f32_to(b, y, u, v, false, 1, 4); }); } TEST_F(DualTexture, FuseDualTexFP16) { - CASE(FRAGMENT, { + CASE( + FRAGMENT, + { bi_texs_2d_f16_to(b, x, u, v, false, 0, 0); bi_texs_2d_f16_to(b, y, u, v, false, 1, 1); - }, { - bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF1E00144), false, 2, 2); - }); + }, + { + bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF1E00144), + false, 2, 2); + }); } TEST_F(DualTexture, FuseDualTexMixedSize) { - CASE(FRAGMENT, { + CASE( + FRAGMENT, + { bi_texs_2d_f32_to(b, x, u, v, false, 0, 0); bi_texs_2d_f16_to(b, y, u, v, false, 1, 1); - }, { - bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0XF9E00144), false, 4, 2); - }); + }, + { + bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0XF9E00144), + false, 4, 2); + }); - CASE(FRAGMENT, { + CASE( + FRAGMENT, + { bi_texs_2d_f16_to(b, x, u, v, false, 0, 0); bi_texs_2d_f32_to(b, y, u, v, false, 1, 1); - }, { - bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF1F00144), false, 2, 4); - }); + }, + { + bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF1F00144), + false, 2, 4); + }); } TEST_F(DualTexture, DontFuseMixedCoordinates) { NEGCASE(FRAGMENT, { - bi_texs_2d_f32_to(b, x, bi_neg(u), v, false, 0, 0); - bi_texs_2d_f32_to(b, y, u, v, false, 1, 1); + bi_texs_2d_f32_to(b, x, bi_neg(u), v, false, 0, 0); + bi_texs_2d_f32_to(b, y, u, v, false, 1, 1); }); NEGCASE(FRAGMENT, { - bi_texs_2d_f32_to(b, x, u, v, false, 0, 0); - bi_texs_2d_f32_to(b, y, v, u, false, 1, 1); + bi_texs_2d_f32_to(b, x, u, v, false, 0, 0); + bi_texs_2d_f32_to(b, y, v, u, false, 1, 1); }); } diff --git a/src/panfrost/bifrost/test/test-lower-swizzle.cpp b/src/panfrost/bifrost/test/test-lower-swizzle.cpp index af36dfc1de0..a6a35554974 100644 --- a/src/panfrost/bifrost/test/test-lower-swizzle.cpp +++ b/src/panfrost/bifrost/test/test-lower-swizzle.cpp @@ -21,31 +21,34 @@ * SOFTWARE. 
*/ -#include "compiler.h" -#include "bi_test.h" #include "bi_builder.h" +#include "bi_test.h" +#include "compiler.h" #include -#define CASE(instr, expected) INSTRUCTION_CASE(instr, expected, bi_lower_swizzle) +#define CASE(instr, expected) \ + INSTRUCTION_CASE(instr, expected, bi_lower_swizzle) #define NEGCASE(instr) CASE(instr, instr) class LowerSwizzle : public testing::Test { -protected: - LowerSwizzle() { + protected: + LowerSwizzle() + { mem_ctx = ralloc_context(NULL); - reg = bi_register(0); - x = bi_register(1); - y = bi_register(2); - z = bi_register(3); - w = bi_register(4); + reg = bi_register(0); + x = bi_register(1); + y = bi_register(2); + z = bi_register(3); + w = bi_register(4); - x3210 = x; + x3210 = x; x3210.swizzle = BI_SWIZZLE_B3210; } - ~LowerSwizzle() { + ~LowerSwizzle() + { ralloc_free(mem_ctx); } @@ -58,7 +61,8 @@ protected: TEST_F(LowerSwizzle, Csel16) { CASE(bi_csel_v2f16_to(b, reg, bi_half(x, 0), y, z, w, BI_CMPF_NE), - bi_csel_v2f16_to(b, reg, bi_swz_v2i16(b, bi_half(x, 0)), y, z, w, BI_CMPF_NE)); + bi_csel_v2f16_to(b, reg, bi_swz_v2i16(b, bi_half(x, 0)), y, z, w, + BI_CMPF_NE)); } TEST_F(LowerSwizzle, Fma16) @@ -79,23 +83,22 @@ TEST_F(LowerSwizzle, ClzHadd8) TEST_F(LowerSwizzle, FirstShift8) { enum bi_opcode ops[] = { - BI_OPCODE_LSHIFT_AND_V4I8, - BI_OPCODE_LSHIFT_OR_V4I8, - BI_OPCODE_LSHIFT_XOR_V4I8, - BI_OPCODE_RSHIFT_AND_V4I8, - BI_OPCODE_RSHIFT_OR_V4I8, - BI_OPCODE_RSHIFT_XOR_V4I8, + BI_OPCODE_LSHIFT_AND_V4I8, BI_OPCODE_LSHIFT_OR_V4I8, + BI_OPCODE_LSHIFT_XOR_V4I8, BI_OPCODE_RSHIFT_AND_V4I8, + BI_OPCODE_RSHIFT_OR_V4I8, BI_OPCODE_RSHIFT_XOR_V4I8, }; for (unsigned i = 0; i < ARRAY_SIZE(ops); ++i) { - CASE({ + CASE( + { bi_instr *I = bi_lshift_and_v4i8_to(b, reg, x3210, y, z); I->op = ops[i]; - }, - { - bi_instr *I = bi_lshift_and_v4i8_to(b, reg, bi_swz_v4i8(b, x3210), y, z); + }, + { + bi_instr *I = + bi_lshift_and_v4i8_to(b, reg, bi_swz_v4i8(b, x3210), y, z); I->op = ops[i]; - }); + }); } } diff --git a/src/panfrost/bifrost/test/test-message-preload.cpp b/src/panfrost/bifrost/test/test-message-preload.cpp index f1f00413d00..d5e548d54f8 100644 --- a/src/panfrost/bifrost/test/test-message-preload.cpp +++ b/src/panfrost/bifrost/test/test-message-preload.cpp @@ -21,56 +21,58 @@ * SOFTWARE. 
*/ -#include "compiler.h" -#include "bi_test.h" #include "bi_builder.h" +#include "bi_test.h" +#include "compiler.h" #include -#define CASE(instr, expected) do { \ - bi_builder *A = bit_builder(mem_ctx); \ - bi_builder *B = bit_builder(mem_ctx); \ - A->shader->info.bifrost = rzalloc(mem_ctx, struct bifrost_shader_info); \ - B->shader->info.bifrost = rzalloc(mem_ctx, struct bifrost_shader_info); \ - { \ - bi_builder *b = A; \ - bi_index u = bi_temp(b->shader); \ - UNUSED bi_index v = bi_temp(b->shader); \ - UNUSED bi_index w = bi_temp(b->shader); \ - instr; \ - } \ - { \ - bi_builder *b = B; \ - bi_index u = bi_temp(b->shader); \ - UNUSED bi_index v = bi_temp(b->shader); \ - UNUSED bi_index w = bi_temp(b->shader); \ - expected; \ - } \ - bi_opt_message_preload(A->shader); \ - if (!bit_shader_equal(A->shader, B->shader)) { \ - ADD_FAILURE(); \ - fprintf(stderr, "Optimization produce unexpected result"); \ - fprintf(stderr, " Actual:\n"); \ - bi_print_shader(A->shader, stderr); \ - fprintf(stderr, "Expected:\n"); \ - bi_print_shader(B->shader, stderr); \ - fprintf(stderr, "\n"); \ - } \ -} while(0) +#define CASE(instr, expected) \ + do { \ + bi_builder *A = bit_builder(mem_ctx); \ + bi_builder *B = bit_builder(mem_ctx); \ + A->shader->info.bifrost = rzalloc(mem_ctx, struct bifrost_shader_info); \ + B->shader->info.bifrost = rzalloc(mem_ctx, struct bifrost_shader_info); \ + { \ + bi_builder *b = A; \ + bi_index u = bi_temp(b->shader); \ + UNUSED bi_index v = bi_temp(b->shader); \ + UNUSED bi_index w = bi_temp(b->shader); \ + instr; \ + } \ + { \ + bi_builder *b = B; \ + bi_index u = bi_temp(b->shader); \ + UNUSED bi_index v = bi_temp(b->shader); \ + UNUSED bi_index w = bi_temp(b->shader); \ + expected; \ + } \ + bi_opt_message_preload(A->shader); \ + if (!bit_shader_equal(A->shader, B->shader)) { \ + ADD_FAILURE(); \ + fprintf(stderr, "Optimization produce unexpected result"); \ + fprintf(stderr, " Actual:\n"); \ + bi_print_shader(A->shader, stderr); \ + fprintf(stderr, "Expected:\n"); \ + bi_print_shader(B->shader, stderr); \ + fprintf(stderr, "\n"); \ + } \ + } while (0) #define NEGCASE(instr) CASE(instr, instr) class MessagePreload : public testing::Test { -protected: - MessagePreload() { + protected: + MessagePreload() + { mem_ctx = ralloc_context(NULL); - x = bi_register(16); - y = bi_register(32); - + x = bi_register(16); + y = bi_register(32); } - ~MessagePreload() { + ~MessagePreload() + { ralloc_free(mem_ctx); } @@ -84,100 +86,117 @@ protected: b->cursor = bi_before_block(bi_start_block(&b->shader->blocks)); bi_foreach_src(I, i) - I->src[i] = bi_mov_i32(b, bi_register(idx*4 + i)); + I->src[i] = bi_mov_i32(b, bi_register(idx * 4 + i)); b->cursor = bi_after_instr(I); } }; - TEST_F(MessagePreload, PreloadLdVarSample) { - CASE({ + CASE( + { bi_ld_var_imm_to(b, u, bi_register(61), BI_REGISTER_FORMAT_F32, BI_SAMPLE_SAMPLE, BI_UPDATE_STORE, BI_VECSIZE_V4, 0); - }, { - preload_moves(b, u, 4, 0); - }); + }, + { preload_moves(b, u, 4, 0); }); } TEST_F(MessagePreload, PreloadLdVarLdVar) { - CASE({ + CASE( + { bi_ld_var_imm_to(b, u, bi_register(61), BI_REGISTER_FORMAT_F32, BI_SAMPLE_SAMPLE, BI_UPDATE_STORE, BI_VECSIZE_V4, 2); bi_ld_var_imm_to(b, v, bi_register(61), BI_REGISTER_FORMAT_F32, BI_SAMPLE_SAMPLE, BI_UPDATE_STORE, BI_VECSIZE_V4, 1); - }, { + }, + { preload_moves(b, u, 4, 0); preload_moves(b, v, 4, 1); - }); + }); } TEST_F(MessagePreload, MaxTwoMessages) { - CASE({ + CASE( + { bi_ld_var_imm_to(b, u, bi_register(61), BI_REGISTER_FORMAT_F32, BI_SAMPLE_SAMPLE, BI_UPDATE_STORE, BI_VECSIZE_V4, 
2); bi_ld_var_imm_to(b, v, bi_register(61), BI_REGISTER_FORMAT_F32, BI_SAMPLE_SAMPLE, BI_UPDATE_STORE, BI_VECSIZE_V4, 1); bi_ld_var_imm_to(b, w, bi_register(61), BI_REGISTER_FORMAT_F32, BI_SAMPLE_SAMPLE, BI_UPDATE_STORE, BI_VECSIZE_V4, 0); - }, - { + }, + { preload_moves(b, u, 4, 0); preload_moves(b, v, 4, 1); bi_ld_var_imm_to(b, w, bi_register(61), BI_REGISTER_FORMAT_F32, BI_SAMPLE_SAMPLE, BI_UPDATE_STORE, BI_VECSIZE_V4, 0); - }); + }); - CASE({ - bi_var_tex_f32_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0); - bi_var_tex_f16_to(b, v, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 1, 2); - bi_var_tex_f16_to(b, w, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 3, 3); - }, { + CASE( + { + bi_var_tex_f32_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, + 0); + bi_var_tex_f16_to(b, v, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 1, + 2); + bi_var_tex_f16_to(b, w, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 3, + 3); + }, + { preload_moves(b, u, 4, 0); preload_moves(b, v, 2, 1); - bi_var_tex_f16_to(b, w, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 3, 3); - }); + bi_var_tex_f16_to(b, w, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 3, + 3); + }); } TEST_F(MessagePreload, PreloadVartexF16) { - CASE({ - bi_var_tex_f16_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0); - }, { - preload_moves(b, u, 2, 0); - }); + CASE( + { + bi_var_tex_f16_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, + 0); + }, + { preload_moves(b, u, 2, 0); }); } TEST_F(MessagePreload, PreloadVartexF32) { - CASE({ - bi_var_tex_f32_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0); - }, { - preload_moves(b, u, 4, 0); - }); + CASE( + { + bi_var_tex_f32_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, + 0); + }, + { preload_moves(b, u, 4, 0); }); } TEST_F(MessagePreload, PreloadVartexF32VartexF16) { - CASE({ - bi_var_tex_f32_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0); - bi_var_tex_f16_to(b, v, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 1, 2); - }, { + CASE( + { + bi_var_tex_f32_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, + 0); + bi_var_tex_f16_to(b, v, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 1, + 2); + }, + { preload_moves(b, u, 4, 0); preload_moves(b, v, 2, 1); - }); + }); } TEST_F(MessagePreload, PreloadVartexLodModes) { - CASE({ + CASE( + { bi_var_tex_f32_to(b, u, true, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0); - bi_var_tex_f32_to(b, v, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0); - }, { + bi_var_tex_f32_to(b, v, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, + 0); + }, + { preload_moves(b, u, 4, 0); preload_moves(b, v, 4, 1); - }); + }); } diff --git a/src/panfrost/bifrost/test/test-optimizer.cpp b/src/panfrost/bifrost/test/test-optimizer.cpp index 73be5367159..c10b9367e38 100644 --- a/src/panfrost/bifrost/test/test-optimizer.cpp +++ b/src/panfrost/bifrost/test/test-optimizer.cpp @@ -21,9 +21,9 @@ * SOFTWARE. */ -#include "compiler.h" -#include "bi_test.h" #include "bi_builder.h" +#include "bi_test.h" +#include "compiler.h" #include @@ -38,24 +38,35 @@ bi_optimizer(bi_context *ctx) /* Define reg first so it has a consistent variable index, and pass it to an * instruction that cannot be dead code eliminated so the program is nontrivial. 
*/ -#define CASE(instr, expected) INSTRUCTION_CASE(\ - { UNUSED bi_index reg = bi_temp(b->shader); instr; bi_kaboom(b, reg); }, \ - { UNUSED bi_index reg = bi_temp(b->shader); expected; bi_kaboom(b, reg); }, \ +#define CASE(instr, expected) \ + INSTRUCTION_CASE( \ + { \ + UNUSED bi_index reg = bi_temp(b->shader); \ + instr; \ + bi_kaboom(b, reg); \ + }, \ + { \ + UNUSED bi_index reg = bi_temp(b->shader); \ + expected; \ + bi_kaboom(b, reg); \ + }, \ bi_optimizer); #define NEGCASE(instr) CASE(instr, instr) class Optimizer : public testing::Test { -protected: - Optimizer() { + protected: + Optimizer() + { mem_ctx = ralloc_context(NULL); - x = bi_register(1); - y = bi_register(2); + x = bi_register(1); + y = bi_register(2); negabsx = bi_neg(bi_abs(x)); } - ~Optimizer() { + ~Optimizer() + { ralloc_free(mem_ctx); } @@ -95,91 +106,124 @@ TEST_F(Optimizer, FusedFABSNEGForFP16) TEST_F(Optimizer, FuseFADD_F32WithEqualSourcesAbsAbsAndClamp) { - CASE({ - bi_instr *I = bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_abs(x)), bi_abs(x)); + CASE( + { + bi_instr *I = + bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_abs(x)), bi_abs(x)); I->clamp = BI_CLAMP_CLAMP_0_1; - }, { + }, + { bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x)); I->clamp = BI_CLAMP_CLAMP_0_1; - }); + }); - CASE({ - bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_fabsneg_f32(b, bi_abs(x))); + CASE( + { + bi_instr *I = + bi_fadd_f32_to(b, reg, bi_abs(x), bi_fabsneg_f32(b, bi_abs(x))); I->clamp = BI_CLAMP_CLAMP_0_1; - }, { + }, + { bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x)); I->clamp = BI_CLAMP_CLAMP_0_1; - }); + }); - CASE({ - bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, bi_abs(x), bi_abs(x))); + CASE( + { + bi_instr *I = + bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, bi_abs(x), bi_abs(x))); I->clamp = BI_CLAMP_CLAMP_0_INF; - }, { + }, + { bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x)); I->clamp = BI_CLAMP_CLAMP_0_INF; - }); + }); } TEST_F(Optimizer, FuseFADD_V2F16WithDifferentSourcesAbsAbsAndClamp) { - CASE({ - bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(y)); + CASE( + { + bi_instr *I = + bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(y)); I->clamp = BI_CLAMP_CLAMP_0_1; - }, { + }, + { bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y)); I->clamp = BI_CLAMP_CLAMP_0_1; - }); + }); - CASE({ - bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(y))); + CASE( + { + bi_instr *I = + bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(y))); I->clamp = BI_CLAMP_CLAMP_0_1; - }, { + }, + { bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y)); I->clamp = BI_CLAMP_CLAMP_0_1; - }); + }); - CASE({ - bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(y))); + CASE( + { + bi_instr *I = + bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(y))); I->clamp = BI_CLAMP_CLAMP_0_INF; - }, { + }, + { bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y)); I->clamp = BI_CLAMP_CLAMP_0_INF; - }); + }); } TEST_F(Optimizer, AvoidFADD_V2F16WithEqualSourcesAbsAbsAndClamp) { NEGCASE({ - bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(x)); - I->clamp = BI_CLAMP_CLAMP_0_1; + bi_instr *I = + bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(x)); + I->clamp = BI_CLAMP_CLAMP_0_1; }); NEGCASE({ - bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(x))); - I->clamp = BI_CLAMP_CLAMP_0_1; + bi_instr *I = + bi_fadd_v2f16_to(b, reg, 
bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(x))); + I->clamp = BI_CLAMP_CLAMP_0_1; }); NEGCASE({ - bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(x))); + bi_instr *I = + bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(x))); I->clamp = BI_CLAMP_CLAMP_0_INF; }); } TEST_F(Optimizer, SwizzlesComposedForFP16) { - CASE(bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), y), + CASE(bi_fadd_v2f16_to( + b, reg, bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), y), bi_fadd_v2f16_to(b, reg, bi_swz_16(negabsx, true, false), y)); - CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, negabsx), true, false), y), + CASE(bi_fadd_v2f16_to( + b, reg, bi_swz_16(bi_fabsneg_v2f16(b, negabsx), true, false), y), bi_fadd_v2f16_to(b, reg, bi_swz_16(negabsx, true, false), y)); - CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), true, false), y), + CASE(bi_fadd_v2f16_to( + b, reg, + bi_swz_16(bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), true, + false), + y), bi_fadd_v2f16_to(b, reg, negabsx, y)); - CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, false)), true, false), y), + CASE(bi_fadd_v2f16_to( + b, reg, + bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, false)), true, false), + y), bi_fadd_v2f16_to(b, reg, bi_half(negabsx, false), y)); - CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, true)), true, false), y), + CASE(bi_fadd_v2f16_to( + b, reg, + bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, true)), true, false), + y), bi_fadd_v2f16_to(b, reg, bi_half(negabsx, true), y)); } @@ -192,7 +236,8 @@ TEST_F(Optimizer, PreserveWidens) CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(negabsx, true)), y), bi_fadd_f32_to(b, reg, bi_half(negabsx, true), y)); - CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(x, true)), bi_fabsneg_f32(b, bi_half(x, false))), + CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(x, true)), + bi_fabsneg_f32(b, bi_half(x, false))), bi_fadd_f32_to(b, reg, bi_half(x, true), bi_half(x, false))); } @@ -219,85 +264,100 @@ TEST_F(Optimizer, AvoidZeroAndFABSNEGFootguns) TEST_F(Optimizer, ClampsPropagated) { - CASE({ - bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, x, y)); - I->clamp = BI_CLAMP_CLAMP_0_INF; - }, { - bi_instr *I = bi_fadd_f32_to(b, reg, x, y); - I->clamp = BI_CLAMP_CLAMP_0_INF; - }); + CASE( + { + bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, x, y)); + I->clamp = BI_CLAMP_CLAMP_0_INF; + }, + { + bi_instr *I = bi_fadd_f32_to(b, reg, x, y); + I->clamp = BI_CLAMP_CLAMP_0_INF; + }); - CASE({ - bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, x, y)); - I->clamp = BI_CLAMP_CLAMP_0_1; - }, { - bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y); - I->clamp = BI_CLAMP_CLAMP_0_1; - }); + CASE( + { + bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, x, y)); + I->clamp = BI_CLAMP_CLAMP_0_1; + }, + { + bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y); + I->clamp = BI_CLAMP_CLAMP_0_1; + }); } - TEST_F(Optimizer, ClampsComposed) { - CASE({ - bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y); - bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]); - I->clamp = BI_CLAMP_CLAMP_M1_1; - J->clamp = BI_CLAMP_CLAMP_0_INF; - }, { - bi_instr *I = bi_fadd_f32_to(b, reg, x, y); - I->clamp = BI_CLAMP_CLAMP_0_1; - }); + CASE( + { + bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y); + bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]); + I->clamp = BI_CLAMP_CLAMP_M1_1; + J->clamp = 
BI_CLAMP_CLAMP_0_INF; + }, + { + bi_instr *I = bi_fadd_f32_to(b, reg, x, y); + I->clamp = BI_CLAMP_CLAMP_0_1; + }); - CASE({ - bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y); - bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]); - I->clamp = BI_CLAMP_CLAMP_0_1; - J->clamp = BI_CLAMP_CLAMP_0_INF; - }, { - bi_instr *I = bi_fadd_f32_to(b, reg, x, y); - I->clamp = BI_CLAMP_CLAMP_0_1; - }); + CASE( + { + bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y); + bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]); + I->clamp = BI_CLAMP_CLAMP_0_1; + J->clamp = BI_CLAMP_CLAMP_0_INF; + }, + { + bi_instr *I = bi_fadd_f32_to(b, reg, x, y); + I->clamp = BI_CLAMP_CLAMP_0_1; + }); - CASE({ - bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y); - bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]); - I->clamp = BI_CLAMP_CLAMP_0_INF; - J->clamp = BI_CLAMP_CLAMP_0_INF; - }, { - bi_instr *I = bi_fadd_f32_to(b, reg, x, y); - I->clamp = BI_CLAMP_CLAMP_0_INF; - }); + CASE( + { + bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y); + bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]); + I->clamp = BI_CLAMP_CLAMP_0_INF; + J->clamp = BI_CLAMP_CLAMP_0_INF; + }, + { + bi_instr *I = bi_fadd_f32_to(b, reg, x, y); + I->clamp = BI_CLAMP_CLAMP_0_INF; + }); - CASE({ - bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y); - bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]); - I->clamp = BI_CLAMP_CLAMP_M1_1; - J->clamp = BI_CLAMP_CLAMP_0_INF; - }, { - bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y); - I->clamp = BI_CLAMP_CLAMP_0_1; - }); + CASE( + { + bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y); + bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]); + I->clamp = BI_CLAMP_CLAMP_M1_1; + J->clamp = BI_CLAMP_CLAMP_0_INF; + }, + { + bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y); + I->clamp = BI_CLAMP_CLAMP_0_1; + }); - CASE({ - bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y); - bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]); - I->clamp = BI_CLAMP_CLAMP_0_1; - J->clamp = BI_CLAMP_CLAMP_0_INF; - }, { - bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y); - I->clamp = BI_CLAMP_CLAMP_0_1; - }); + CASE( + { + bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y); + bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]); + I->clamp = BI_CLAMP_CLAMP_0_1; + J->clamp = BI_CLAMP_CLAMP_0_INF; + }, + { + bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y); + I->clamp = BI_CLAMP_CLAMP_0_1; + }); - CASE({ - bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y); - bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]); - I->clamp = BI_CLAMP_CLAMP_0_INF; - J->clamp = BI_CLAMP_CLAMP_0_INF; - }, { - bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y); - I->clamp = BI_CLAMP_CLAMP_0_INF; - }); + CASE( + { + bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y); + bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]); + I->clamp = BI_CLAMP_CLAMP_0_INF; + J->clamp = BI_CLAMP_CLAMP_0_INF; + }, + { + bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y); + I->clamp = BI_CLAMP_CLAMP_0_INF; + }); } TEST_F(Optimizer, DoNotMixSizesWhenClamping) @@ -341,21 +401,29 @@ TEST_F(Optimizer, FuseComparisonsWithDISCARD) bi_discard_f32(b, x, y, BI_CMPF_EQ)); for (unsigned h = 0; h < 2; ++h) { - CASE(bi_discard_b32(b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_F1), h)), + CASE(bi_discard_b32( + b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_F1), + h)), bi_discard_f32(b, bi_half(x, h), bi_half(y, h), BI_CMPF_LE)); - CASE(bi_discard_b32(b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_NE, 
BI_RESULT_TYPE_I1), h)), + CASE(bi_discard_b32( + b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_NE, BI_RESULT_TYPE_I1), + h)), bi_discard_f32(b, bi_half(x, h), bi_half(y, h), BI_CMPF_NE)); - CASE(bi_discard_b32(b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_EQ, BI_RESULT_TYPE_M1), h)), + CASE(bi_discard_b32( + b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_EQ, BI_RESULT_TYPE_M1), + h)), bi_discard_f32(b, bi_half(x, h), bi_half(y, h), BI_CMPF_EQ)); } } TEST_F(Optimizer, DoNotFuseSpecialComparisons) { - NEGCASE(bi_discard_b32(b, bi_fcmp_f32(b, x, y, BI_CMPF_GTLT, BI_RESULT_TYPE_F1))); - NEGCASE(bi_discard_b32(b, bi_fcmp_f32(b, x, y, BI_CMPF_TOTAL, BI_RESULT_TYPE_F1))); + NEGCASE( + bi_discard_b32(b, bi_fcmp_f32(b, x, y, BI_CMPF_GTLT, BI_RESULT_TYPE_F1))); + NEGCASE(bi_discard_b32( + b, bi_fcmp_f32(b, x, y, BI_CMPF_TOTAL, BI_RESULT_TYPE_F1))); } TEST_F(Optimizer, FuseResultType) @@ -365,25 +433,33 @@ TEST_F(Optimizer, FuseResultType) BI_MUX_INT_ZERO), bi_fcmp_f32_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_F1)); - CASE(bi_mux_i32_to(b, reg, bi_imm_f32(0.0), bi_imm_f32(1.0), - bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1), - BI_MUX_INT_ZERO), - bi_fcmp_f32_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_F1)); + CASE(bi_mux_i32_to( + b, reg, bi_imm_f32(0.0), bi_imm_f32(1.0), + bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1), + BI_MUX_INT_ZERO), + bi_fcmp_f32_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE, + BI_RESULT_TYPE_F1)); - CASE(bi_mux_i32_to(b, reg, bi_imm_u32(0), bi_imm_u32(1), - bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1), - BI_MUX_INT_ZERO), - bi_fcmp_f32_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_I1)); + CASE(bi_mux_i32_to( + b, reg, bi_imm_u32(0), bi_imm_u32(1), + bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1), + BI_MUX_INT_ZERO), + bi_fcmp_f32_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE, + BI_RESULT_TYPE_I1)); CASE(bi_mux_v2i16_to(b, reg, bi_imm_f16(0.0), bi_imm_f16(1.0), - bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1), - BI_MUX_INT_ZERO), - bi_fcmp_v2f16_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_F1)); + bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, + BI_RESULT_TYPE_M1), + BI_MUX_INT_ZERO), + bi_fcmp_v2f16_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE, + BI_RESULT_TYPE_F1)); CASE(bi_mux_v2i16_to(b, reg, bi_imm_u16(0), bi_imm_u16(1), - bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1), - BI_MUX_INT_ZERO), - bi_fcmp_v2f16_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_I1)); + bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, + BI_RESULT_TYPE_M1), + BI_MUX_INT_ZERO), + bi_fcmp_v2f16_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE, + BI_RESULT_TYPE_I1)); CASE(bi_mux_i32_to(b, reg, bi_imm_u32(0), bi_imm_u32(1), bi_icmp_u32(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1), @@ -391,13 +467,13 @@ TEST_F(Optimizer, FuseResultType) bi_icmp_u32_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1)); CASE(bi_mux_v2i16_to(b, reg, bi_imm_u16(0), bi_imm_u16(1), - bi_icmp_v2u16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1), - BI_MUX_INT_ZERO), + bi_icmp_v2u16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1), + BI_MUX_INT_ZERO), bi_icmp_v2u16_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1)); CASE(bi_mux_v4i8_to(b, reg, bi_imm_u8(0), bi_imm_u8(1), - bi_icmp_v4u8(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1), - BI_MUX_INT_ZERO), + bi_icmp_v4u8(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1), + BI_MUX_INT_ZERO), bi_icmp_v4u8_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1)); 
CASE(bi_mux_i32_to(b, reg, bi_imm_u32(0), bi_imm_u32(1), @@ -406,31 +482,36 @@ TEST_F(Optimizer, FuseResultType) bi_icmp_s32_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1)); CASE(bi_mux_v2i16_to(b, reg, bi_imm_u16(0), bi_imm_u16(1), - bi_icmp_v2s16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1), - BI_MUX_INT_ZERO), + bi_icmp_v2s16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1), + BI_MUX_INT_ZERO), bi_icmp_v2s16_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1)); CASE(bi_mux_v4i8_to(b, reg, bi_imm_u8(0), bi_imm_u8(1), - bi_icmp_v4s8(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1), - BI_MUX_INT_ZERO), + bi_icmp_v4s8(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1), + BI_MUX_INT_ZERO), bi_icmp_v4s8_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1)); } TEST_F(Optimizer, DoNotFuseMixedSizeResultType) { - NEGCASE(bi_mux_i32_to(b, reg, bi_imm_f32(0.0), bi_imm_f32(1.0), - bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1), - BI_MUX_INT_ZERO)); + NEGCASE(bi_mux_i32_to( + b, reg, bi_imm_f32(0.0), bi_imm_f32(1.0), + bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1), + BI_MUX_INT_ZERO)); - NEGCASE(bi_mux_v2i16_to(b, reg, bi_imm_f16(0.0), bi_imm_f16(1.0), - bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1), - BI_MUX_INT_ZERO)); + NEGCASE(bi_mux_v2i16_to( + b, reg, bi_imm_f16(0.0), bi_imm_f16(1.0), + bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1), + BI_MUX_INT_ZERO)); } TEST_F(Optimizer, VarTexCoord32) { - CASE({ - bi_index ld = bi_ld_var_imm(b, bi_null(), BI_REGISTER_FORMAT_F32, BI_SAMPLE_CENTER, BI_UPDATE_STORE, BI_VECSIZE_V2, 0); + CASE( + { + bi_index ld = + bi_ld_var_imm(b, bi_null(), BI_REGISTER_FORMAT_F32, + BI_SAMPLE_CENTER, BI_UPDATE_STORE, BI_VECSIZE_V2, 0); bi_index x = bi_temp(b->shader); bi_index y = bi_temp(b->shader); @@ -439,9 +520,11 @@ TEST_F(Optimizer, VarTexCoord32) split->dest[1] = y; bi_texs_2d_f32_to(b, reg, x, y, false, 0, 0); - }, { - bi_var_tex_f32_to(b, reg, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0); - }); + }, + { + bi_var_tex_f32_to(b, reg, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, + 0); + }); } TEST_F(Optimizer, Int8ToFloat32) @@ -458,7 +541,6 @@ TEST_F(Optimizer, Int8ToFloat32) } } - TEST_F(Optimizer, Int16ToFloat32) { for (unsigned i = 0; i < 2; ++i) { diff --git a/src/panfrost/bifrost/test/test-pack-formats.cpp b/src/panfrost/bifrost/test/test-pack-formats.cpp index 91fd474655c..f75add21197 100644 --- a/src/panfrost/bifrost/test/test-pack-formats.cpp +++ b/src/panfrost/bifrost/test/test-pack-formats.cpp @@ -21,23 +21,27 @@ * SOFTWARE. 
*/ -#include "compiler.h" #include "bi_test.h" +#include "compiler.h" #include #include "mesa-gtest-extras.h" -class PackFormats : public testing::Test -{ -protected: - PackFormats() { +class PackFormats : public testing::Test { + protected: + PackFormats() + { util_dynarray_init(&result, NULL); } - ~PackFormats() { + ~PackFormats() + { util_dynarray_fini(&result); } - const uint64_t *result_as_u64_array() { return reinterpret_cast(result.data); } + const uint64_t *result_as_u64_array() + { + return reinterpret_cast(result.data); + } struct util_dynarray result; }; @@ -46,7 +50,7 @@ TEST_F(PackFormats, 1) { /* Test case from the blob */ struct bi_packed_tuple tuples[] = { - { 0x2380cb1c02200000, 0x10e0 }, + {0x2380cb1c02200000, 0x10e0}, }; uint64_t header = 0x021000011800; @@ -65,8 +69,8 @@ TEST_F(PackFormats, 1) TEST_F(PackFormats, 2) { struct bi_packed_tuple tuples[] = { - { 0x9380cb6044000044, 0xf65 }, - { 0xaf8721a05c000081, 0x1831 }, + {0x9380cb6044000044, 0xf65}, + {0xaf8721a05c000081, 0x1831}, }; bi_pack_format(&result, 0, tuples, 2, 0x52800011800, 0, 0, false); @@ -86,9 +90,9 @@ TEST_F(PackFormats, 2) TEST_F(PackFormats, 3) { struct bi_packed_tuple tuples[] = { - { 0x93805b8040000000, 0xf65 }, - { 0x93886db05c000000, 0xf65 }, - { 0xb380cb180c000080, 0x18b1 }, + {0x93805b8040000000, 0xf65}, + {0x93886db05c000000, 0xf65}, + {0xb380cb180c000080, 0x18b1}, }; bi_pack_format(&result, 0, tuples, 3, 0x3100000000, 0, 0, true); @@ -96,12 +100,8 @@ TEST_F(PackFormats, 3) bi_pack_format(&result, 4, tuples, 3, 0x3100000000, 0, 0, true); const uint64_t expected[] = { - 0x805b804000000029, - 0x0188000000076593, - 0x886db05c00000021, - 0x58c0600004076593, - 0x0000000000000044, - 0x60002c6ce0300000, + 0x805b804000000029, 0x0188000000076593, 0x886db05c00000021, + 0x58c0600004076593, 0x0000000000000044, 0x60002c6ce0300000, }; ASSERT_EQ(result.size, 48); @@ -111,10 +111,10 @@ TEST_F(PackFormats, 3) TEST_F(PackFormats, 4) { struct bi_packed_tuple tuples[] = { - { 0xad8c87004000005f, 0x2f18 }, - { 0xad8c87385c00004f, 0x2f18 }, - { 0xad8c87385c00006e, 0x2f18 }, - { 0xb380cb182c000080, 0x18b1 }, + {0xad8c87004000005f, 0x2f18}, + {0xad8c87385c00004f, 0x2f18}, + {0xad8c87385c00006e, 0x2f18}, + {0xb380cb182c000080, 0x18b1}, }; uint64_t EC0 = (0x10000001ff000000) >> 4; @@ -124,12 +124,8 @@ TEST_F(PackFormats, 4) bi_pack_format(&result, 6, tuples, 4, 0x3100000000, EC0, 0, false); const uint64_t expected[] = { - 0x8c87004000005f2d, - 0x01880000000718ad, - 0x8c87385c00004f25, - 0x39c2e000037718ad, - 0x80cb182c00008005, - 0xac01c62b6320b1b3, + 0x8c87004000005f2d, 0x01880000000718ad, 0x8c87385c00004f25, + 0x39c2e000037718ad, 0x80cb182c00008005, 0xac01c62b6320b1b3, }; ASSERT_EQ(result.size, 48); @@ -139,11 +135,9 @@ TEST_F(PackFormats, 4) TEST_F(PackFormats, 5) { struct bi_packed_tuple tuples[] = { - { 0x9380688040000000, 0xf65 }, - { 0xd4057300c000040, 0xf26 }, - { 0x1f80cb1858000000, 0x19ab }, - { 0x937401f85c000000, 0xf65 }, - { 0xb380cb180c000080, 0x18a1 }, + {0x9380688040000000, 0xf65}, {0xd4057300c000040, 0xf26}, + {0x1f80cb1858000000, 0x19ab}, {0x937401f85c000000, 0xf65}, + {0xb380cb180c000080, 0x18a1}, }; uint64_t EC0 = (0x183f800000) >> 4; @@ -154,14 +148,9 @@ TEST_F(PackFormats, 5) bi_pack_format(&result, 8, tuples, 5, 0x3100000000, EC0, 0, true); const uint64_t expected[] = { - 0x8068804000000029, - 0x0188000000076593, - 0x4057300c00004021, - 0x58c2c0000007260d, - 0x7401f85c0000008b, - 0x00006ac7e0376593, - 0x80cb180c00008053, - 0x000000183f80a1b3, + 0x8068804000000029, 0x0188000000076593, 
0x4057300c00004021, + 0x58c2c0000007260d, 0x7401f85c0000008b, 0x00006ac7e0376593, + 0x80cb180c00008053, 0x000000183f80a1b3, }; ASSERT_EQ(result.size, 64); @@ -171,12 +160,9 @@ TEST_F(PackFormats, 5) TEST_F(PackFormats, 6) { struct bi_packed_tuple tuples[] = { - { 0xad8c870068000048, 0x2f18 }, - { 0xad8c87385c000050, 0x2f18 }, - { 0xad8c87385c00006a, 0x2f18 }, - { 0xad8c87385c000074, 0x2f18 }, - { 0xad8c87385c000020, 0x2f18 }, - { 0xad8c87385c000030, 0x2f18 }, + {0xad8c870068000048, 0x2f18}, {0xad8c87385c000050, 0x2f18}, + {0xad8c87385c00006a, 0x2f18}, {0xad8c87385c000074, 0x2f18}, + {0xad8c87385c000020, 0x2f18}, {0xad8c87385c000030, 0x2f18}, }; uint64_t EC0 = (0x345678912345670) >> 4; @@ -188,15 +174,9 @@ TEST_F(PackFormats, 6) bi_pack_format(&result, 10, tuples, 6, 0x60000011800, EC0, 0, false); const uint64_t expected[] = { - 0x8c8700680000482d, - 0x30000008c00718ad, - 0x8c87385c00005025, - 0x39c2e000035718ad, - 0x8c87385c00007401, - 0xb401c62b632718ad, - 0x8c87385c00002065, - 0x39c2e000018718ad, - 0x3456789123456706, + 0x8c8700680000482d, 0x30000008c00718ad, 0x8c87385c00005025, + 0x39c2e000035718ad, 0x8c87385c00007401, 0xb401c62b632718ad, + 0x8c87385c00002065, 0x39c2e000018718ad, 0x3456789123456706, 0xa001c62b63200000, }; @@ -207,13 +187,10 @@ TEST_F(PackFormats, 6) TEST_F(PackFormats, 7) { struct bi_packed_tuple tuples[] = { - { 0x9020074040000083, 0xf65 }, - { 0x90000d4058100080, 0xf65 }, - { 0x90000a3058700082, 0xf65 }, - { 0x9020074008114581, 0xf65 }, - { 0x90000d0058000080, 0xf65 }, - { 0x9000083058700082, 0xf65 }, - { 0x2380cb199ac38400, 0x327a }, + {0x9020074040000083, 0xf65}, {0x90000d4058100080, 0xf65}, + {0x90000a3058700082, 0xf65}, {0x9020074008114581, 0xf65}, + {0x90000d0058000080, 0xf65}, {0x9000083058700082, 0xf65}, + {0x2380cb199ac38400, 0x327a}, }; bi_pack_format(&result, 0, tuples, 7, 0x3000100000, 0, 0, true); @@ -223,15 +200,9 @@ TEST_F(PackFormats, 7) bi_pack_format(&result, 11, tuples, 7, 0x3000100000, 0, 0, true); const uint64_t expected[] = { - 0x2007404000008329, - 0x0180008000076590, - 0x000d405810008021, - 0x5182c38004176590, - 0x2007400811458101, - 0x2401d96400076590, - 0x000d005800008061, - 0x4182c38004176590, - 0x80cb199ac3840047, + 0x2007404000008329, 0x0180008000076590, 0x000d405810008021, + 0x5182c38004176590, 0x2007400811458101, 0x2401d96400076590, + 0x000d005800008061, 0x4182c38004176590, 0x80cb199ac3840047, 0x3801d96400027a23, }; @@ -242,14 +213,10 @@ TEST_F(PackFormats, 7) TEST_F(PackFormats, 8) { struct bi_packed_tuple tuples[] = { - { 0x442087037a2f8643, 0x3021 }, - { 0x84008d0586100043, 0x200 }, - { 0x7c008d0028014543, 0x0 }, - { 0x1c00070058200081, 0x1980 }, - { 0x1600dd878320400, 0x200 }, - { 0x49709c1b08308900, 0x200 }, - { 0x6c2007807881ca00, 0x40 }, - { 0x8d70fc0d94900083, 0x800 }, + {0x442087037a2f8643, 0x3021}, {0x84008d0586100043, 0x200}, + {0x7c008d0028014543, 0x0}, {0x1c00070058200081, 0x1980}, + {0x1600dd878320400, 0x200}, {0x49709c1b08308900, 0x200}, + {0x6c2007807881ca00, 0x40}, {0x8d70fc0d94900083, 0x800}, }; uint64_t EC0 = (0x32e635d0) >> 4; @@ -262,18 +229,10 @@ TEST_F(PackFormats, 8) bi_pack_format(&result, 13, tuples, 8, 0x61001311800, EC0, 0, true); const uint64_t expected[] = { - 0x2087037a2f86432e, - 0x30800988c0002144, - 0x008d058610004320, - 0x6801400a2a1a0084, - 0x0007005820008101, - 0x0c00001f0021801c, - 0x600dd87832040060, - 0xe0d8418448020001, - 0x2007807881ca00c0, - 0xc6ba80125c20406c, - 0x70fc0d9490008359, - 0x0000000032e0008d, + 0x2087037a2f86432e, 0x30800988c0002144, 0x008d058610004320, + 0x6801400a2a1a0084, 
0x0007005820008101, 0x0c00001f0021801c, + 0x600dd87832040060, 0xe0d8418448020001, 0x2007807881ca00c0, + 0xc6ba80125c20406c, 0x70fc0d9490008359, 0x0000000032e0008d, }; ASSERT_EQ(result.size, 96); diff --git a/src/panfrost/bifrost/test/test-packing.cpp b/src/panfrost/bifrost/test/test-packing.cpp index e876368b997..27cbbab26b4 100644 --- a/src/panfrost/bifrost/test/test-packing.cpp +++ b/src/panfrost/bifrost/test/test-packing.cpp @@ -39,14 +39,9 @@ TEST(Packing, PackLiteral) TEST(Packing, PackUpper) { struct bi_packed_tuple tuples[] = { - { 0, 0x3 << (75 - 64) }, - { 0, 0x1 << (75 - 64) }, - { 0, 0x7 << (75 - 64) }, - { 0, 0x0 << (75 - 64) }, - { 0, 0x2 << (75 - 64) }, - { 0, 0x6 << (75 - 64) }, - { 0, 0x5 << (75 - 64) }, - { 0, 0x4 << (75 - 64) }, + {0, 0x3 << (75 - 64)}, {0, 0x1 << (75 - 64)}, {0, 0x7 << (75 - 64)}, + {0, 0x0 << (75 - 64)}, {0, 0x2 << (75 - 64)}, {0, 0x6 << (75 - 64)}, + {0, 0x5 << (75 - 64)}, {0, 0x4 << (75 - 64)}, }; EXPECT_EQ(bi_pack_upper(U(0), tuples, 8), 3); @@ -62,9 +57,9 @@ TEST(Packing, PackUpper) TEST(Packing, PackTupleBits) { struct bi_packed_tuple tuples[] = { - { 0x1234567801234567, 0x3A }, - { 0x9876543299999999, 0x1B }, - { 0xABCDEF0101234567, 0x7C }, + {0x1234567801234567, 0x3A}, + {0x9876543299999999, 0x1B}, + {0xABCDEF0101234567, 0x7C}, }; EXPECT_EQ(bi_pack_tuple_bits(T(0), tuples, 8, 0, 30), 0x01234567); @@ -75,19 +70,14 @@ TEST(Packing, PackTupleBits) TEST(Packing, PackSync) { struct bi_packed_tuple tuples[] = { - { 0, 0x3 << (75 - 64) }, - { 0, 0x5 << (75 - 64) }, - { 0, 0x7 << (75 - 64) }, - { 0, 0x0 << (75 - 64) }, - { 0, 0x2 << (75 - 64) }, - { 0, 0x6 << (75 - 64) }, - { 0, 0x5 << (75 - 64) }, - { 0, 0x4 << (75 - 64) }, + {0, 0x3 << (75 - 64)}, {0, 0x5 << (75 - 64)}, {0, 0x7 << (75 - 64)}, + {0, 0x0 << (75 - 64)}, {0, 0x2 << (75 - 64)}, {0, 0x6 << (75 - 64)}, + {0, 0x5 << (75 - 64)}, {0, 0x4 << (75 - 64)}, }; EXPECT_EQ(bi_pack_sync(L(3), L(1), L(7), tuples, 8, false), 0xCF); EXPECT_EQ(bi_pack_sync(L(3), L(1), U(7), tuples, 8, false), 0xCC); EXPECT_EQ(bi_pack_sync(L(3), U(1), U(7), tuples, 8, false), 0xEC); - EXPECT_EQ(bi_pack_sync(Z, U(1), U(7), tuples, 8, false), 0x2C); - EXPECT_EQ(bi_pack_sync(Z, U(1), U(7), tuples, 8, true) , 0x6C); + EXPECT_EQ(bi_pack_sync(Z, U(1), U(7), tuples, 8, false), 0x2C); + EXPECT_EQ(bi_pack_sync(Z, U(1), U(7), tuples, 8, true), 0x6C); } diff --git a/src/panfrost/bifrost/test/test-scheduler-predicates.cpp b/src/panfrost/bifrost/test/test-scheduler-predicates.cpp index 7b7e138ebff..bd7c0fd038c 100644 --- a/src/panfrost/bifrost/test/test-scheduler-predicates.cpp +++ b/src/panfrost/bifrost/test/test-scheduler-predicates.cpp @@ -21,23 +21,28 @@ * SOFTWARE. 
*/ -#include "compiler.h" -#include "bi_test.h" #include "bi_builder.h" +#include "bi_test.h" +#include "compiler.h" #include class SchedulerPredicates : public testing::Test { -protected: - SchedulerPredicates() { + protected: + SchedulerPredicates() + { mem_ctx = ralloc_context(NULL); b = bit_builder(mem_ctx); } - ~SchedulerPredicates() { + ~SchedulerPredicates() + { ralloc_free(mem_ctx); } - bi_index TMP() { return bi_temp(b->shader); } + bi_index TMP() + { + return bi_temp(b->shader); + } void *mem_ctx; bi_builder *b; diff --git a/src/panfrost/bifrost/valhall/disassemble.h b/src/panfrost/bifrost/valhall/disassemble.h index 1840268ba98..f23a416a0b3 100644 --- a/src/panfrost/bifrost/valhall/disassemble.h +++ b/src/panfrost/bifrost/valhall/disassemble.h @@ -1,21 +1,21 @@ #ifndef __DISASM_H #define __DISASM_H -#include -#include -#include #include +#include #include +#include +#include #include #include -#define BIT(b) (1ull << (b)) -#define MASK(count) ((1ull << (count)) - 1) +#define BIT(b) (1ull << (b)) +#define MASK(count) ((1ull << (count)) - 1) #define SEXT(b, count) ((b ^ BIT(count - 1)) - BIT(count - 1)) -#define UNUSED __attribute__((unused)) +#define UNUSED __attribute__((unused)) #define VA_SRC_UNIFORM_TYPE 0x2 -#define VA_SRC_IMM_TYPE 0x3 +#define VA_SRC_IMM_TYPE 0x3 static inline void va_print_dest(FILE *fp, uint8_t dest, bool can_mask) @@ -51,7 +51,7 @@ disassemble_valhall(FILE *fp, const uint64_t *code, unsigned size, bool verbose) if (verbose) { /* Print byte pattern */ for (unsigned j = 0; j < 8; ++j) - fprintf(fp, "%02x ", (uint8_t) (instr >> (j * 8))); + fprintf(fp, "%02x ", (uint8_t)(instr >> (j * 8))); fprintf(fp, " "); } else { diff --git a/src/panfrost/bifrost/valhall/test/test-add-imm.cpp b/src/panfrost/bifrost/valhall/test/test-add-imm.cpp index f5e121df1b9..f9e4adea2c3 100644 --- a/src/panfrost/bifrost/valhall/test/test-add-imm.cpp +++ b/src/panfrost/bifrost/valhall/test/test-add-imm.cpp @@ -21,10 +21,10 @@ * SOFTWARE. 
*/ -#include "va_compiler.h" -#include "bi_test.h" -#include "bi_builder.h" #include "util/u_cpu_detect.h" +#include "bi_builder.h" +#include "bi_test.h" +#include "va_compiler.h" #include @@ -37,102 +37,137 @@ add_imm(bi_context *ctx) } #define CASE(instr, expected) INSTRUCTION_CASE(instr, expected, add_imm) -#define NEGCASE(instr) CASE(instr, instr) +#define NEGCASE(instr) CASE(instr, instr) class AddImm : public testing::Test { -protected: - AddImm() { + protected: + AddImm() + { mem_ctx = ralloc_context(NULL); } - ~AddImm() { + ~AddImm() + { ralloc_free(mem_ctx); } void *mem_ctx; }; - -TEST_F(AddImm, Basic) { +TEST_F(AddImm, Basic) +{ CASE(bi_mov_i32_to(b, bi_register(63), bi_imm_u32(0xABAD1DEA)), bi_iadd_imm_i32_to(b, bi_register(63), bi_zero(), 0xABAD1DEA)); CASE(bi_fadd_f32_to(b, bi_register(1), bi_register(2), bi_imm_f32(42.0)), bi_fadd_imm_f32_to(b, bi_register(1), bi_register(2), fui(42.0))); - CASE(bi_fadd_f32_to(b, bi_register(1), bi_discard(bi_register(2)), bi_imm_f32(42.0)), - bi_fadd_imm_f32_to(b, bi_register(1), bi_discard(bi_register(2)), fui(42.0))); + CASE(bi_fadd_f32_to(b, bi_register(1), bi_discard(bi_register(2)), + bi_imm_f32(42.0)), + bi_fadd_imm_f32_to(b, bi_register(1), bi_discard(bi_register(2)), + fui(42.0))); - CASE(bi_fadd_f32_to(b, bi_register(1), bi_discard(bi_register(2)), bi_neg(bi_imm_f32(42.0))), - bi_fadd_imm_f32_to(b, bi_register(1), bi_discard(bi_register(2)), fui(-42.0))); + CASE(bi_fadd_f32_to(b, bi_register(1), bi_discard(bi_register(2)), + bi_neg(bi_imm_f32(42.0))), + bi_fadd_imm_f32_to(b, bi_register(1), bi_discard(bi_register(2)), + fui(-42.0))); } -TEST_F(AddImm, Commutativty) { +TEST_F(AddImm, Commutativty) +{ CASE(bi_fadd_f32_to(b, bi_register(1), bi_imm_f32(42.0), bi_register(2)), bi_fadd_imm_f32_to(b, bi_register(1), bi_register(2), fui(42.0))); } -TEST_F(AddImm, NoModifiers) { - NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_abs(bi_register(2)), bi_imm_f32(42.0))); - NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_neg(bi_register(2)), bi_imm_f32(42.0))); - NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_f32(42.0))); +TEST_F(AddImm, NoModifiers) +{ + NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_abs(bi_register(2)), + bi_imm_f32(42.0))); + NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_neg(bi_register(2)), + bi_imm_f32(42.0))); + NEGCASE(bi_fadd_f32_to(b, bi_register(1), + bi_swz_16(bi_register(2), false, false), + bi_imm_f32(42.0))); } -TEST_F(AddImm, NoClamp) { +TEST_F(AddImm, NoClamp) +{ NEGCASE({ - bi_instr *I = bi_fadd_f32_to(b, bi_register(1), bi_register(2), - bi_imm_f32(42.0)); + bi_instr *I = + bi_fadd_f32_to(b, bi_register(1), bi_register(2), bi_imm_f32(42.0)); I->clamp = BI_CLAMP_CLAMP_M1_1; }); } -TEST_F(AddImm, OtherTypes) { +TEST_F(AddImm, OtherTypes) +{ CASE(bi_fadd_v2f16_to(b, bi_register(1), bi_register(2), bi_imm_f16(42.0)), bi_fadd_imm_v2f16_to(b, bi_register(1), bi_register(2), 0x51405140)); - CASE(bi_iadd_u32_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false), + CASE(bi_iadd_u32_to(b, bi_register(1), bi_register(2), + bi_imm_u32(0xDEADBEEF), false), bi_iadd_imm_i32_to(b, bi_register(1), bi_register(2), 0xDEADBEEF)); - CASE(bi_iadd_v2u16_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false), + CASE(bi_iadd_v2u16_to(b, bi_register(1), bi_register(2), + bi_imm_u32(0xDEADBEEF), false), bi_iadd_imm_v2i16_to(b, bi_register(1), bi_register(2), 0xDEADBEEF)); - CASE(bi_iadd_v4u8_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false), + 
CASE(bi_iadd_v4u8_to(b, bi_register(1), bi_register(2), + bi_imm_u32(0xDEADBEEF), false), bi_iadd_imm_v4i8_to(b, bi_register(1), bi_register(2), 0xDEADBEEF)); - CASE(bi_iadd_s32_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false), + CASE(bi_iadd_s32_to(b, bi_register(1), bi_register(2), + bi_imm_u32(0xDEADBEEF), false), bi_iadd_imm_i32_to(b, bi_register(1), bi_register(2), 0xDEADBEEF)); - CASE(bi_iadd_v2s16_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false), + CASE(bi_iadd_v2s16_to(b, bi_register(1), bi_register(2), + bi_imm_u32(0xDEADBEEF), false), bi_iadd_imm_v2i16_to(b, bi_register(1), bi_register(2), 0xDEADBEEF)); - CASE(bi_iadd_v4s8_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false), + CASE(bi_iadd_v4s8_to(b, bi_register(1), bi_register(2), + bi_imm_u32(0xDEADBEEF), false), bi_iadd_imm_v4i8_to(b, bi_register(1), bi_register(2), 0xDEADBEEF)); - NEGCASE(bi_iadd_u32_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_u32(0xDEADBEEF), false)); - NEGCASE(bi_iadd_v2u16_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_u32(0xDEADBEEF), false)); - NEGCASE(bi_iadd_u32_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), true)); - NEGCASE(bi_iadd_s32_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_u32(0xDEADBEEF), false)); - NEGCASE(bi_iadd_v2s16_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_u32(0xDEADBEEF), false)); + NEGCASE(bi_iadd_u32_to(b, bi_register(1), + bi_swz_16(bi_register(2), false, false), + bi_imm_u32(0xDEADBEEF), false)); + NEGCASE(bi_iadd_v2u16_to(b, bi_register(1), + bi_swz_16(bi_register(2), false, false), + bi_imm_u32(0xDEADBEEF), false)); + NEGCASE(bi_iadd_u32_to(b, bi_register(1), bi_register(2), + bi_imm_u32(0xDEADBEEF), true)); + NEGCASE(bi_iadd_s32_to(b, bi_register(1), + bi_swz_16(bi_register(2), false, false), + bi_imm_u32(0xDEADBEEF), false)); + NEGCASE(bi_iadd_v2s16_to(b, bi_register(1), + bi_swz_16(bi_register(2), false, false), + bi_imm_u32(0xDEADBEEF), false)); - NEGCASE(bi_iadd_s32_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), true)); + NEGCASE(bi_iadd_s32_to(b, bi_register(1), bi_register(2), + bi_imm_u32(0xDEADBEEF), true)); } -TEST_F(AddImm, Int8) { +TEST_F(AddImm, Int8) +{ bi_index idx = bi_register(2); idx.swizzle = BI_SWIZZLE_B0000; - NEGCASE(bi_iadd_v4u8_to(b, bi_register(1), idx, bi_imm_u32(0xDEADBEEF), false)); - NEGCASE(bi_iadd_v4s8_to(b, bi_register(1), idx, bi_imm_u32(0xDEADBEEF), false)); + NEGCASE( + bi_iadd_v4u8_to(b, bi_register(1), idx, bi_imm_u32(0xDEADBEEF), false)); + NEGCASE( + bi_iadd_v4s8_to(b, bi_register(1), idx, bi_imm_u32(0xDEADBEEF), false)); } -TEST_F(AddImm, OnlyRTE) { +TEST_F(AddImm, OnlyRTE) +{ NEGCASE({ - bi_instr *I = bi_fadd_f32_to(b, bi_register(1), bi_register(2), bi_imm_f32(42.0)); - I->round = BI_ROUND_RTP; + bi_instr *I = + bi_fadd_f32_to(b, bi_register(1), bi_register(2), bi_imm_f32(42.0)); + I->round = BI_ROUND_RTP; }); NEGCASE({ - bi_instr *I = bi_fadd_v2f16_to(b, bi_register(1), bi_register(2), bi_imm_f16(42.0)); - I->round = BI_ROUND_RTZ; + bi_instr *I = + bi_fadd_v2f16_to(b, bi_register(1), bi_register(2), bi_imm_f16(42.0)); + I->round = BI_ROUND_RTZ; }); } - diff --git a/src/panfrost/bifrost/valhall/test/test-disassembler.c b/src/panfrost/bifrost/valhall/test/test-disassembler.c index 9be708e86c3..7b10bad38f0 100644 --- a/src/panfrost/bifrost/valhall/test/test-disassembler.c +++ b/src/panfrost/bifrost/valhall/test/test-disassembler.c @@ -21,8 +21,8 @@ * 
SOFTWARE. */ -#include #include +#include #include "disassemble.h" static inline uint8_t @@ -39,7 +39,7 @@ parse_hex(const char *in) for (unsigned i = 0; i < 8; ++i) { uint8_t byte = (parse_nibble(in[0]) << 4) | parse_nibble(in[1]); - v |= ((uint64_t) byte) << (8 * i); + v |= ((uint64_t)byte) << (8 * i); /* Skip the space after the byte */ in += 3; diff --git a/src/panfrost/bifrost/valhall/test/test-insert-flow.cpp b/src/panfrost/bifrost/valhall/test/test-insert-flow.cpp index 228eee34635..a9703c1c996 100644 --- a/src/panfrost/bifrost/valhall/test/test-insert-flow.cpp +++ b/src/panfrost/bifrost/valhall/test/test-insert-flow.cpp @@ -21,8 +21,8 @@ * SOFTWARE. */ -#include "bi_test.h" #include "bi_builder.h" +#include "bi_test.h" #include "va_compiler.h" #include "valhall_enums.h" @@ -37,177 +37,190 @@ strip_nops(bi_context *ctx) } } -#define CASE(shader_stage, test) do { \ - bi_builder *A = bit_builder(mem_ctx); \ - bi_builder *B = bit_builder(mem_ctx); \ - { \ - UNUSED bi_builder *b = A; \ - A->shader->stage = MESA_SHADER_ ## shader_stage; \ - test; \ - } \ - strip_nops(A->shader); \ - va_insert_flow_control_nops(A->shader); \ - { \ - UNUSED bi_builder *b = B; \ - B->shader->stage = MESA_SHADER_ ## shader_stage; \ - test; \ - } \ - ASSERT_SHADER_EQUAL(A->shader, B->shader); \ -} while(0) +#define CASE(shader_stage, test) \ + do { \ + bi_builder *A = bit_builder(mem_ctx); \ + bi_builder *B = bit_builder(mem_ctx); \ + { \ + UNUSED bi_builder *b = A; \ + A->shader->stage = MESA_SHADER_##shader_stage; \ + test; \ + } \ + strip_nops(A->shader); \ + va_insert_flow_control_nops(A->shader); \ + { \ + UNUSED bi_builder *b = B; \ + B->shader->stage = MESA_SHADER_##shader_stage; \ + test; \ + } \ + ASSERT_SHADER_EQUAL(A->shader, B->shader); \ + } while (0) -#define flow(f) bi_nop(b)->flow = VA_FLOW_ ## f +#define flow(f) bi_nop(b)->flow = VA_FLOW_##f class InsertFlow : public testing::Test { -protected: - InsertFlow() { + protected: + InsertFlow() + { mem_ctx = ralloc_context(NULL); } - ~InsertFlow() { + ~InsertFlow() + { ralloc_free(mem_ctx); } void *mem_ctx; }; -TEST_F(InsertFlow, PreserveEmptyShader) { +TEST_F(InsertFlow, PreserveEmptyShader) +{ CASE(FRAGMENT, {}); } -TEST_F(InsertFlow, TilebufferWait7) { +TEST_F(InsertFlow, TilebufferWait7) +{ CASE(FRAGMENT, { - flow(DISCARD); - bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); - flow(WAIT); - bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5), - bi_register(6), bi_register(7), bi_register(8), - BI_REGISTER_FORMAT_AUTO, 4, 4); - flow(END); + flow(DISCARD); + bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); + flow(WAIT); + bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5), + bi_register(6), bi_register(7), bi_register(8), + BI_REGISTER_FORMAT_AUTO, 4, 4); + flow(END); }); CASE(FRAGMENT, { - flow(DISCARD); - bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); - flow(WAIT); - bi_st_tile(b, bi_register(0), bi_register(4), bi_register(5), + flow(DISCARD); + bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); + flow(WAIT); + bi_st_tile(b, bi_register(0), bi_register(4), bi_register(5), + bi_register(6), BI_REGISTER_FORMAT_AUTO, BI_VECSIZE_V4); + flow(END); + }); + + CASE(FRAGMENT, { + flow(DISCARD); + bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); + flow(WAIT); + bi_ld_tile_to(b, bi_register(0), bi_register(4), bi_register(5), bi_register(6), BI_REGISTER_FORMAT_AUTO, BI_VECSIZE_V4); - flow(END); - }); - - CASE(FRAGMENT, { - flow(DISCARD); - 
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); - flow(WAIT); - bi_ld_tile_to(b, bi_register(0), bi_register(4), bi_register(5), - bi_register(6), BI_REGISTER_FORMAT_AUTO, BI_VECSIZE_V4); - flow(END); + flow(END); }); } -TEST_F(InsertFlow, AtestWait6AndWait0After) { +TEST_F(InsertFlow, AtestWait6AndWait0After) +{ CASE(FRAGMENT, { - flow(DISCARD); - bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); - flow(WAIT0126); - bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5), - bi_fau(BIR_FAU_ATEST_PARAM, false)); - flow(WAIT0); - flow(END); + flow(DISCARD); + bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); + flow(WAIT0126); + bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5), + bi_fau(BIR_FAU_ATEST_PARAM, false)); + flow(WAIT0); + flow(END); }); } -TEST_F(InsertFlow, ZSEmitWait6) { +TEST_F(InsertFlow, ZSEmitWait6) +{ CASE(FRAGMENT, { - flow(DISCARD); - bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); - flow(WAIT0126); - bi_zs_emit_to(b, bi_register(0), bi_register(4), bi_register(5), - bi_register(6), true, true); - flow(END); + flow(DISCARD); + bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); + flow(WAIT0126); + bi_zs_emit_to(b, bi_register(0), bi_register(4), bi_register(5), + bi_register(6), true, true); + flow(END); }); } -TEST_F(InsertFlow, LoadThenUnrelatedThenUse) { +TEST_F(InsertFlow, LoadThenUnrelatedThenUse) +{ CASE(VERTEX, { - bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61), - BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1); - bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); - flow(WAIT0); - bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(19)); - flow(END); + bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61), + BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1); + bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); + flow(WAIT0); + bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(19)); + flow(END); }); } -TEST_F(InsertFlow, SingleLdVar) { +TEST_F(InsertFlow, SingleLdVar) +{ CASE(FRAGMENT, { - flow(DISCARD); - bi_ld_var_buf_imm_f16_to(b, bi_register(2), bi_register(61), - BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER, - BI_SOURCE_FORMAT_F16, - BI_UPDATE_RETRIEVE, BI_VECSIZE_V4, 0); - flow(WAIT0); - flow(END); + flow(DISCARD); + bi_ld_var_buf_imm_f16_to(b, bi_register(2), bi_register(61), + BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER, + BI_SOURCE_FORMAT_F16, BI_UPDATE_RETRIEVE, + BI_VECSIZE_V4, 0); + flow(WAIT0); + flow(END); }); } -TEST_F(InsertFlow, SerializeLdVars) { +TEST_F(InsertFlow, SerializeLdVars) +{ CASE(FRAGMENT, { - flow(DISCARD); - bi_ld_var_buf_imm_f16_to(b, bi_register(16), bi_register(61), - BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER, - BI_SOURCE_FORMAT_F16, - BI_UPDATE_STORE, BI_VECSIZE_V4, 0); - bi_ld_var_buf_imm_f16_to(b, bi_register(2), bi_register(61), - BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER, - BI_SOURCE_FORMAT_F16, - BI_UPDATE_RETRIEVE, BI_VECSIZE_V4, 0); - flow(WAIT0); - bi_ld_var_buf_imm_f16_to(b, bi_register(8), bi_register(61), - BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER, - BI_SOURCE_FORMAT_F16, - BI_UPDATE_STORE, BI_VECSIZE_V4, 1); - flow(WAIT0); - flow(END); + flow(DISCARD); + bi_ld_var_buf_imm_f16_to(b, bi_register(16), bi_register(61), + BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER, + BI_SOURCE_FORMAT_F16, BI_UPDATE_STORE, + BI_VECSIZE_V4, 0); + bi_ld_var_buf_imm_f16_to(b, bi_register(2), bi_register(61), + BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER, + 
BI_SOURCE_FORMAT_F16, BI_UPDATE_RETRIEVE, + BI_VECSIZE_V4, 0); + flow(WAIT0); + bi_ld_var_buf_imm_f16_to(b, bi_register(8), bi_register(61), + BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER, + BI_SOURCE_FORMAT_F16, BI_UPDATE_STORE, + BI_VECSIZE_V4, 1); + flow(WAIT0); + flow(END); }); } -TEST_F(InsertFlow, Clper) { +TEST_F(InsertFlow, Clper) +{ CASE(FRAGMENT, { - bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); - bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8), - BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE, - BI_SUBGROUP_SUBGROUP4); - flow(DISCARD); - bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); - flow(END); + bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); + bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8), + BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE, + BI_SUBGROUP_SUBGROUP4); + flow(DISCARD); + bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); + flow(END); }); } -TEST_F(InsertFlow, TextureImplicit) { +TEST_F(InsertFlow, TextureImplicit) +{ CASE(FRAGMENT, { - bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); - bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8), - bi_register(12), false, BI_DIMENSION_2D, - BI_REGISTER_FORMAT_F32, false, false, - BI_VA_LOD_MODE_COMPUTED_LOD, BI_WRITE_MASK_RGBA, 4); - flow(DISCARD); - flow(WAIT0); - bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); - flow(END); + bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); + bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8), + bi_register(12), false, BI_DIMENSION_2D, + BI_REGISTER_FORMAT_F32, false, false, + BI_VA_LOD_MODE_COMPUTED_LOD, BI_WRITE_MASK_RGBA, 4); + flow(DISCARD); + flow(WAIT0); + bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); + flow(END); }); } -TEST_F(InsertFlow, TextureExplicit) { +TEST_F(InsertFlow, TextureExplicit) +{ CASE(FRAGMENT, { - flow(DISCARD); - bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); - bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8), - bi_register(12), false, BI_DIMENSION_2D, - BI_REGISTER_FORMAT_F32, false, false, - BI_VA_LOD_MODE_ZERO_LOD, BI_WRITE_MASK_RGBA, 4); - flow(WAIT0); - bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); - flow(END); + flow(DISCARD); + bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); + bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8), + bi_register(12), false, BI_DIMENSION_2D, + BI_REGISTER_FORMAT_F32, false, false, + BI_VA_LOD_MODE_ZERO_LOD, BI_WRITE_MASK_RGBA, 4); + flow(WAIT0); + bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); + flow(END); }); } @@ -217,49 +230,52 @@ TEST_F(InsertFlow, TextureExplicit) { * \ / * D */ -TEST_F(InsertFlow, DiamondCFG) { +TEST_F(InsertFlow, DiamondCFG) +{ CASE(FRAGMENT, { - bi_block *A = bi_start_block(&b->shader->blocks); - bi_block *B = bit_block(b->shader); - bi_block *C = bit_block(b->shader); - bi_block *D = bit_block(b->shader); + bi_block *A = bi_start_block(&b->shader->blocks); + bi_block *B = bit_block(b->shader); + bi_block *C = bit_block(b->shader); + bi_block *D = bit_block(b->shader); - bi_block_add_successor(A, B); - bi_block_add_successor(A, C); + bi_block_add_successor(A, B); + bi_block_add_successor(A, C); - bi_block_add_successor(B, D); - bi_block_add_successor(C, D); + bi_block_add_successor(B, D); + bi_block_add_successor(C, D); - /* B uses helper invocations, no other block does. 
- * - * That means B and C need to discard helpers. - */ - b->cursor = bi_after_block(B); - bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8), - BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE, - BI_SUBGROUP_SUBGROUP4); - flow(DISCARD); - flow(RECONVERGE); + /* B uses helper invocations, no other block does. + * + * That means B and C need to discard helpers. + */ + b->cursor = bi_after_block(B); + bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8), + BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE, + BI_SUBGROUP_SUBGROUP4); + flow(DISCARD); + flow(RECONVERGE); - b->cursor = bi_after_block(C); - flow(DISCARD); - bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); - flow(RECONVERGE); + b->cursor = bi_after_block(C); + flow(DISCARD); + bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); + flow(RECONVERGE); - b->cursor = bi_after_block(D); - flow(END); + b->cursor = bi_after_block(D); + flow(END); }); } -TEST_F(InsertFlow, BarrierBug) { +TEST_F(InsertFlow, BarrierBug) +{ CASE(KERNEL, { - bi_instr *I = bi_store_i32(b, bi_register(0), bi_register(2), bi_register(4), BI_SEG_NONE, 0); - I->slot = 2; + bi_instr *I = bi_store_i32(b, bi_register(0), bi_register(2), + bi_register(4), BI_SEG_NONE, 0); + I->slot = 2; - bi_fadd_f32_to(b, bi_register(10), bi_register(10), bi_register(10)); - flow(WAIT2); - bi_barrier(b); - flow(WAIT); - flow(END); + bi_fadd_f32_to(b, bi_register(10), bi_register(10), bi_register(10)); + flow(WAIT2); + bi_barrier(b); + flow(WAIT); + flow(END); }); } diff --git a/src/panfrost/bifrost/valhall/test/test-lower-constants.cpp b/src/panfrost/bifrost/valhall/test/test-lower-constants.cpp index 2d98a8fab82..d58805392fd 100644 --- a/src/panfrost/bifrost/valhall/test/test-lower-constants.cpp +++ b/src/panfrost/bifrost/valhall/test/test-lower-constants.cpp @@ -21,9 +21,9 @@ * SOFTWARE. 
*/ -#include "va_compiler.h" -#include "bi_test.h" #include "bi_builder.h" +#include "bi_test.h" +#include "va_compiler.h" #include @@ -38,19 +38,22 @@ add_imm(bi_context *ctx) #define CASE(instr, expected) INSTRUCTION_CASE(instr, expected, add_imm) class LowerConstants : public testing::Test { -protected: - LowerConstants() { + protected: + LowerConstants() + { mem_ctx = ralloc_context(NULL); } - ~LowerConstants() { + ~LowerConstants() + { ralloc_free(mem_ctx); } void *mem_ctx; }; -TEST_F(LowerConstants, Float32) { +TEST_F(LowerConstants, Float32) +{ CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_imm_f32(0.0)), bi_fadd_f32_to(b, bi_register(0), bi_register(0), va_lut(0))); @@ -61,46 +64,59 @@ TEST_F(LowerConstants, Float32) { bi_fadd_f32_to(b, bi_register(0), bi_register(0), va_lut(17))); } -TEST_F(LowerConstants, WidenFloat16) { +TEST_F(LowerConstants, WidenFloat16) +{ CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_imm_f32(0.5)), - bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_half(va_lut(26), 1))); + bi_fadd_f32_to(b, bi_register(0), bi_register(0), + bi_half(va_lut(26), 1))); CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_imm_f32(255.0)), - bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_half(va_lut(23), 0))); + bi_fadd_f32_to(b, bi_register(0), bi_register(0), + bi_half(va_lut(23), 0))); CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_imm_f32(256.0)), - bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_half(va_lut(23), 1))); + bi_fadd_f32_to(b, bi_register(0), bi_register(0), + bi_half(va_lut(23), 1))); CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_imm_f32(8.0)), - bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_half(va_lut(30), 1))); + bi_fadd_f32_to(b, bi_register(0), bi_register(0), + bi_half(va_lut(30), 1))); } -TEST_F(LowerConstants, ReplicateFloat16) { +TEST_F(LowerConstants, ReplicateFloat16) +{ CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_f16(255.0)), - bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_half(va_lut(23), 0))); + bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), + bi_half(va_lut(23), 0))); CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_f16(4.0)), - bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_half(va_lut(29), 1))); + bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), + bi_half(va_lut(29), 1))); } -TEST_F(LowerConstants, NegateFloat32) { +TEST_F(LowerConstants, NegateFloat32) +{ CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_imm_f32(-1.0)), bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_neg(va_lut(16)))); CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_imm_f32(-255.0)), - bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_neg(bi_half(va_lut(23), 0)))); + bi_fadd_f32_to(b, bi_register(0), bi_register(0), + bi_neg(bi_half(va_lut(23), 0)))); } TEST_F(LowerConstants, NegateReplicateFloat16) { CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_f16(-255.0)), - bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_neg(bi_half(va_lut(23), 0)))); + bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), + bi_neg(bi_half(va_lut(23), 0)))); } TEST_F(LowerConstants, NegateVec2Float16) { - CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_u32(0xBC008000)), - bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_neg(va_lut(27)))); + CASE( + bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), + bi_imm_u32(0xBC008000)), + bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_neg(va_lut(27)))); } 
TEST_F(LowerConstants, Int8InInt32) @@ -117,87 +133,105 @@ TEST_F(LowerConstants, ZeroExtendForUnsigned) CASE(bi_icmp_and_u32_to(b, bi_register(0), bi_register(0), bi_imm_u32(0xFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1), bi_icmp_and_u32_to(b, bi_register(0), bi_register(0), - bi_byte(va_lut(1), 0), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1)); + bi_byte(va_lut(1), 0), bi_register(0), BI_CMPF_LT, + BI_RESULT_TYPE_I1)); - CASE(bi_icmp_and_u32_to(b, bi_register(0), bi_register(0), - bi_imm_u32(0xFFFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1), - bi_icmp_and_u32_to(b, bi_register(0), bi_register(0), - bi_half(va_lut(1), 0), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1)); + CASE( + bi_icmp_and_u32_to(b, bi_register(0), bi_register(0), bi_imm_u32(0xFFFF), + bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1), + bi_icmp_and_u32_to(b, bi_register(0), bi_register(0), + bi_half(va_lut(1), 0), bi_register(0), BI_CMPF_LT, + BI_RESULT_TYPE_I1)); } TEST_F(LowerConstants, SignExtendPositiveForSigned) { - CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), - bi_imm_u32(0x7F), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1), + CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), bi_imm_u32(0x7F), + bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1), bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), - bi_byte(va_lut(2), 3), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1)); + bi_byte(va_lut(2), 3), bi_register(0), BI_CMPF_LT, + BI_RESULT_TYPE_I1)); - CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), - bi_imm_u32(0x7FFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1), - bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), - bi_half(va_lut(2), 1), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1)); + CASE( + bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), bi_imm_u32(0x7FFF), + bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1), + bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), + bi_half(va_lut(2), 1), bi_register(0), BI_CMPF_LT, + BI_RESULT_TYPE_I1)); } TEST_F(LowerConstants, SignExtendNegativeForSigned) { CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), - bi_imm_u32(0xFFFFFFF8), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1), + bi_imm_u32(0xFFFFFFF8), bi_register(0), BI_CMPF_LT, + BI_RESULT_TYPE_I1), bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), - bi_byte(va_lut(23), 0), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1)); + bi_byte(va_lut(23), 0), bi_register(0), BI_CMPF_LT, + BI_RESULT_TYPE_I1)); CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), - bi_imm_u32(0xFFFFFAFC), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1), + bi_imm_u32(0xFFFFFAFC), bi_register(0), BI_CMPF_LT, + BI_RESULT_TYPE_I1), bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), - bi_half(va_lut(3), 1), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1)); + bi_half(va_lut(3), 1), bi_register(0), BI_CMPF_LT, + BI_RESULT_TYPE_I1)); } TEST_F(LowerConstants, DontZeroExtendForSigned) { - CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), - bi_imm_u32(0xFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1), + CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), bi_imm_u32(0xFF), + bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1), bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), - bi_iadd_imm_i32(b, va_lut(0), 0xFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1)); - - CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), - bi_imm_u32(0xFFFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1), - bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), - bi_iadd_imm_i32(b, 
va_lut(0), 0xFFFF), bi_register(0), + bi_iadd_imm_i32(b, va_lut(0), 0xFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1)); + + CASE( + bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), bi_imm_u32(0xFFFF), + bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1), + bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), + bi_iadd_imm_i32(b, va_lut(0), 0xFFFF), bi_register(0), + BI_CMPF_LT, BI_RESULT_TYPE_I1)); } TEST_F(LowerConstants, DontZeroExtendNegative) { CASE(bi_icmp_and_u32_to(b, bi_register(0), bi_register(0), - bi_imm_u32(0xFFFFFFF8), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1), + bi_imm_u32(0xFFFFFFF8), bi_register(0), BI_CMPF_LT, + BI_RESULT_TYPE_I1), bi_icmp_and_u32_to(b, bi_register(0), bi_register(0), - bi_iadd_imm_i32(b, va_lut(0), 0xFFFFFFF8), bi_register(0), - BI_CMPF_LT, BI_RESULT_TYPE_I1)); + bi_iadd_imm_i32(b, va_lut(0), 0xFFFFFFF8), + bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1)); CASE(bi_icmp_and_u32_to(b, bi_register(0), bi_register(0), - bi_imm_u32(0xFFFFFAFC), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1), + bi_imm_u32(0xFFFFFAFC), bi_register(0), BI_CMPF_LT, + BI_RESULT_TYPE_I1), bi_icmp_and_u32_to(b, bi_register(0), bi_register(0), - bi_iadd_imm_i32(b, va_lut(0), 0xFFFFFAFC), bi_register(0), - BI_CMPF_LT, BI_RESULT_TYPE_I1)); + bi_iadd_imm_i32(b, va_lut(0), 0xFFFFFAFC), + bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1)); } TEST_F(LowerConstants, HandleTrickyNegativesFP16) { - CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_f16(-57216.0)), - bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_half(va_lut(3), 1))); + CASE( + bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_f16(-57216.0)), + bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), + bi_half(va_lut(3), 1))); - CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_f16(57216.0)), - bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_neg(bi_half(va_lut(3), 1)))); + CASE( + bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_f16(57216.0)), + bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), + bi_neg(bi_half(va_lut(3), 1)))); } TEST_F(LowerConstants, MaintainMkvecRestrictedSwizzles) { - CASE(bi_mkvec_v2i8_to(b, bi_register(0), bi_register(0), - bi_imm_u8(0), bi_imm_u32(0)), + CASE(bi_mkvec_v2i8_to(b, bi_register(0), bi_register(0), bi_imm_u8(0), + bi_imm_u32(0)), bi_mkvec_v2i8_to(b, bi_register(0), bi_register(0), bi_byte(va_lut(0), 0), va_lut(0))); - CASE(bi_mkvec_v2i8_to(b, bi_register(0), bi_register(0), - bi_imm_u8(14), bi_imm_u32(0)), + CASE(bi_mkvec_v2i8_to(b, bi_register(0), bi_register(0), bi_imm_u8(14), + bi_imm_u32(0)), bi_mkvec_v2i8_to(b, bi_register(0), bi_register(0), bi_byte(va_lut(11), 2), va_lut(0))); } diff --git a/src/panfrost/bifrost/valhall/test/test-lower-isel.cpp b/src/panfrost/bifrost/valhall/test/test-lower-isel.cpp index 994885b66db..df1947be323 100644 --- a/src/panfrost/bifrost/valhall/test/test-lower-isel.cpp +++ b/src/panfrost/bifrost/valhall/test/test-lower-isel.cpp @@ -21,18 +21,19 @@ * SOFTWARE. 
*/ -#include "va_compiler.h" -#include "bi_test.h" #include "bi_builder.h" +#include "bi_test.h" +#include "va_compiler.h" #include #define CASE(instr, expected) INSTRUCTION_CASE(instr, expected, va_lower_isel) -#define NEGCASE(instr) CASE(instr, instr) +#define NEGCASE(instr) CASE(instr, instr) class LowerIsel : public testing::Test { -protected: - LowerIsel() { + protected: + LowerIsel() + { mem_ctx = ralloc_context(NULL); reg = bi_register(1); x = bi_register(2); @@ -40,7 +41,8 @@ protected: z = bi_register(4); } - ~LowerIsel() { + ~LowerIsel() + { ralloc_free(mem_ctx); } @@ -48,14 +50,16 @@ protected: bi_index reg, x, y, z; }; -TEST_F(LowerIsel, 8BitSwizzles) { +TEST_F(LowerIsel, 8BitSwizzles) +{ for (unsigned i = 0; i < 4; ++i) { CASE(bi_swz_v4i8_to(b, reg, bi_byte(reg, i)), bi_iadd_v4u8_to(b, reg, bi_byte(reg, i), bi_zero(), false)); } } -TEST_F(LowerIsel, 16BitSwizzles) { +TEST_F(LowerIsel, 16BitSwizzles) +{ for (unsigned i = 0; i < 2; ++i) { for (unsigned j = 0; j < 2; ++j) { CASE(bi_swz_v2i16_to(b, reg, bi_swz_16(reg, i, j)), @@ -64,24 +68,30 @@ TEST_F(LowerIsel, 16BitSwizzles) { } } -TEST_F(LowerIsel, JumpsLoweredToBranches) { - bi_block block = { }; +TEST_F(LowerIsel, JumpsLoweredToBranches) +{ + bi_block block = {}; - CASE({ - bi_instr *I = bi_jump(b, bi_imm_u32(0xDEADBEEF)); - I->branch_target = █ - }, { - bi_instr *I = bi_branchz_i16(b, bi_zero(), bi_imm_u32(0xDEADBEEF), BI_CMPF_EQ); - I->branch_target = █ - }); + CASE( + { + bi_instr *I = bi_jump(b, bi_imm_u32(0xDEADBEEF)); + I->branch_target = █ + }, + { + bi_instr *I = + bi_branchz_i16(b, bi_zero(), bi_imm_u32(0xDEADBEEF), BI_CMPF_EQ); + I->branch_target = █ + }); } -TEST_F(LowerIsel, IndirectJumpsLoweredToBranches) { +TEST_F(LowerIsel, IndirectJumpsLoweredToBranches) +{ CASE(bi_jump(b, bi_register(17)), bi_branchzi(b, bi_zero(), bi_register(17), BI_CMPF_EQ)); } -TEST_F(LowerIsel, IntegerCSEL) { +TEST_F(LowerIsel, IntegerCSEL) +{ CASE(bi_csel_i32(b, reg, reg, reg, reg, BI_CMPF_EQ), bi_csel_u32(b, reg, reg, reg, reg, BI_CMPF_EQ)); @@ -89,7 +99,8 @@ TEST_F(LowerIsel, IntegerCSEL) { bi_csel_v2u16(b, reg, reg, reg, reg, BI_CMPF_EQ)); } -TEST_F(LowerIsel, AvoidSimpleMux) { +TEST_F(LowerIsel, AvoidSimpleMux) +{ CASE(bi_mux_i32(b, x, y, z, BI_MUX_INT_ZERO), bi_csel_u32(b, z, bi_zero(), x, y, BI_CMPF_EQ)); CASE(bi_mux_i32(b, x, y, z, BI_MUX_NEG), @@ -105,27 +116,32 @@ TEST_F(LowerIsel, AvoidSimpleMux) { bi_csel_v2f16(b, z, bi_zero(), x, y, BI_CMPF_EQ)); } -TEST_F(LowerIsel, BitwiseMux) { +TEST_F(LowerIsel, BitwiseMux) +{ NEGCASE(bi_mux_i32(b, x, y, z, BI_MUX_BIT)); NEGCASE(bi_mux_v2i16(b, x, y, z, BI_MUX_BIT)); NEGCASE(bi_mux_v4i8(b, x, y, z, BI_MUX_BIT)); } -TEST_F(LowerIsel, MuxInt8) { +TEST_F(LowerIsel, MuxInt8) +{ NEGCASE(bi_mux_v4i8(b, x, y, z, BI_MUX_INT_ZERO)); NEGCASE(bi_mux_v4i8(b, x, y, z, BI_MUX_NEG)); NEGCASE(bi_mux_v4i8(b, x, y, z, BI_MUX_FP_ZERO)); } -TEST_F(LowerIsel, FaddRscale) { - CASE(bi_fadd_rscale_f32_to(b, reg, x, y, z, BI_SPECIAL_NONE), - bi_fma_rscale_f32_to(b, reg, x, bi_imm_f32(1.0), y, z, BI_SPECIAL_NONE)); +TEST_F(LowerIsel, FaddRscale) +{ + CASE( + bi_fadd_rscale_f32_to(b, reg, x, y, z, BI_SPECIAL_NONE), + bi_fma_rscale_f32_to(b, reg, x, bi_imm_f32(1.0), y, z, BI_SPECIAL_NONE)); CASE(bi_fadd_rscale_f32_to(b, reg, x, y, z, BI_SPECIAL_N), bi_fma_rscale_f32_to(b, reg, x, bi_imm_f32(1.0), y, z, BI_SPECIAL_N)); } -TEST_F(LowerIsel, Smoke) { +TEST_F(LowerIsel, Smoke) +{ NEGCASE(bi_fadd_f32_to(b, reg, reg, reg)); NEGCASE(bi_csel_s32_to(b, reg, reg, reg, reg, reg, BI_CMPF_LT)); NEGCASE(bi_csel_u32_to(b, reg, 
reg, reg, reg, reg, BI_CMPF_LT)); diff --git a/src/panfrost/bifrost/valhall/test/test-mark-last.cpp b/src/panfrost/bifrost/valhall/test/test-mark-last.cpp index f79b9a73855..779a13c2b65 100644 --- a/src/panfrost/bifrost/valhall/test/test-mark-last.cpp +++ b/src/panfrost/bifrost/valhall/test/test-mark-last.cpp @@ -21,14 +21,14 @@ * SOFTWARE. */ -#include "bi_test.h" #include "bi_builder.h" +#include "bi_test.h" #include "va_compiler.h" #include "valhall_enums.h" #include -#define R(x) bi_register(x) +#define R(x) bi_register(x) #define DR(x) bi_discard(R(x)) static void @@ -40,105 +40,119 @@ strip_discard(bi_context *ctx) } } -#define CASE(test) do { \ - void *mem_ctx = ralloc_context(NULL); \ - bi_builder *A = bit_builder(mem_ctx); \ - bi_builder *B = bit_builder(mem_ctx); \ - { \ - UNUSED bi_builder *b = A; \ - test; \ - } \ - strip_discard(A->shader); \ - va_mark_last(A->shader); \ - { \ - UNUSED bi_builder *b = B; \ - test; \ - } \ - ASSERT_SHADER_EQUAL(A->shader, B->shader); \ - ralloc_free(mem_ctx); \ -} while(0) +#define CASE(test) \ + do { \ + void *mem_ctx = ralloc_context(NULL); \ + bi_builder *A = bit_builder(mem_ctx); \ + bi_builder *B = bit_builder(mem_ctx); \ + { \ + UNUSED bi_builder *b = A; \ + test; \ + } \ + strip_discard(A->shader); \ + va_mark_last(A->shader); \ + { \ + UNUSED bi_builder *b = B; \ + test; \ + } \ + ASSERT_SHADER_EQUAL(A->shader, B->shader); \ + ralloc_free(mem_ctx); \ + } while (0) -TEST(MarkLast, Simple) { +TEST(MarkLast, Simple) +{ CASE(bi_fadd_f32_to(b, R(0), DR(0), DR(1))); CASE({ - bi_fadd_f32_to(b, R(2), R(0), DR(1)); - bi_fadd_f32_to(b, R(0), DR(0), DR(2)); + bi_fadd_f32_to(b, R(2), R(0), DR(1)); + bi_fadd_f32_to(b, R(0), DR(0), DR(2)); }); } -TEST(MarkLast, SameSourceAndDestination) { +TEST(MarkLast, SameSourceAndDestination) +{ CASE({ - bi_fadd_f32_to(b, R(0), DR(0), DR(0)); - bi_fadd_f32_to(b, R(0), DR(0), DR(0)); - bi_fadd_f32_to(b, R(0), DR(0), DR(0)); + bi_fadd_f32_to(b, R(0), DR(0), DR(0)); + bi_fadd_f32_to(b, R(0), DR(0), DR(0)); + bi_fadd_f32_to(b, R(0), DR(0), DR(0)); }); } -TEST(MarkLast, StagingReadBefore) { +TEST(MarkLast, StagingReadBefore) +{ CASE({ - bi_fadd_f32_to(b, R(9), R(2), DR(7)); - bi_st_tile(b, R(0), DR(4), DR(5), DR(6), BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4); + bi_fadd_f32_to(b, R(9), R(2), DR(7)); + bi_st_tile(b, R(0), DR(4), DR(5), DR(6), BI_REGISTER_FORMAT_F32, + BI_VECSIZE_V4); }); } -TEST(MarkLast, StagingReadAfter) { +TEST(MarkLast, StagingReadAfter) +{ CASE({ - bi_st_tile(b, R(0), DR(4), DR(5), DR(6), BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4); - bi_fadd_f32_to(b, R(9), R(2), DR(7)); + bi_st_tile(b, R(0), DR(4), DR(5), DR(6), BI_REGISTER_FORMAT_F32, + BI_VECSIZE_V4); + bi_fadd_f32_to(b, R(9), R(2), DR(7)); }); } -TEST(MarkLast, NonstagingSourceToAsync) { +TEST(MarkLast, NonstagingSourceToAsync) +{ CASE({ - bi_st_tile(b, R(0), R(4), R(5), DR(6), BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4); - bi_fadd_f32_to(b, R(9), DR(4), DR(5)); + bi_st_tile(b, R(0), R(4), R(5), DR(6), BI_REGISTER_FORMAT_F32, + BI_VECSIZE_V4); + bi_fadd_f32_to(b, R(9), DR(4), DR(5)); }); } -TEST(MarkLast, Both64) { +TEST(MarkLast, Both64) +{ CASE(bi_load_i32_to(b, R(0), DR(8), DR(9), BI_SEG_NONE, 0)); } -TEST(MarkLast, Neither64ThenBoth) { +TEST(MarkLast, Neither64ThenBoth) +{ CASE({ - bi_load_i32_to(b, R(0), R(8), R(9), BI_SEG_NONE, 0); - bi_load_i32_to(b, R(1), DR(8), DR(9), BI_SEG_NONE, 8); + bi_load_i32_to(b, R(0), R(8), R(9), BI_SEG_NONE, 0); + bi_load_i32_to(b, R(1), DR(8), DR(9), BI_SEG_NONE, 8); }); } -TEST(MarkLast, Half64) { +TEST(MarkLast, 
Half64) +{ CASE({ - bi_load_i32_to(b, R(0), R(8), R(9), BI_SEG_NONE, 0); - bi_fadd_f32_to(b, R(8), DR(8), DR(8)); + bi_load_i32_to(b, R(0), R(8), R(9), BI_SEG_NONE, 0); + bi_fadd_f32_to(b, R(8), DR(8), DR(8)); }); CASE({ - bi_load_i32_to(b, R(0), R(8), R(9), BI_SEG_NONE, 0); - bi_fadd_f32_to(b, R(9), DR(9), DR(9)); + bi_load_i32_to(b, R(0), R(8), R(9), BI_SEG_NONE, 0); + bi_fadd_f32_to(b, R(9), DR(9), DR(9)); }); } -TEST(MarkLast, RegisterBlendDescriptor) { +TEST(MarkLast, RegisterBlendDescriptor) +{ CASE({ - bi_blend_to(b, R(48), R(0), DR(60), DR(4), DR(5), bi_null(), - BI_REGISTER_FORMAT_F32, 4, 0); + bi_blend_to(b, R(48), R(0), DR(60), DR(4), DR(5), bi_null(), + BI_REGISTER_FORMAT_F32, 4, 0); }); CASE({ - bi_blend_to(b, R(48), R(0), DR(60), R(4), R(5), bi_null(), - BI_REGISTER_FORMAT_F32, 4, 0); - bi_fadd_f32_to(b, R(4), DR(4), DR(7)); + bi_blend_to(b, R(48), R(0), DR(60), R(4), R(5), bi_null(), + BI_REGISTER_FORMAT_F32, 4, 0); + bi_fadd_f32_to(b, R(4), DR(4), DR(7)); }); CASE({ - bi_blend_to(b, R(48), R(0), DR(60), R(4), R(5), bi_null(), - BI_REGISTER_FORMAT_F32, 4, 0); - bi_fadd_f32_to(b, R(4), DR(5), DR(7)); + bi_blend_to(b, R(48), R(0), DR(60), R(4), R(5), bi_null(), + BI_REGISTER_FORMAT_F32, 4, 0); + bi_fadd_f32_to(b, R(4), DR(5), DR(7)); }); } -TEST(MarkLast, ControlFlowAllFeatures) { +TEST(MarkLast, ControlFlowAllFeatures) +{ /* A * / \ * B C @@ -153,9 +167,8 @@ TEST(MarkLast, ControlFlowAllFeatures) { b->cursor = bi_after_block(A); { - bi_instr *I = - bi_st_tile(b, R(10), DR(14), DR(15), DR(16), - BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4); + bi_instr *I = bi_st_tile(b, R(10), DR(14), DR(15), DR(16), + BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4); I->slot = 2; bi_load_i32_to(b, R(20), R(28), R(29), BI_SEG_NONE, 0); diff --git a/src/panfrost/bifrost/valhall/test/test-merge-flow.cpp b/src/panfrost/bifrost/valhall/test/test-merge-flow.cpp index a02600bb31d..36e8c1c5064 100644 --- a/src/panfrost/bifrost/valhall/test/test-merge-flow.cpp +++ b/src/panfrost/bifrost/valhall/test/test-merge-flow.cpp @@ -21,42 +21,45 @@ * SOFTWARE. 
*/ -#include "bi_test.h" #include "bi_builder.h" +#include "bi_test.h" #include "va_compiler.h" #include "valhall_enums.h" #include -#define CASE(test, expected) do { \ - bi_builder *A = bit_builder(mem_ctx); \ - bi_builder *B = bit_builder(mem_ctx); \ - { \ - bi_builder *b = A; \ - A->shader->stage = MESA_SHADER_FRAGMENT; \ - test; \ - } \ - va_merge_flow(A->shader); \ - { \ - bi_builder *b = B; \ - B->shader->stage = MESA_SHADER_FRAGMENT; \ - expected; \ - } \ - ASSERT_SHADER_EQUAL(A->shader, B->shader); \ -} while(0) +#define CASE(test, expected) \ + do { \ + bi_builder *A = bit_builder(mem_ctx); \ + bi_builder *B = bit_builder(mem_ctx); \ + { \ + bi_builder *b = A; \ + A->shader->stage = MESA_SHADER_FRAGMENT; \ + test; \ + } \ + va_merge_flow(A->shader); \ + { \ + bi_builder *b = B; \ + B->shader->stage = MESA_SHADER_FRAGMENT; \ + expected; \ + } \ + ASSERT_SHADER_EQUAL(A->shader, B->shader); \ + } while (0) #define NEGCASE(test) CASE(test, test) -#define flow(f) bi_nop(b)->flow = VA_FLOW_ ## f +#define flow(f) bi_nop(b)->flow = VA_FLOW_##f class MergeFlow : public testing::Test { -protected: - MergeFlow() { + protected: + MergeFlow() + { mem_ctx = ralloc_context(NULL); atest = bi_fau(BIR_FAU_ATEST_PARAM, false); } - ~MergeFlow() { + ~MergeFlow() + { ralloc_free(mem_ctx); } @@ -65,74 +68,84 @@ protected: bi_index atest; }; -TEST_F(MergeFlow, End) { - CASE({ - bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); - bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5), - bi_register(6), bi_register(7), bi_register(8), - BI_REGISTER_FORMAT_AUTO, 4, 4); - flow(END); - }, - { - bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); - I = bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5), - bi_register(6), bi_register(7), bi_register(8), - BI_REGISTER_FORMAT_AUTO, 4, 4); - I->flow = VA_FLOW_END; - }); +TEST_F(MergeFlow, End) +{ + CASE( + { + bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); + bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5), + bi_register(6), bi_register(7), bi_register(8), + BI_REGISTER_FORMAT_AUTO, 4, 4); + flow(END); + }, + { + bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); + I = bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5), + bi_register(6), bi_register(7), bi_register(8), + BI_REGISTER_FORMAT_AUTO, 4, 4); + I->flow = VA_FLOW_END; + }); } -TEST_F(MergeFlow, Reconverge) { - CASE({ - bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); - bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5), - bi_register(6), bi_register(7), bi_register(8), - BI_REGISTER_FORMAT_AUTO, 4, 4); - flow(RECONVERGE); - }, - { - bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); - I = bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5), - bi_register(6), bi_register(7), bi_register(8), - BI_REGISTER_FORMAT_AUTO, 4, 4); - I->flow = VA_FLOW_RECONVERGE; - }); +TEST_F(MergeFlow, Reconverge) +{ + CASE( + { + bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); + bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5), + bi_register(6), bi_register(7), bi_register(8), + BI_REGISTER_FORMAT_AUTO, 4, 4); + flow(RECONVERGE); + }, + { + bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); + I = bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5), + bi_register(6), bi_register(7), bi_register(8), + BI_REGISTER_FORMAT_AUTO, 4, 4); + I->flow = VA_FLOW_RECONVERGE; + }); } -TEST_F(MergeFlow, TrivialWait) { - 
CASE({ - bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); - flow(WAIT0126); - bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5), atest); - }, - { - I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); - I->flow = VA_FLOW_WAIT0126; - bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5), atest); - }); +TEST_F(MergeFlow, TrivialWait) +{ + CASE( + { + bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); + flow(WAIT0126); + bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5), atest); + }, + { + I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); + I->flow = VA_FLOW_WAIT0126; + bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5), atest); + }); } -TEST_F(MergeFlow, LoadThenUnrelatedThenUse) { - CASE({ +TEST_F(MergeFlow, LoadThenUnrelatedThenUse) +{ + CASE( + { bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61), BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1); bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); flow(WAIT0); bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(19)); flow(END); - }, - { + }, + { bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61), BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1); I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); I->flow = VA_FLOW_WAIT0; I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(19)); I->flow = VA_FLOW_END; - }); + }); } -TEST_F(MergeFlow, TrivialDiscard) { - CASE({ +TEST_F(MergeFlow, TrivialDiscard) +{ + CASE( + { bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8), BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE, @@ -140,31 +153,35 @@ TEST_F(MergeFlow, TrivialDiscard) { flow(DISCARD); bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); flow(END); - }, - { + }, + { bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); I = bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8), - BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE, - BI_SUBGROUP_SUBGROUP4); + BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE, + BI_SUBGROUP_SUBGROUP4); I->flow = VA_FLOW_DISCARD; I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); I->flow = VA_FLOW_END; - }); + }); } -TEST_F(MergeFlow, TrivialDiscardAtTheStart) { - CASE({ +TEST_F(MergeFlow, TrivialDiscardAtTheStart) +{ + CASE( + { flow(DISCARD); bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); - }, - { + }, + { I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); I->flow = VA_FLOW_DISCARD; - }); + }); } -TEST_F(MergeFlow, MoveDiscardPastWait) { - CASE({ +TEST_F(MergeFlow, MoveDiscardPastWait) +{ + CASE( + { bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8), BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE, @@ -172,20 +189,22 @@ TEST_F(MergeFlow, MoveDiscardPastWait) { flow(DISCARD); flow(WAIT0); bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); - }, - { + }, + { bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); I = bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8), - BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE, - BI_SUBGROUP_SUBGROUP4); + BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE, + BI_SUBGROUP_SUBGROUP4); I->flow = VA_FLOW_WAIT0; I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); I->flow = VA_FLOW_DISCARD; - }); + }); } 
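Each of these cases follows the same pattern: build the input program, run va_merge_flow, and compare against a hand-written expectation. As a hedged illustration of how a further case could be added in the same style, the smallest useful one pairs a single ALU instruction with a trailing wait; judging by the LoadThenUnrelatedThenUse and WaitWithMessage cases in this file, the wait should fold onto the preceding instruction. The test name below is hypothetical and the case is illustrative only, not part of this change:

TEST_F(MergeFlow, SketchFoldTrailingWait)
{
   /* Sketch: a flow-only NOP carrying WAIT0 after a free ALU op is expected
    * to be deleted, with its wait moved onto that op. Uses the CASE and
    * flow() helpers defined at the top of this file.
    */
   CASE(
      {
         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
         flow(WAIT0);
      },
      {
         I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
         I->flow = VA_FLOW_WAIT0;
      });
}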
-TEST_F(MergeFlow, OccludedWaitsAndDiscard) { - CASE({ +TEST_F(MergeFlow, OccludedWaitsAndDiscard) +{ + CASE( + { bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8), BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE, @@ -194,75 +213,84 @@ TEST_F(MergeFlow, OccludedWaitsAndDiscard) { flow(DISCARD); flow(WAIT2); bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); - }, - { + }, + { bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); I = bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8), - BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE, - BI_SUBGROUP_SUBGROUP4); + BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE, + BI_SUBGROUP_SUBGROUP4); I->flow = VA_FLOW_WAIT02; I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); I->flow = VA_FLOW_DISCARD; - }); + }); } -TEST_F(MergeFlow, DeleteUselessWaits) { - CASE({ +TEST_F(MergeFlow, DeleteUselessWaits) +{ + CASE( + { bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); flow(WAIT0); flow(WAIT2); flow(END); - }, - { + }, + { bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); I->flow = VA_FLOW_END; - }); + }); } -TEST_F(MergeFlow, BlockFullOfUselessWaits) { - CASE({ +TEST_F(MergeFlow, BlockFullOfUselessWaits) +{ + CASE( + { flow(WAIT0); flow(WAIT2); flow(DISCARD); flow(END); - }, - { - flow(END); - }); + }, + { flow(END); }); } -TEST_F(MergeFlow, WaitWithMessage) { - CASE({ +TEST_F(MergeFlow, WaitWithMessage) +{ + CASE( + { bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61), BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1); flow(WAIT0); - }, - { - I = bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61), - BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1); + }, + { + I = bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), + bi_register(61), BI_REGISTER_FORMAT_F32, + BI_VECSIZE_V4, 1); I->flow = VA_FLOW_WAIT0; - }); + }); } -TEST_F(MergeFlow, CantMoveWaitPastMessage) { +TEST_F(MergeFlow, CantMoveWaitPastMessage) +{ NEGCASE({ - bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); - I = bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61), + bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); + I = + bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61), BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1); - /* Pretend it's blocked for some reason. This doesn't actually happen - * with the current algorithm, but it's good to handle the special - * cases correctly in case we change later on. - */ - I->flow = VA_FLOW_DISCARD; - flow(WAIT0); + /* Pretend it's blocked for some reason. This doesn't actually happen + * with the current algorithm, but it's good to handle the special + * cases correctly in case we change later on. 
+ */ + I->flow = VA_FLOW_DISCARD; + flow(WAIT0); }); } -TEST_F(MergeFlow, DeletePointlessDiscard) { - CASE({ +TEST_F(MergeFlow, DeletePointlessDiscard) +{ + CASE( + { bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8), bi_register(12), false, BI_DIMENSION_2D, @@ -277,31 +305,34 @@ TEST_F(MergeFlow, DeletePointlessDiscard) { bi_register(6), bi_register(7), bi_register(8), BI_REGISTER_FORMAT_AUTO, 4, 4); flow(END); - }, - { + }, + { bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); - I = bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8), - bi_register(12), false, BI_DIMENSION_2D, - BI_REGISTER_FORMAT_F32, false, false, - BI_VA_LOD_MODE_COMPUTED_LOD, BI_WRITE_MASK_RGBA, 4); + I = bi_tex_single_to( + b, bi_register(0), bi_register(4), bi_register(8), bi_register(12), + false, BI_DIMENSION_2D, BI_REGISTER_FORMAT_F32, false, false, + BI_VA_LOD_MODE_COMPUTED_LOD, BI_WRITE_MASK_RGBA, 4); I->flow = VA_FLOW_WAIT0126; - I = bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5), atest); + I = bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5), + atest); I->flow = VA_FLOW_WAIT; I = bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5), bi_register(6), bi_register(7), bi_register(8), BI_REGISTER_FORMAT_AUTO, 4, 4); I->flow = VA_FLOW_END; - }); + }); } -TEST_F(MergeFlow, PreserveTerminalBarriers) { - CASE({ +TEST_F(MergeFlow, PreserveTerminalBarriers) +{ + CASE( + { bi_barrier(b); flow(WAIT); flow(END); - }, - { + }, + { bi_barrier(b)->flow = VA_FLOW_WAIT; flow(END); - }); + }); } diff --git a/src/panfrost/bifrost/valhall/test/test-packing.cpp b/src/panfrost/bifrost/valhall/test/test-packing.cpp index 5e8cd7c0f42..b7428497897 100644 --- a/src/panfrost/bifrost/valhall/test/test-packing.cpp +++ b/src/panfrost/bifrost/valhall/test/test-packing.cpp @@ -21,34 +21,38 @@ * SOFTWARE. 
*/ -#include "va_compiler.h" -#include "bi_test.h" #include "bi_builder.h" +#include "bi_test.h" +#include "va_compiler.h" #include -#define CASE(instr, expected) do { \ - uint64_t _value = va_pack_instr(instr); \ - if (_value != expected) { \ - fprintf(stderr, "Got %" PRIx64 ", expected %" PRIx64 "\n", _value, (uint64_t) expected); \ - bi_print_instr(instr, stderr); \ - fprintf(stderr, "\n"); \ - ADD_FAILURE(); \ - } \ -} while(0) +#define CASE(instr, expected) \ + do { \ + uint64_t _value = va_pack_instr(instr); \ + if (_value != expected) { \ + fprintf(stderr, "Got %" PRIx64 ", expected %" PRIx64 "\n", _value, \ + (uint64_t)expected); \ + bi_print_instr(instr, stderr); \ + fprintf(stderr, "\n"); \ + ADD_FAILURE(); \ + } \ + } while (0) class ValhallPacking : public testing::Test { -protected: - ValhallPacking() { + protected: + ValhallPacking() + { mem_ctx = ralloc_context(NULL); b = bit_builder(mem_ctx); - zero = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 0), false); - one = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 8), false); - n4567 = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 4), true); + zero = bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | 0), false); + one = bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | 8), false); + n4567 = bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | 4), true); } - ~ValhallPacking() { + ~ValhallPacking() + { ralloc_free(mem_ctx); } @@ -57,60 +61,67 @@ protected: bi_index zero, one, n4567; }; -TEST_F(ValhallPacking, Moves) { +TEST_F(ValhallPacking, Moves) +{ CASE(bi_mov_i32_to(b, bi_register(1), bi_register(2)), - 0x0091c10000000002ULL); - CASE(bi_mov_i32_to(b, bi_register(1), bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 5), false)), - 0x0091c1000000008aULL); + 0x0091c10000000002ULL); + CASE(bi_mov_i32_to(b, bi_register(1), + bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 5), false)), + 0x0091c1000000008aULL); } -TEST_F(ValhallPacking, Fadd) { +TEST_F(ValhallPacking, Fadd) +{ CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_register(2)), - 0x00a4c00000000201ULL); - CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_abs(bi_register(2))), - 0x00a4c02000000201ULL); - CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_neg(bi_register(2))), - 0x00a4c01000000201ULL); + 0x00a4c00000000201ULL); + CASE( + bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_abs(bi_register(2))), + 0x00a4c02000000201ULL); + CASE( + bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_neg(bi_register(2))), + 0x00a4c01000000201ULL); - CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_swz_16(bi_register(1), false, false), + CASE(bi_fadd_v2f16_to(b, bi_register(0), + bi_swz_16(bi_register(1), false, false), bi_swz_16(bi_register(0), true, true)), - 0x00a5c0000c000001ULL); + 0x00a5c0000c000001ULL); CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(1), bi_register(0)), - 0x00a5c00028000001ULL); + 0x00a5c00028000001ULL); CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(1), bi_swz_16(bi_register(0), true, false)), - 0x00a5c00024000001ULL); + 0x00a5c00024000001ULL); CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_discard(bi_abs(bi_register(0))), bi_neg(zero)), - 0x00a5c0902800c040ULL); + 0x00a5c0902800c040ULL); - CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), - zero), - 0x00a4c0000000c001ULL); + CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), zero), + 0x00a4c0000000c001ULL); - CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), - bi_neg(zero)), - 0x00a4c0100000c001ULL); + CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_neg(zero)), + 0x00a4c0100000c001ULL); 
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_half(bi_register(0), true)), - 0x00a4c00008000001ULL); + 0x00a4c00008000001ULL); CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_half(bi_register(0), false)), - 0x00a4c00004000001ULL); + 0x00a4c00004000001ULL); } -TEST_F(ValhallPacking, Clper) { +TEST_F(ValhallPacking, Clper) +{ CASE(bi_clper_i32_to(b, bi_register(0), bi_register(0), bi_byte(n4567, 0), - BI_INACTIVE_RESULT_F1, BI_LANE_OP_NONE, BI_SUBGROUP_SUBGROUP16), - 0x00a0c030128fc900); + BI_INACTIVE_RESULT_F1, BI_LANE_OP_NONE, + BI_SUBGROUP_SUBGROUP16), + 0x00a0c030128fc900); } -TEST_F(ValhallPacking, Clamps) { +TEST_F(ValhallPacking, Clamps) +{ bi_instr *I = bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_neg(bi_abs(bi_register(2)))); CASE(I, 0x00a4c03000000201ULL); @@ -119,209 +130,243 @@ TEST_F(ValhallPacking, Clamps) { CASE(I, 0x00a4c03200000201ULL); } -TEST_F(ValhallPacking, Misc) { +TEST_F(ValhallPacking, Misc) +{ CASE(bi_fma_f32_to(b, bi_register(1), bi_discard(bi_register(1)), - bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 4), false), - bi_neg(zero)), - 0x00b2c10400c08841ULL); + bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 4), false), + bi_neg(zero)), + 0x00b2c10400c08841ULL); CASE(bi_fround_f32_to(b, bi_register(2), bi_discard(bi_neg(bi_register(2))), BI_ROUND_RTN), - 0x0090c240800d0042ULL); + 0x0090c240800d0042ULL); CASE(bi_fround_v2f16_to(b, bi_half(bi_register(0), false), bi_register(0), - BI_ROUND_RTN), - 0x00904000a00f0000ULL); + BI_ROUND_RTN), + 0x00904000a00f0000ULL); - CASE(bi_fround_v2f16_to(b, bi_half(bi_register(0), false), - bi_swz_16(bi_register(1), true, false), BI_ROUND_RTN), - 0x00904000900f0001ULL); + CASE( + bi_fround_v2f16_to(b, bi_half(bi_register(0), false), + bi_swz_16(bi_register(1), true, false), BI_ROUND_RTN), + 0x00904000900f0001ULL); } -TEST_F(ValhallPacking, FaddImm) { - CASE(bi_fadd_imm_f32_to(b, bi_register(2), bi_discard(bi_register(2)), 0x4847C6C0), - 0x0114C24847C6C042ULL); +TEST_F(ValhallPacking, FaddImm) +{ + CASE(bi_fadd_imm_f32_to(b, bi_register(2), bi_discard(bi_register(2)), + 0x4847C6C0), + 0x0114C24847C6C042ULL); - CASE(bi_fadd_imm_v2f16_to(b, bi_register(2), bi_discard(bi_register(2)), 0x70AC6784), - 0x0115C270AC678442ULL); + CASE(bi_fadd_imm_v2f16_to(b, bi_register(2), bi_discard(bi_register(2)), + 0x70AC6784), + 0x0115C270AC678442ULL); } -TEST_F(ValhallPacking, Comparions) { +TEST_F(ValhallPacking, Comparions) +{ CASE(bi_icmp_or_v2s16_to(b, bi_register(2), - bi_discard(bi_swz_16(bi_register(3), true, false)), - bi_discard(bi_swz_16(bi_register(2), true, false)), - zero, BI_CMPF_GT, BI_RESULT_TYPE_M1), + bi_discard(bi_swz_16(bi_register(3), true, false)), + bi_discard(bi_swz_16(bi_register(2), true, false)), + zero, BI_CMPF_GT, BI_RESULT_TYPE_M1), 0x00f9c21184c04243); CASE(bi_fcmp_or_v2f16_to(b, bi_register(2), - bi_discard(bi_swz_16(bi_register(3), true, false)), - bi_discard(bi_swz_16(bi_register(2), false, false)), - zero, BI_CMPF_GT, BI_RESULT_TYPE_M1), - 0x00f5c20190c04243); + bi_discard(bi_swz_16(bi_register(3), true, false)), + bi_discard(bi_swz_16(bi_register(2), false, false)), + zero, BI_CMPF_GT, BI_RESULT_TYPE_M1), + 0x00f5c20190c04243); } -TEST_F(ValhallPacking, Conversions) { +TEST_F(ValhallPacking, Conversions) +{ CASE(bi_v2s16_to_v2f16_to(b, bi_register(2), bi_discard(bi_register(2))), - 0x0090c22000070042); + 0x0090c22000070042); } -TEST_F(ValhallPacking, BranchzI16) { - bi_instr *I = bi_branchz_i16(b, bi_half(bi_register(2), false), bi_null(), BI_CMPF_EQ); +TEST_F(ValhallPacking, BranchzI16) +{ + 
bi_instr *I = + bi_branchz_i16(b, bi_half(bi_register(2), false), bi_null(), BI_CMPF_EQ); I->branch_offset = 1; CASE(I, 0x001fc03000000102); } -TEST_F(ValhallPacking, BranchzI16Backwards) { +TEST_F(ValhallPacking, BranchzI16Backwards) +{ bi_instr *I = bi_branchz_i16(b, zero, bi_null(), BI_CMPF_EQ); I->branch_offset = -8; CASE(I, 0x001fc017fffff8c0); } -TEST_F(ValhallPacking, Blend) { - CASE(bi_blend_to(b, bi_null(), bi_register(0), bi_register(60), - bi_fau(BIR_FAU_BLEND_0, false), - bi_fau(BIR_FAU_BLEND_0, true), - bi_null(), BI_REGISTER_FORMAT_F16, 2, 0), - 0x007f4004333c00f0); +TEST_F(ValhallPacking, Blend) +{ + CASE( + bi_blend_to(b, bi_null(), bi_register(0), bi_register(60), + bi_fau(BIR_FAU_BLEND_0, false), bi_fau(BIR_FAU_BLEND_0, true), + bi_null(), BI_REGISTER_FORMAT_F16, 2, 0), + 0x007f4004333c00f0); } -TEST_F(ValhallPacking, Mux) { +TEST_F(ValhallPacking, Mux) +{ CASE(bi_mux_i32_to(b, bi_register(0), bi_discard(bi_register(0)), bi_discard(bi_register(4)), - bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 0), false), BI_MUX_BIT), + bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 0), false), + BI_MUX_BIT), 0x00b8c00300804440ull); } -TEST_F(ValhallPacking, AtestFP16) { +TEST_F(ValhallPacking, AtestFP16) +{ CASE(bi_atest_to(b, bi_register(60), bi_register(60), bi_half(bi_register(1), true), bi_fau(BIR_FAU_ATEST_PARAM, false)), 0x007dbc0208ea013c); } -TEST_F(ValhallPacking, AtestFP32) { +TEST_F(ValhallPacking, AtestFP32) +{ CASE(bi_atest_to(b, bi_register(60), bi_register(60), one, bi_fau(BIR_FAU_ATEST_PARAM, false)), 0x007dbc0200ead03c); } -TEST_F(ValhallPacking, Transcendentals) { +TEST_F(ValhallPacking, Transcendentals) +{ CASE(bi_frexpm_f32_to(b, bi_register(1), bi_register(0), false, true), 0x0099c10001000000); - CASE(bi_frexpe_f32_to(b, bi_register(0), bi_discard(bi_register(0)), false, true), + CASE(bi_frexpe_f32_to(b, bi_register(0), bi_discard(bi_register(0)), false, + true), 0x0099c00001020040); - CASE(bi_frsq_f32_to(b, bi_register(2), bi_register(1)), - 0x009cc20000020001); + CASE(bi_frsq_f32_to(b, bi_register(2), bi_register(1)), 0x009cc20000020001); - CASE(bi_fma_rscale_f32_to(b, bi_register(0), bi_discard(bi_register(1)), bi_discard(bi_register(2)), bi_neg(zero), bi_discard(bi_register(0)), BI_SPECIAL_LEFT), + CASE(bi_fma_rscale_f32_to(b, bi_register(0), bi_discard(bi_register(1)), + bi_discard(bi_register(2)), bi_neg(zero), + bi_discard(bi_register(0)), BI_SPECIAL_LEFT), 0x0162c00440c04241); } -TEST_F(ValhallPacking, Csel) { +TEST_F(ValhallPacking, Csel) +{ CASE(bi_csel_u32_to(b, bi_register(1), bi_discard(bi_register(2)), bi_discard(bi_register(3)), - bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), false), - bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), true), + bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), false), + bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), true), BI_CMPF_EQ), 0x0150c10085844342); CASE(bi_csel_u32_to(b, bi_register(1), bi_discard(bi_register(2)), bi_discard(bi_register(3)), - bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), false), - bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), true), + bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), false), + bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), true), BI_CMPF_LT), 0x0150c10485844342); CASE(bi_csel_s32_to(b, bi_register(1), bi_discard(bi_register(2)), bi_discard(bi_register(3)), - bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), false), - bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), true), + bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), false), + bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), true), BI_CMPF_LT), 0x0158c10485844342); } 
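The packing CASE macro defined at the top of this file reduces each of these checks to one call into va_pack_instr plus a comparison against a hand-computed 64-bit word. Written out without the macro, and re-using the register-to-register MOV encoding from the Moves test earlier in this file, a single check looks roughly like this (a sketch under the same assumptions as the tests themselves: bit_builder, the bi_* builders, and va_pack_instr behave as used throughout this file):

   void *ctx = ralloc_context(NULL);
   bi_builder *b = bit_builder(ctx);

   /* Build MOV.i32 r1, r2 and pack it to its 64-bit Valhall encoding. */
   bi_instr *I = bi_mov_i32_to(b, bi_register(1), bi_register(2));
   uint64_t bits = va_pack_instr(I);

   /* Expected word taken from the Moves case above. */
   assert(bits == 0x0091c10000000002ULL);

   ralloc_free(ctx);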
-TEST_F(ValhallPacking, LdAttrImm) { - bi_instr *I = bi_ld_attr_imm_to(b, bi_register(0), - bi_discard(bi_register(60)), - bi_discard(bi_register(61)), - BI_REGISTER_FORMAT_F16, BI_VECSIZE_V4, 1); +TEST_F(ValhallPacking, LdAttrImm) +{ + bi_instr *I = bi_ld_attr_imm_to( + b, bi_register(0), bi_discard(bi_register(60)), + bi_discard(bi_register(61)), BI_REGISTER_FORMAT_F16, BI_VECSIZE_V4, 1); I->table = 1; CASE(I, 0x0066800433117d7c); } -TEST_F(ValhallPacking, LdVarBufImmF16) { +TEST_F(ValhallPacking, LdVarBufImmF16) +{ CASE(bi_ld_var_buf_imm_f16_to(b, bi_register(2), bi_register(61), BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER, - BI_SOURCE_FORMAT_F16, - BI_UPDATE_RETRIEVE, BI_VECSIZE_V4, 0), + BI_SOURCE_FORMAT_F16, BI_UPDATE_RETRIEVE, + BI_VECSIZE_V4, 0), 0x005d82143300003d); CASE(bi_ld_var_buf_imm_f16_to(b, bi_register(0), bi_register(61), BI_REGISTER_FORMAT_F16, BI_SAMPLE_SAMPLE, - BI_SOURCE_FORMAT_F16, - BI_UPDATE_STORE, BI_VECSIZE_V4, 0), - 0x005d80843300003d); + BI_SOURCE_FORMAT_F16, BI_UPDATE_STORE, + BI_VECSIZE_V4, 0), + 0x005d80843300003d); CASE(bi_ld_var_buf_imm_f16_to(b, bi_register(0), bi_register(61), BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTROID, - BI_SOURCE_FORMAT_F16, - BI_UPDATE_STORE, BI_VECSIZE_V4, 8), - 0x005d80443308003d); + BI_SOURCE_FORMAT_F16, BI_UPDATE_STORE, + BI_VECSIZE_V4, 8), + 0x005d80443308003d); } -TEST_F(ValhallPacking, LeaBufImm) { +TEST_F(ValhallPacking, LeaBufImm) +{ CASE(bi_lea_buf_imm_to(b, bi_register(4), bi_discard(bi_register(59))), 0x005e840400000d7b); } -TEST_F(ValhallPacking, StoreSegment) { +TEST_F(ValhallPacking, StoreSegment) +{ CASE(bi_store_i96(b, bi_register(0), bi_discard(bi_register(4)), - bi_discard(bi_register(5)), BI_SEG_VARY, 0), + bi_discard(bi_register(5)), BI_SEG_VARY, 0), 0x0061400632000044); } -TEST_F(ValhallPacking, Convert16To32) { - CASE(bi_u16_to_u32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), false, false))), - 0x0090c20000140077); +TEST_F(ValhallPacking, Convert16To32) +{ + CASE(bi_u16_to_u32_to(b, bi_register(2), + bi_discard(bi_swz_16(bi_register(55), false, false))), + 0x0090c20000140077); - CASE(bi_u16_to_u32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), true, false))), - 0x0090c20010140077); + CASE(bi_u16_to_u32_to(b, bi_register(2), + bi_discard(bi_swz_16(bi_register(55), true, false))), + 0x0090c20010140077); - CASE(bi_u16_to_f32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), false, false))), - 0x0090c20000150077); + CASE(bi_u16_to_f32_to(b, bi_register(2), + bi_discard(bi_swz_16(bi_register(55), false, false))), + 0x0090c20000150077); - CASE(bi_u16_to_f32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), true, false))), - 0x0090c20010150077); + CASE(bi_u16_to_f32_to(b, bi_register(2), + bi_discard(bi_swz_16(bi_register(55), true, false))), + 0x0090c20010150077); - CASE(bi_s16_to_s32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), false, false))), - 0x0090c20000040077); + CASE(bi_s16_to_s32_to(b, bi_register(2), + bi_discard(bi_swz_16(bi_register(55), false, false))), + 0x0090c20000040077); - CASE(bi_s16_to_s32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), true, false))), - 0x0090c20010040077); + CASE(bi_s16_to_s32_to(b, bi_register(2), + bi_discard(bi_swz_16(bi_register(55), true, false))), + 0x0090c20010040077); } -TEST_F(ValhallPacking, Swizzle8) { - CASE(bi_icmp_or_v4u8_to(b, bi_register(1), bi_byte(bi_register(0), 0), - zero, zero, BI_CMPF_NE, BI_RESULT_TYPE_I1), +TEST_F(ValhallPacking, Swizzle8) +{ + CASE(bi_icmp_or_v4u8_to(b, 
bi_register(1), bi_byte(bi_register(0), 0), zero, + zero, BI_CMPF_NE, BI_RESULT_TYPE_I1), 0x00f2c14300c0c000); } -TEST_F(ValhallPacking, FauPage1) { - CASE(bi_mov_i32_to(b, bi_register(1), bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 32), false)), - 0x0291c10000000080ULL); +TEST_F(ValhallPacking, FauPage1) +{ + CASE(bi_mov_i32_to(b, bi_register(1), + bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 32), false)), + 0x0291c10000000080ULL); } -TEST_F(ValhallPacking, LdTileV3F16) { +TEST_F(ValhallPacking, LdTileV3F16) +{ CASE(bi_ld_tile_to(b, bi_register(4), bi_discard(bi_register(0)), - bi_register(60), bi_register(3), - BI_REGISTER_FORMAT_F16, BI_VECSIZE_V3), + bi_register(60), bi_register(3), BI_REGISTER_FORMAT_F16, + BI_VECSIZE_V3), 0x0078840423033c40); } -TEST_F(ValhallPacking, Rhadd8) { +TEST_F(ValhallPacking, Rhadd8) +{ CASE(bi_hadd_v4s8_to(b, bi_register(0), bi_discard(bi_register(1)), bi_discard(bi_register(0)), BI_ROUND_RTP), 0x00aac000400b4041); diff --git a/src/panfrost/bifrost/valhall/test/test-validate-fau.cpp b/src/panfrost/bifrost/valhall/test/test-validate-fau.cpp index a704d31af30..553a9ad343e 100644 --- a/src/panfrost/bifrost/valhall/test/test-validate-fau.cpp +++ b/src/panfrost/bifrost/valhall/test/test-validate-fau.cpp @@ -21,41 +21,44 @@ * SOFTWARE. */ -#include "va_compiler.h" -#include "bi_test.h" #include "bi_builder.h" +#include "bi_test.h" +#include "va_compiler.h" #include -#define CASE(instr, expected) do { \ - if (va_validate_fau(instr) != expected) { \ - fprintf(stderr, "Incorrect validation for:\n"); \ - bi_print_instr(instr, stderr); \ - fprintf(stderr, "\n"); \ - ADD_FAILURE(); \ - } \ -} while(0) +#define CASE(instr, expected) \ + do { \ + if (va_validate_fau(instr) != expected) { \ + fprintf(stderr, "Incorrect validation for:\n"); \ + bi_print_instr(instr, stderr); \ + fprintf(stderr, "\n"); \ + ADD_FAILURE(); \ + } \ + } while (0) -#define VALID(instr) CASE(instr, true) +#define VALID(instr) CASE(instr, true) #define INVALID(instr) CASE(instr, false) class ValidateFau : public testing::Test { -protected: - ValidateFau() { + protected: + ValidateFau() + { mem_ctx = ralloc_context(NULL); b = bit_builder(mem_ctx); - zero = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 0), false); - imm1 = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 1), false); - imm2 = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 2), false); - unif = bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 5), false); - unif_hi = bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 5), true); - unif2 = bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 6), false); + zero = bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | 0), false); + imm1 = bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | 1), false); + imm2 = bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | 2), false); + unif = bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 5), false); + unif_hi = bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 5), true); + unif2 = bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 6), false); core_id = bi_fau(BIR_FAU_CORE_ID, false); lane_id = bi_fau(BIR_FAU_LANE_ID, false); } - ~ValidateFau() { + ~ValidateFau() + { ralloc_free(mem_ctx); } @@ -66,8 +69,8 @@ protected: TEST_F(ValidateFau, One64BitUniformSlot) { - VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_register(3), - unif)); + VALID( + bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_register(3), unif)); VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), unif_hi, unif)); VALID(bi_fma_f32_to(b, bi_register(1), unif, unif, unif_hi)); INVALID(bi_fma_f32_to(b, bi_register(1), unif, unif2, bi_register(1))); @@ -77,8 +80,8 @@ 
TEST_F(ValidateFau, One64BitUniformSlot) * marked as valid in early versions of the validator. */ INVALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), - bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 0), false), - bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 1), true))); + bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 0), false), + bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 1), true))); } TEST_F(ValidateFau, Combined64BitUniformsConstants) @@ -99,17 +102,16 @@ TEST_F(ValidateFau, UniformsOnlyInDefaultMode) TEST_F(ValidateFau, SingleSpecialImmediate) { VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_register(2), - lane_id)); + lane_id)); VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_register(2), - core_id)); - INVALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), lane_id, - core_id)); + core_id)); + INVALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), lane_id, core_id)); } TEST_F(ValidateFau, SmokeTests) { VALID(bi_mov_i32_to(b, bi_register(1), bi_register(2))); VALID(bi_mov_i32_to(b, bi_register(1), unif)); - VALID(bi_fma_f32_to(b, bi_register(1), bi_discard(bi_register(1)), - unif, bi_neg(zero))); + VALID(bi_fma_f32_to(b, bi_register(1), bi_discard(bi_register(1)), unif, + bi_neg(zero))); } diff --git a/src/panfrost/bifrost/valhall/va_compiler.h b/src/panfrost/bifrost/valhall/va_compiler.h index 534f0a0ee91..24d71b3536b 100644 --- a/src/panfrost/bifrost/valhall/va_compiler.h +++ b/src/panfrost/bifrost/valhall/va_compiler.h @@ -79,7 +79,7 @@ va_select_fau_page(const bi_instr *I) { bi_foreach_src(I, s) { if (I->src[s].type == BI_INDEX_FAU) - return va_fau_page((enum bir_fau) I->src[s].value); + return va_fau_page((enum bir_fau)I->src[s].value); } return 0; @@ -91,8 +91,7 @@ struct va_stats { unsigned fma, cvt, sfu, v, ls, t; }; -void -va_count_instr_stats(bi_instr *I, struct va_stats *stats); +void va_count_instr_stats(bi_instr *I, struct va_stats *stats); #ifdef __cplusplus } /* extern C */ diff --git a/src/panfrost/bifrost/valhall/va_insert_flow.c b/src/panfrost/bifrost/valhall/va_insert_flow.c index 68eb808b45c..5cbe6a13ad6 100644 --- a/src/panfrost/bifrost/valhall/va_insert_flow.c +++ b/src/panfrost/bifrost/valhall/va_insert_flow.c @@ -21,9 +21,9 @@ * SOFTWARE. */ +#include "bi_builder.h" #include "va_compiler.h" #include "valhall_enums.h" -#include "bi_builder.h" /* * Insert flow control into a scheduled and register allocated shader. This @@ -176,7 +176,8 @@ bi_depend_on_writers(struct bi_scoreboard_state *st, uint64_t regmask) /* Sets the dependencies for a given clause, updating the model */ static void -bi_set_dependencies(bi_block *block, bi_instr *I, struct bi_scoreboard_state *st) +bi_set_dependencies(bi_block *block, bi_instr *I, + struct bi_scoreboard_state *st) { /* Depend on writers to handle read-after-write and write-after-write * dependencies. Write-after-read dependencies are handled in the hardware @@ -482,7 +483,8 @@ va_insert_flow_control_nops(bi_context *ctx) */ if (va_should_end(block) || block->needs_nop) { /* Don't bother adding a NOP into an unreachable block */ - if (block == bi_start_block(&ctx->blocks) || bi_num_predecessors(block)) + if (block == bi_start_block(&ctx->blocks) || + bi_num_predecessors(block)) bi_flow(ctx, bi_after_block(block), VA_FLOW_END); } else if (bi_reconverge_branches(block)) { /* TODO: Do we have ever need to reconverge from an empty block? 
*/ diff --git a/src/panfrost/bifrost/valhall/va_lower_constants.c b/src/panfrost/bifrost/valhall/va_lower_constants.c index e5a8fd7e224..be5a40586c4 100644 --- a/src/panfrost/bifrost/valhall/va_lower_constants.c +++ b/src/panfrost/bifrost/valhall/va_lower_constants.c @@ -21,9 +21,9 @@ * SOFTWARE. */ +#include "bi_builder.h" #include "va_compiler.h" #include "valhall.h" -#include "bi_builder.h" /* Only some special immediates are available, as specified in the Table of * Immediates in the specification. Other immediates must be lowered, either to @@ -51,7 +51,7 @@ va_lut_index_32(uint32_t imm) static bi_index va_lut_index_16(uint16_t imm) { - uint16_t *arr16 = (uint16_t *) valhall_immediates; + uint16_t *arr16 = (uint16_t *)valhall_immediates; for (unsigned i = 0; i < (2 * ARRAY_SIZE(valhall_immediates)); ++i) { if (arr16[i] == imm) @@ -64,7 +64,7 @@ va_lut_index_16(uint16_t imm) UNUSED static bi_index va_lut_index_8(uint8_t imm) { - uint8_t *arr8 = (uint8_t *) valhall_immediates; + uint8_t *arr8 = (uint8_t *)valhall_immediates; for (unsigned i = 0; i < (4 * ARRAY_SIZE(valhall_immediates)); ++i) { if (arr8[i] == imm) @@ -109,36 +109,43 @@ is_extension_of_16(uint32_t x, bool is_signed) } static bi_index -va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info, bool is_signed, bool staging) +va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info, + bool is_signed, bool staging) { /* Try the constant as-is */ if (!staging) { bi_index lut = va_lut_index_32(value); - if (!bi_is_null(lut)) return lut; + if (!bi_is_null(lut)) + return lut; /* ...or negated as a FP32 constant */ if (info.absneg && info.size == VA_SIZE_32) { lut = bi_neg(va_lut_index_32(fui(-uif(value)))); - if (!bi_is_null(lut)) return lut; + if (!bi_is_null(lut)) + return lut; } /* ...or negated as a FP16 constant */ if (info.absneg && info.size == VA_SIZE_16) { lut = bi_neg(va_lut_index_32(value ^ 0x80008000)); - if (!bi_is_null(lut)) return lut; + if (!bi_is_null(lut)) + return lut; } } /* Try using a single half of a FP16 constant */ bool replicated_halves = (value & 0xFFFF) == (value >> 16); - if (!staging && info.swizzle && info.size == VA_SIZE_16 && replicated_halves) { + if (!staging && info.swizzle && info.size == VA_SIZE_16 && + replicated_halves) { bi_index lut = va_lut_index_16(value & 0xFFFF); - if (!bi_is_null(lut)) return lut; + if (!bi_is_null(lut)) + return lut; /* ...possibly negated */ if (info.absneg) { lut = bi_neg(va_lut_index_16((value & 0xFFFF) ^ 0x8000)); - if (!bi_is_null(lut)) return lut; + if (!bi_is_null(lut)) + return lut; } } @@ -147,25 +154,28 @@ va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info, bool is_extension_of_8(value, is_signed)) { bi_index lut = va_lut_index_8(value & 0xFF); - if (!bi_is_null(lut)) return lut; + if (!bi_is_null(lut)) + return lut; } /* Try extending a halfword */ - if (!staging && info.widen && - is_extension_of_16(value, is_signed)) { + if (!staging && info.widen && is_extension_of_16(value, is_signed)) { bi_index lut = va_lut_index_16(value & 0xFFFF); - if (!bi_is_null(lut)) return lut; + if (!bi_is_null(lut)) + return lut; } /* Try demoting the constant to FP16 */ if (!staging && info.swizzle && info.size == VA_SIZE_32) { bi_index lut = va_demote_constant_fp16(value); - if (!bi_is_null(lut)) return lut; + if (!bi_is_null(lut)) + return lut; if (info.absneg) { bi_index lut = bi_neg(va_demote_constant_fp16(fui(-uif(value)))); - if (!bi_is_null(lut)) return lut; + if (!bi_is_null(lut)) + return lut; } } @@ 
-218,7 +228,8 @@ va_lower_constants(bi_context *ctx, bi_instr *I) value = bi_apply_swizzle(value, swz); } - bi_index cons = va_resolve_constant(&b, value, info, is_signed, staging); + bi_index cons = + va_resolve_constant(&b, value, info, is_signed, staging); cons.neg ^= I->src[s].neg; I->src[s] = cons; diff --git a/src/panfrost/bifrost/valhall/va_lower_isel.c b/src/panfrost/bifrost/valhall/va_lower_isel.c index ec244d66524..284289573f6 100644 --- a/src/panfrost/bifrost/valhall/va_lower_isel.c +++ b/src/panfrost/bifrost/valhall/va_lower_isel.c @@ -21,9 +21,9 @@ * SOFTWARE. */ +#include "bi_builder.h" #include "va_compiler.h" #include "valhall.h" -#include "bi_builder.h" static bi_instr * lower(bi_builder *b, bi_instr *I) @@ -38,45 +38,56 @@ lower(bi_builder *b, bi_instr *I) return bi_iadd_v4u8_to(b, I->dest[0], I->src[0], bi_zero(), false); case BI_OPCODE_ICMP_I32: - return bi_icmp_or_u32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type); + return bi_icmp_or_u32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), + I->cmpf, I->result_type); case BI_OPCODE_ICMP_V2I16: - return bi_icmp_or_v2u16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type); + return bi_icmp_or_v2u16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), + I->cmpf, I->result_type); case BI_OPCODE_ICMP_V4I8: - return bi_icmp_or_v4u8_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type); + return bi_icmp_or_v4u8_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), + I->cmpf, I->result_type); case BI_OPCODE_ICMP_U32: - return bi_icmp_or_u32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type); + return bi_icmp_or_u32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), + I->cmpf, I->result_type); case BI_OPCODE_ICMP_V2U16: - return bi_icmp_or_v2u16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type); + return bi_icmp_or_v2u16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), + I->cmpf, I->result_type); case BI_OPCODE_ICMP_V4U8: - return bi_icmp_or_v4u8_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type); + return bi_icmp_or_v4u8_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), + I->cmpf, I->result_type); case BI_OPCODE_ICMP_S32: - return bi_icmp_or_s32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type); + return bi_icmp_or_s32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), + I->cmpf, I->result_type); case BI_OPCODE_ICMP_V2S16: - return bi_icmp_or_v2s16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type); + return bi_icmp_or_v2s16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), + I->cmpf, I->result_type); case BI_OPCODE_ICMP_V4S8: - return bi_icmp_or_v4s8_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type); + return bi_icmp_or_v4s8_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), + I->cmpf, I->result_type); case BI_OPCODE_FCMP_F32: - return bi_fcmp_or_f32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type); + return bi_fcmp_or_f32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), + I->cmpf, I->result_type); case BI_OPCODE_FCMP_V2F16: - return bi_fcmp_or_v2f16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type); + return bi_fcmp_or_v2f16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), + I->cmpf, I->result_type); /* Integer CSEL must have a signedness */ case BI_OPCODE_CSEL_I32: case BI_OPCODE_CSEL_V2I16: assert(I->cmpf == BI_CMPF_EQ || I->cmpf == BI_CMPF_NE); 
- I->op = (I->op == BI_OPCODE_CSEL_I32) ? BI_OPCODE_CSEL_U32 : - BI_OPCODE_CSEL_V2U16; + I->op = (I->op == BI_OPCODE_CSEL_I32) ? BI_OPCODE_CSEL_U32 + : BI_OPCODE_CSEL_V2U16; return NULL; /* Jump -> conditional branch with condition tied to true. */ @@ -117,7 +128,7 @@ lower(bi_builder *b, bi_instr *I) case BI_OPCODE_FADD_RSCALE_F32: return bi_fma_rscale_f32_to(b, I->dest[0], I->src[0], bi_imm_f32(1.0), - I->src[1], I->src[2], I->special); + I->src[1], I->src[2], I->special); default: return NULL; diff --git a/src/panfrost/bifrost/valhall/va_lower_split_64bit.c b/src/panfrost/bifrost/valhall/va_lower_split_64bit.c index 3c67f3e8a46..947138511e4 100644 --- a/src/panfrost/bifrost/valhall/va_lower_split_64bit.c +++ b/src/panfrost/bifrost/valhall/va_lower_split_64bit.c @@ -21,8 +21,8 @@ * SOFTWARE. */ -#include "va_compiler.h" #include "bi_builder.h" +#include "va_compiler.h" /* * Bifrost uses split 64-bit addresses, specified as two consecutive sources. @@ -38,8 +38,7 @@ lower_split_src(bi_context *ctx, bi_instr *I, unsigned s) bi_index offset_fau = I->src[s]; offset_fau.offset++; - if (I->src[s].type == BI_INDEX_FAU && - I->src[s].offset == 0 && + if (I->src[s].type == BI_INDEX_FAU && I->src[s].offset == 0 && bi_is_value_equiv(offset_fau, I->src[s + 1])) { return; } diff --git a/src/panfrost/bifrost/valhall/va_mark_last.c b/src/panfrost/bifrost/valhall/va_mark_last.c index d17c0ec65c8..5c3f1ec3b78 100644 --- a/src/panfrost/bifrost/valhall/va_mark_last.c +++ b/src/panfrost/bifrost/valhall/va_mark_last.c @@ -97,7 +97,7 @@ scoreboard_update(struct bi_scoreboard_state *st, const bi_instr *I) /* Unmark registers after they are waited on */ for (unsigned i = 0; i < VA_NUM_GENERAL_SLOTS; ++i) { if (waits_on_slot(I->flow, i)) - st->read[i] = 0; + st->read[i] = 0; } } @@ -111,8 +111,8 @@ va_analyze_scoreboard_reads(bi_context *ctx) bi_worklist_push_tail(&worklist, block); /* Reset analysis from previous pass */ - block->scoreboard_in = (struct bi_scoreboard_state){ 0 }; - block->scoreboard_out = (struct bi_scoreboard_state){ 0 }; + block->scoreboard_in = (struct bi_scoreboard_state){0}; + block->scoreboard_out = (struct bi_scoreboard_state){0}; } /* Perform forward data flow analysis to calculate dependencies */ diff --git a/src/panfrost/bifrost/valhall/va_merge_flow.c b/src/panfrost/bifrost/valhall/va_merge_flow.c index 64f3c38c9ae..05de5109260 100644 --- a/src/panfrost/bifrost/valhall/va_merge_flow.c +++ b/src/panfrost/bifrost/valhall/va_merge_flow.c @@ -21,9 +21,9 @@ * SOFTWARE. */ +#include "bi_builder.h" #include "va_compiler.h" #include "valhall_enums.h" -#include "bi_builder.h" /* * Merge NOPs with flow control with nearby instructions to eliminate the NOPs, @@ -80,8 +80,10 @@ merge_end_reconverge(bi_block *block) bi_instr *last = list_last_entry(&block->instructions, bi_instr, link); bi_instr *penult = bi_prev_op(last); - if (last->op != BI_OPCODE_NOP) return; - if (last->flow != VA_FLOW_RECONVERGE && last->flow != VA_FLOW_END) return; + if (last->op != BI_OPCODE_NOP) + return; + if (last->flow != VA_FLOW_RECONVERGE && last->flow != VA_FLOW_END) + return; /* End implies all other flow control except for waiting on barriers (slot * #7, with VA_FLOW_WAIT), so remove blocking flow control. 
@@ -99,7 +101,8 @@ merge_end_reconverge(bi_block *block) } /* If there is blocking flow control, we can't merge */ - if (penult->flow != VA_FLOW_NONE) return; + if (penult->flow != VA_FLOW_NONE) + return; /* Else, merge */ penult->flow = last->flow; @@ -133,8 +136,8 @@ merge_waits(bi_block *block) bi_instr *last_free = NULL; bi_foreach_instr_in_block_safe(block, I) { - if (last_free != NULL && - I->op == BI_OPCODE_NOP && va_flow_is_wait_or_none(I->flow)) { + if (last_free != NULL && I->op == BI_OPCODE_NOP && + va_flow_is_wait_or_none(I->flow)) { /* Merge waits with compatible instructions */ last_free->flow = union_waits(last_free->flow, I->flow); @@ -212,8 +215,10 @@ va_merge_flow(bi_context *ctx) { bi_foreach_block(ctx, block) { /* If there are less than 2 instructions, there's nothing to merge */ - if (list_is_empty(&block->instructions)) continue; - if (list_is_singular(&block->instructions)) continue; + if (list_is_empty(&block->instructions)) + continue; + if (list_is_singular(&block->instructions)) + continue; merge_end_reconverge(block); merge_waits(block); diff --git a/src/panfrost/bifrost/valhall/va_optimize.c b/src/panfrost/bifrost/valhall/va_optimize.c index a50c4244952..46202e4d52d 100644 --- a/src/panfrost/bifrost/valhall/va_optimize.c +++ b/src/panfrost/bifrost/valhall/va_optimize.c @@ -29,15 +29,21 @@ static enum bi_opcode va_op_add_imm(enum bi_opcode op) { switch (op) { - case BI_OPCODE_FADD_F32: return BI_OPCODE_FADD_IMM_F32; - case BI_OPCODE_FADD_V2F16: return BI_OPCODE_FADD_IMM_V2F16; + case BI_OPCODE_FADD_F32: + return BI_OPCODE_FADD_IMM_F32; + case BI_OPCODE_FADD_V2F16: + return BI_OPCODE_FADD_IMM_V2F16; case BI_OPCODE_IADD_S32: - case BI_OPCODE_IADD_U32: return BI_OPCODE_IADD_IMM_I32; + case BI_OPCODE_IADD_U32: + return BI_OPCODE_IADD_IMM_I32; case BI_OPCODE_IADD_V2S16: - case BI_OPCODE_IADD_V2U16: return BI_OPCODE_IADD_IMM_V2I16; + case BI_OPCODE_IADD_V2U16: + return BI_OPCODE_IADD_IMM_V2I16; case BI_OPCODE_IADD_V4S8: - case BI_OPCODE_IADD_V4U8: return BI_OPCODE_IADD_IMM_V4I8; - default: return 0; + case BI_OPCODE_IADD_V4U8: + return BI_OPCODE_IADD_IMM_V4I8; + default: + return 0; } } @@ -46,8 +52,8 @@ va_is_add_imm(bi_instr *I, unsigned s) { assert(s < I->nr_srcs); - return I->src[s].swizzle == BI_SWIZZLE_H01 && - !I->src[s].abs && !I->src[s].neg && !I->clamp && !I->round; + return I->src[s].swizzle == BI_SWIZZLE_H01 && !I->src[s].abs && + !I->src[s].neg && !I->clamp && !I->round; } static unsigned @@ -83,11 +89,14 @@ va_fuse_add_imm(bi_instr *I) } enum bi_opcode op = va_op_add_imm(I->op); - if (!op) return; + if (!op) + return; unsigned s = va_choose_imm(I); - if (s > 1) return; - if (!va_is_add_imm(I, 1 - s)) return; + if (s > 1) + return; + if (!va_is_add_imm(I, 1 - s)) + return; I->op = op; I->index = bi_apply_swizzle(I->src[s].value, I->src[s].swizzle); diff --git a/src/panfrost/bifrost/valhall/va_pack.c b/src/panfrost/bifrost/valhall/va_pack.c index 33e7dcdf079..f6381c6f5f0 100644 --- a/src/panfrost/bifrost/valhall/va_pack.c +++ b/src/panfrost/bifrost/valhall/va_pack.c @@ -21,10 +21,10 @@ * SOFTWARE. */ +#include "bi_builder.h" #include "va_compiler.h" #include "valhall.h" #include "valhall_enums.h" -#include "bi_builder.h" /* This file contains the final passes of the compiler. Running after * scheduling and RA, the IR is now finalized, so we need to emit it to actual @@ -36,7 +36,7 @@ * Prints the (first) failing instruction to aid debugging. */ NORETURN static void PRINTFLIKE(2, 3) -invalid_instruction(const bi_instr *I, const char *cause, ...) 
+ invalid_instruction(const bi_instr *I, const char *cause, ...) { fputs("\nInvalid ", stderr); @@ -56,8 +56,9 @@ invalid_instruction(const bi_instr *I, const char *cause, ...) * Like assert, but prints the instruction if the assertion fails to aid * debugging invalid inputs to the packing module. */ -#define pack_assert(I, cond) \ - if (!(cond)) invalid_instruction(I, "invariant " #cond); +#define pack_assert(I, cond) \ + if (!(cond)) \ + invalid_instruction(I, "invariant " #cond); /* * Validate that two adjacent 32-bit sources form an aligned 64-bit register @@ -95,14 +96,20 @@ static unsigned va_pack_fau_special(const bi_instr *I, enum bir_fau fau) { switch (fau) { - case BIR_FAU_ATEST_PARAM: return VA_FAU_SPECIAL_PAGE_0_ATEST_DATUM; - case BIR_FAU_TLS_PTR: return VA_FAU_SPECIAL_PAGE_1_THREAD_LOCAL_POINTER; - case BIR_FAU_WLS_PTR: return VA_FAU_SPECIAL_PAGE_1_WORKGROUP_LOCAL_POINTER; - case BIR_FAU_LANE_ID: return VA_FAU_SPECIAL_PAGE_3_LANE_ID; - case BIR_FAU_PROGRAM_COUNTER: return VA_FAU_SPECIAL_PAGE_3_PROGRAM_COUNTER; - case BIR_FAU_SAMPLE_POS_ARRAY:return VA_FAU_SPECIAL_PAGE_0_SAMPLE; + case BIR_FAU_ATEST_PARAM: + return VA_FAU_SPECIAL_PAGE_0_ATEST_DATUM; + case BIR_FAU_TLS_PTR: + return VA_FAU_SPECIAL_PAGE_1_THREAD_LOCAL_POINTER; + case BIR_FAU_WLS_PTR: + return VA_FAU_SPECIAL_PAGE_1_WORKGROUP_LOCAL_POINTER; + case BIR_FAU_LANE_ID: + return VA_FAU_SPECIAL_PAGE_3_LANE_ID; + case BIR_FAU_PROGRAM_COUNTER: + return VA_FAU_SPECIAL_PAGE_3_PROGRAM_COUNTER; + case BIR_FAU_SAMPLE_POS_ARRAY: + return VA_FAU_SPECIAL_PAGE_0_SAMPLE; - case BIR_FAU_BLEND_0...(BIR_FAU_BLEND_0 + 7): + case BIR_FAU_BLEND_0 ...(BIR_FAU_BLEND_0 + 7): return VA_FAU_SPECIAL_PAGE_0_BLEND_DESCRIPTOR_0 + (fau - BIR_FAU_BLEND_0); default: @@ -136,7 +143,8 @@ va_pack_src(const bi_instr *I, unsigned s) if (idx.type == BI_INDEX_REGISTER) { unsigned value = va_pack_reg(I, idx); - if (idx.discard) value |= (1 << 6); + if (idx.discard) + value |= (1 << 6); return value; } else if (idx.type == BI_INDEX_FAU) { pack_assert(I, idx.offset <= 1); @@ -150,10 +158,14 @@ static unsigned va_pack_wrmask(const bi_instr *I) { switch (I->dest[0].swizzle) { - case BI_SWIZZLE_H00: return 0x1; - case BI_SWIZZLE_H11: return 0x2; - case BI_SWIZZLE_H01: return 0x3; - default: invalid_instruction(I, "write mask"); + case BI_SWIZZLE_H00: + return 0x1; + case BI_SWIZZLE_H11: + return 0x2; + case BI_SWIZZLE_H01: + return 0x3; + default: + invalid_instruction(I, "write mask"); } } @@ -161,17 +173,27 @@ static enum va_atomic_operation va_pack_atom_opc(const bi_instr *I) { switch (I->atom_opc) { - case BI_ATOM_OPC_AADD: return VA_ATOMIC_OPERATION_AADD; - case BI_ATOM_OPC_ASMIN: return VA_ATOMIC_OPERATION_ASMIN; - case BI_ATOM_OPC_ASMAX: return VA_ATOMIC_OPERATION_ASMAX; - case BI_ATOM_OPC_AUMIN: return VA_ATOMIC_OPERATION_AUMIN; - case BI_ATOM_OPC_AUMAX: return VA_ATOMIC_OPERATION_AUMAX; - case BI_ATOM_OPC_AAND: return VA_ATOMIC_OPERATION_AAND; - case BI_ATOM_OPC_AOR: return VA_ATOMIC_OPERATION_AOR; - case BI_ATOM_OPC_AXOR: return VA_ATOMIC_OPERATION_AXOR; + case BI_ATOM_OPC_AADD: + return VA_ATOMIC_OPERATION_AADD; + case BI_ATOM_OPC_ASMIN: + return VA_ATOMIC_OPERATION_ASMIN; + case BI_ATOM_OPC_ASMAX: + return VA_ATOMIC_OPERATION_ASMAX; + case BI_ATOM_OPC_AUMIN: + return VA_ATOMIC_OPERATION_AUMIN; + case BI_ATOM_OPC_AUMAX: + return VA_ATOMIC_OPERATION_AUMAX; + case BI_ATOM_OPC_AAND: + return VA_ATOMIC_OPERATION_AAND; + case BI_ATOM_OPC_AOR: + return VA_ATOMIC_OPERATION_AOR; + case BI_ATOM_OPC_AXOR: + return VA_ATOMIC_OPERATION_AXOR; case 
BI_ATOM_OPC_ACMPXCHG: - case BI_ATOM_OPC_AXCHG: return VA_ATOMIC_OPERATION_AXCHG; - default: invalid_instruction(I, "atomic opcode"); + case BI_ATOM_OPC_AXCHG: + return VA_ATOMIC_OPERATION_AXCHG; + default: + invalid_instruction(I, "atomic opcode"); } } @@ -179,12 +201,18 @@ static enum va_atomic_operation_with_1 va_pack_atom_opc_1(const bi_instr *I) { switch (I->atom_opc) { - case BI_ATOM_OPC_AINC: return VA_ATOMIC_OPERATION_WITH_1_AINC; - case BI_ATOM_OPC_ADEC: return VA_ATOMIC_OPERATION_WITH_1_ADEC; - case BI_ATOM_OPC_AUMAX1: return VA_ATOMIC_OPERATION_WITH_1_AUMAX1; - case BI_ATOM_OPC_ASMAX1: return VA_ATOMIC_OPERATION_WITH_1_ASMAX1; - case BI_ATOM_OPC_AOR1: return VA_ATOMIC_OPERATION_WITH_1_AOR1; - default: invalid_instruction(I, "atomic opcode with implicit 1"); + case BI_ATOM_OPC_AINC: + return VA_ATOMIC_OPERATION_WITH_1_AINC; + case BI_ATOM_OPC_ADEC: + return VA_ATOMIC_OPERATION_WITH_1_ADEC; + case BI_ATOM_OPC_AUMAX1: + return VA_ATOMIC_OPERATION_WITH_1_AUMAX1; + case BI_ATOM_OPC_ASMAX1: + return VA_ATOMIC_OPERATION_WITH_1_ASMAX1; + case BI_ATOM_OPC_AOR1: + return VA_ATOMIC_OPERATION_WITH_1_AOR1; + default: + invalid_instruction(I, "atomic opcode with implicit 1"); } } @@ -199,10 +227,14 @@ static enum va_widen va_pack_widen_f32(const bi_instr *I, enum bi_swizzle swz) { switch (swz) { - case BI_SWIZZLE_H01: return VA_WIDEN_NONE; - case BI_SWIZZLE_H00: return VA_WIDEN_H0; - case BI_SWIZZLE_H11: return VA_WIDEN_H1; - default: invalid_instruction(I, "widen"); + case BI_SWIZZLE_H01: + return VA_WIDEN_NONE; + case BI_SWIZZLE_H00: + return VA_WIDEN_H0; + case BI_SWIZZLE_H11: + return VA_WIDEN_H1; + default: + invalid_instruction(I, "widen"); } } @@ -210,11 +242,16 @@ static enum va_swizzles_16_bit va_pack_swizzle_f16(const bi_instr *I, enum bi_swizzle swz) { switch (swz) { - case BI_SWIZZLE_H00: return VA_SWIZZLES_16_BIT_H00; - case BI_SWIZZLE_H10: return VA_SWIZZLES_16_BIT_H10; - case BI_SWIZZLE_H01: return VA_SWIZZLES_16_BIT_H01; - case BI_SWIZZLE_H11: return VA_SWIZZLES_16_BIT_H11; - default: invalid_instruction(I, "16-bit swizzle"); + case BI_SWIZZLE_H00: + return VA_SWIZZLES_16_BIT_H00; + case BI_SWIZZLE_H10: + return VA_SWIZZLES_16_BIT_H10; + case BI_SWIZZLE_H01: + return VA_SWIZZLES_16_BIT_H01; + case BI_SWIZZLE_H11: + return VA_SWIZZLES_16_BIT_H11; + default: + invalid_instruction(I, "16-bit swizzle"); } } @@ -223,37 +260,62 @@ va_pack_widen(const bi_instr *I, enum bi_swizzle swz, enum va_size size) { if (size == VA_SIZE_8) { switch (swz) { - case BI_SWIZZLE_H01: return VA_SWIZZLES_8_BIT_B0123; - case BI_SWIZZLE_H00: return VA_SWIZZLES_8_BIT_B0101; - case BI_SWIZZLE_H11: return VA_SWIZZLES_8_BIT_B2323; - case BI_SWIZZLE_B0000: return VA_SWIZZLES_8_BIT_B0000; - case BI_SWIZZLE_B1111: return VA_SWIZZLES_8_BIT_B1111; - case BI_SWIZZLE_B2222: return VA_SWIZZLES_8_BIT_B2222; - case BI_SWIZZLE_B3333: return VA_SWIZZLES_8_BIT_B3333; - default: invalid_instruction(I, "8-bit widen"); + case BI_SWIZZLE_H01: + return VA_SWIZZLES_8_BIT_B0123; + case BI_SWIZZLE_H00: + return VA_SWIZZLES_8_BIT_B0101; + case BI_SWIZZLE_H11: + return VA_SWIZZLES_8_BIT_B2323; + case BI_SWIZZLE_B0000: + return VA_SWIZZLES_8_BIT_B0000; + case BI_SWIZZLE_B1111: + return VA_SWIZZLES_8_BIT_B1111; + case BI_SWIZZLE_B2222: + return VA_SWIZZLES_8_BIT_B2222; + case BI_SWIZZLE_B3333: + return VA_SWIZZLES_8_BIT_B3333; + default: + invalid_instruction(I, "8-bit widen"); } } else if (size == VA_SIZE_16) { switch (swz) { - case BI_SWIZZLE_H00: return VA_SWIZZLES_16_BIT_H00; - case BI_SWIZZLE_H10: return VA_SWIZZLES_16_BIT_H10; 
- case BI_SWIZZLE_H01: return VA_SWIZZLES_16_BIT_H01; - case BI_SWIZZLE_H11: return VA_SWIZZLES_16_BIT_H11; - case BI_SWIZZLE_B0000: return VA_SWIZZLES_16_BIT_B00; - case BI_SWIZZLE_B1111: return VA_SWIZZLES_16_BIT_B11; - case BI_SWIZZLE_B2222: return VA_SWIZZLES_16_BIT_B22; - case BI_SWIZZLE_B3333: return VA_SWIZZLES_16_BIT_B33; - default: invalid_instruction(I, "16-bit widen"); + case BI_SWIZZLE_H00: + return VA_SWIZZLES_16_BIT_H00; + case BI_SWIZZLE_H10: + return VA_SWIZZLES_16_BIT_H10; + case BI_SWIZZLE_H01: + return VA_SWIZZLES_16_BIT_H01; + case BI_SWIZZLE_H11: + return VA_SWIZZLES_16_BIT_H11; + case BI_SWIZZLE_B0000: + return VA_SWIZZLES_16_BIT_B00; + case BI_SWIZZLE_B1111: + return VA_SWIZZLES_16_BIT_B11; + case BI_SWIZZLE_B2222: + return VA_SWIZZLES_16_BIT_B22; + case BI_SWIZZLE_B3333: + return VA_SWIZZLES_16_BIT_B33; + default: + invalid_instruction(I, "16-bit widen"); } } else if (size == VA_SIZE_32) { switch (swz) { - case BI_SWIZZLE_H01: return VA_SWIZZLES_32_BIT_NONE; - case BI_SWIZZLE_H00: return VA_SWIZZLES_32_BIT_H0; - case BI_SWIZZLE_H11: return VA_SWIZZLES_32_BIT_H1; - case BI_SWIZZLE_B0000: return VA_SWIZZLES_32_BIT_B0; - case BI_SWIZZLE_B1111: return VA_SWIZZLES_32_BIT_B1; - case BI_SWIZZLE_B2222: return VA_SWIZZLES_32_BIT_B2; - case BI_SWIZZLE_B3333: return VA_SWIZZLES_32_BIT_B3; - default: invalid_instruction(I, "32-bit widen"); + case BI_SWIZZLE_H01: + return VA_SWIZZLES_32_BIT_NONE; + case BI_SWIZZLE_H00: + return VA_SWIZZLES_32_BIT_H0; + case BI_SWIZZLE_H11: + return VA_SWIZZLES_32_BIT_H1; + case BI_SWIZZLE_B0000: + return VA_SWIZZLES_32_BIT_B0; + case BI_SWIZZLE_B1111: + return VA_SWIZZLES_32_BIT_B1; + case BI_SWIZZLE_B2222: + return VA_SWIZZLES_32_BIT_B2; + case BI_SWIZZLE_B3333: + return VA_SWIZZLES_32_BIT_B3; + default: + invalid_instruction(I, "32-bit widen"); } } else { invalid_instruction(I, "type size for widen"); @@ -264,14 +326,22 @@ static enum va_half_swizzles_8_bit va_pack_halfswizzle(const bi_instr *I, enum bi_swizzle swz) { switch (swz) { - case BI_SWIZZLE_B0000: return VA_HALF_SWIZZLES_8_BIT_B00; - case BI_SWIZZLE_B1111: return VA_HALF_SWIZZLES_8_BIT_B11; - case BI_SWIZZLE_B2222: return VA_HALF_SWIZZLES_8_BIT_B22; - case BI_SWIZZLE_B3333: return VA_HALF_SWIZZLES_8_BIT_B33; - case BI_SWIZZLE_B0011: return VA_HALF_SWIZZLES_8_BIT_B01; - case BI_SWIZZLE_B2233: return VA_HALF_SWIZZLES_8_BIT_B23; - case BI_SWIZZLE_B0022: return VA_HALF_SWIZZLES_8_BIT_B02; - default: invalid_instruction(I, "v2u8 swizzle"); + case BI_SWIZZLE_B0000: + return VA_HALF_SWIZZLES_8_BIT_B00; + case BI_SWIZZLE_B1111: + return VA_HALF_SWIZZLES_8_BIT_B11; + case BI_SWIZZLE_B2222: + return VA_HALF_SWIZZLES_8_BIT_B22; + case BI_SWIZZLE_B3333: + return VA_HALF_SWIZZLES_8_BIT_B33; + case BI_SWIZZLE_B0011: + return VA_HALF_SWIZZLES_8_BIT_B01; + case BI_SWIZZLE_B2233: + return VA_HALF_SWIZZLES_8_BIT_B23; + case BI_SWIZZLE_B0022: + return VA_HALF_SWIZZLES_8_BIT_B02; + default: + invalid_instruction(I, "v2u8 swizzle"); } } @@ -279,12 +349,18 @@ static enum va_lanes_8_bit va_pack_shift_lanes(const bi_instr *I, enum bi_swizzle swz) { switch (swz) { - case BI_SWIZZLE_H01: return VA_LANES_8_BIT_B02; - case BI_SWIZZLE_B0000: return VA_LANES_8_BIT_B00; - case BI_SWIZZLE_B1111: return VA_LANES_8_BIT_B11; - case BI_SWIZZLE_B2222: return VA_LANES_8_BIT_B22; - case BI_SWIZZLE_B3333: return VA_LANES_8_BIT_B33; - default: invalid_instruction(I, "lane shift"); + case BI_SWIZZLE_H01: + return VA_LANES_8_BIT_B02; + case BI_SWIZZLE_B0000: + return VA_LANES_8_BIT_B00; + case BI_SWIZZLE_B1111: + return 
VA_LANES_8_BIT_B11; + case BI_SWIZZLE_B2222: + return VA_LANES_8_BIT_B22; + case BI_SWIZZLE_B3333: + return VA_LANES_8_BIT_B33; + default: + invalid_instruction(I, "lane shift"); } } @@ -292,10 +368,14 @@ static enum va_combine va_pack_combine(const bi_instr *I, enum bi_swizzle swz) { switch (swz) { - case BI_SWIZZLE_H01: return VA_COMBINE_NONE; - case BI_SWIZZLE_H00: return VA_COMBINE_H0; - case BI_SWIZZLE_H11: return VA_COMBINE_H1; - default: invalid_instruction(I, "branch lane"); + case BI_SWIZZLE_H01: + return VA_COMBINE_NONE; + case BI_SWIZZLE_H00: + return VA_COMBINE_H0; + case BI_SWIZZLE_H11: + return VA_COMBINE_H1; + default: + invalid_instruction(I, "branch lane"); } } @@ -303,10 +383,14 @@ static enum va_source_format va_pack_source_format(const bi_instr *I) { switch (I->source_format) { - case BI_SOURCE_FORMAT_FLAT32: return VA_SOURCE_FORMAT_SRC_FLAT32; - case BI_SOURCE_FORMAT_FLAT16: return VA_SOURCE_FORMAT_SRC_FLAT16; - case BI_SOURCE_FORMAT_F32: return VA_SOURCE_FORMAT_SRC_F32; - case BI_SOURCE_FORMAT_F16: return VA_SOURCE_FORMAT_SRC_F16; + case BI_SOURCE_FORMAT_FLAT32: + return VA_SOURCE_FORMAT_SRC_FLAT32; + case BI_SOURCE_FORMAT_FLAT16: + return VA_SOURCE_FORMAT_SRC_FLAT16; + case BI_SOURCE_FORMAT_F32: + return VA_SOURCE_FORMAT_SRC_F32; + case BI_SOURCE_FORMAT_F16: + return VA_SOURCE_FORMAT_SRC_F16; } invalid_instruction(I, "source format"); @@ -316,9 +400,12 @@ static uint64_t va_pack_rhadd(const bi_instr *I) { switch (I->round) { - case BI_ROUND_RTN: return 0; /* hadd */ - case BI_ROUND_RTP: return BITFIELD_BIT(30); /* rhadd */ - default: unreachable("Invalid round for HADD"); + case BI_ROUND_RTN: + return 0; /* hadd */ + case BI_ROUND_RTP: + return BITFIELD_BIT(30); /* rhadd */ + default: + unreachable("Invalid round for HADD"); } } @@ -334,15 +421,17 @@ va_pack_alu(const bi_instr *I) case BI_OPCODE_FREXPE_V2F16: case BI_OPCODE_FREXPM_F32: case BI_OPCODE_FREXPM_V2F16: - if (I->sqrt) hex |= 1ull << 24; - if (I->log) hex |= 1ull << 25; + if (I->sqrt) + hex |= 1ull << 24; + if (I->log) + hex |= 1ull << 25; break; /* Add mux type */ case BI_OPCODE_MUX_I32: case BI_OPCODE_MUX_V2I16: case BI_OPCODE_MUX_V4I8: - hex |= (uint64_t) I->mux << 32; + hex |= (uint64_t)I->mux << 32; break; /* Add .eq flag */ @@ -350,12 +439,13 @@ va_pack_alu(const bi_instr *I) case BI_OPCODE_BRANCHZI: pack_assert(I, I->cmpf == BI_CMPF_EQ || I->cmpf == BI_CMPF_NE); - if (I->cmpf == BI_CMPF_EQ) hex |= (1ull << 36); + if (I->cmpf == BI_CMPF_EQ) + hex |= (1ull << 36); if (I->op == BI_OPCODE_BRANCHZI) hex |= (0x1ull << 40); /* Absolute */ else - hex |= ((uint64_t) I->branch_offset & BITFIELD_MASK(27)) << 8; + hex |= ((uint64_t)I->branch_offset & BITFIELD_MASK(27)) << 8; break; @@ -369,7 +459,7 @@ va_pack_alu(const bi_instr *I) case BI_OPCODE_RSHIFT_XOR_I32: case BI_OPCODE_RSHIFT_XOR_V2I16: case BI_OPCODE_RSHIFT_XOR_V4I8: - hex |= (uint64_t) I->arithmetic << 34; + hex |= (uint64_t)I->arithmetic << 34; break; case BI_OPCODE_LEA_BUF_IMM: @@ -378,8 +468,8 @@ va_pack_alu(const bi_instr *I) break; case BI_OPCODE_LEA_ATTR_IMM: - hex |= ((uint64_t) I->table) << 16; - hex |= ((uint64_t) I->attribute_index) << 20; + hex |= ((uint64_t)I->table) << 16; + hex |= ((uint64_t)I->attribute_index) << 20; break; case BI_OPCODE_IADD_IMM_I32: @@ -387,13 +477,13 @@ va_pack_alu(const bi_instr *I) case BI_OPCODE_IADD_IMM_V4I8: case BI_OPCODE_FADD_IMM_F32: case BI_OPCODE_FADD_IMM_V2F16: - hex |= ((uint64_t) I->index) << 8; + hex |= ((uint64_t)I->index) << 8; break; case BI_OPCODE_CLPER_I32: - hex |= ((uint64_t) I->inactive_result) 
<< 22; - hex |= ((uint64_t) I->lane_op) << 32; - hex |= ((uint64_t) I->subgroup) << 36; + hex |= ((uint64_t)I->inactive_result) << 22; + hex |= ((uint64_t)I->lane_op) << 32; + hex |= ((uint64_t)I->subgroup) << 36; break; case BI_OPCODE_LD_VAR: @@ -406,35 +496,37 @@ va_pack_alu(const bi_instr *I) case BI_OPCODE_LD_VAR_BUF_IMM_F32: case BI_OPCODE_LD_VAR_SPECIAL: if (I->op == BI_OPCODE_LD_VAR_SPECIAL) - hex |= ((uint64_t) I->varying_name) << 12; /* instead of index */ + hex |= ((uint64_t)I->varying_name) << 12; /* instead of index */ else if (I->op == BI_OPCODE_LD_VAR_BUF_IMM_F16 || I->op == BI_OPCODE_LD_VAR_BUF_IMM_F32) { - hex |= ((uint64_t) I->index) << 16; + hex |= ((uint64_t)I->index) << 16; } else if (I->op == BI_OPCODE_LD_VAR_IMM || I->op == BI_OPCODE_LD_VAR_FLAT_IMM) { - hex |= ((uint64_t) I->table) << 8; - hex |= ((uint64_t) I->index) << 12; + hex |= ((uint64_t)I->table) << 8; + hex |= ((uint64_t)I->index) << 12; } - hex |= ((uint64_t) va_pack_source_format(I)) << 24; - hex |= ((uint64_t) I->update) << 36; - hex |= ((uint64_t) I->sample) << 38; + hex |= ((uint64_t)va_pack_source_format(I)) << 24; + hex |= ((uint64_t)I->update) << 36; + hex |= ((uint64_t)I->sample) << 38; break; case BI_OPCODE_LD_ATTR_IMM: - hex |= ((uint64_t) I->table) << 16; - hex |= ((uint64_t) I->attribute_index) << 20; + hex |= ((uint64_t)I->table) << 16; + hex |= ((uint64_t)I->attribute_index) << 20; break; case BI_OPCODE_LD_TEX_IMM: case BI_OPCODE_LEA_TEX_IMM: - hex |= ((uint64_t) I->table) << 16; - hex |= ((uint64_t) I->texture_index) << 20; + hex |= ((uint64_t)I->table) << 16; + hex |= ((uint64_t)I->texture_index) << 20; break; case BI_OPCODE_ZS_EMIT: - if (I->stencil) hex |= (1 << 24); - if (I->z) hex |= (1 << 25); + if (I->stencil) + hex |= (1 << 24); + if (I->z) + hex |= (1 << 25); break; default: @@ -444,14 +536,14 @@ va_pack_alu(const bi_instr *I) /* FMA_RSCALE.f32 special modes treated as extra opcodes */ if (I->op == BI_OPCODE_FMA_RSCALE_F32) { pack_assert(I, I->special < 4); - hex |= ((uint64_t) I->special) << 48; + hex |= ((uint64_t)I->special) << 48; } /* Add the normal destination or a placeholder. Staging destinations are * added elsewhere, as they require special handling for control fields. 
*/ if (info.has_dest && info.nr_staging_dests == 0) { - hex |= (uint64_t) va_pack_dest(I) << 40; + hex |= (uint64_t)va_pack_dest(I) << 40; } else if (info.nr_staging_dests == 0 && info.nr_staging_srcs == 0) { pack_assert(I, I->nr_dests == 0); hex |= 0xC0ull << 40; /* Placeholder */ @@ -469,19 +561,24 @@ va_pack_alu(const bi_instr *I) enum va_size size = src_info.size; bi_index src = I->src[logical_i + src_offset]; - hex |= (uint64_t) va_pack_src(I, logical_i + src_offset) << (8 * i); + hex |= (uint64_t)va_pack_src(I, logical_i + src_offset) << (8 * i); if (src_info.notted) { - if (src.neg) hex |= (1ull << 35); + if (src.neg) + hex |= (1ull << 35); } else if (src_info.absneg) { unsigned neg_offs = 32 + 2 + ((2 - i) * 2); unsigned abs_offs = 33 + 2 + ((2 - i) * 2); - if (src.neg) hex |= 1ull << neg_offs; - if (src.abs) hex |= 1ull << abs_offs; + if (src.neg) + hex |= 1ull << neg_offs; + if (src.abs) + hex |= 1ull << abs_offs; } else { - if (src.neg) invalid_instruction(I, "negate"); - if (src.abs) invalid_instruction(I, "absolute value"); + if (src.neg) + invalid_instruction(I, "negate"); + if (src.abs) + invalid_instruction(I, "absolute value"); } if (src_info.swizzle) { @@ -489,50 +586,56 @@ va_pack_alu(const bi_instr *I) unsigned S = src.swizzle; pack_assert(I, size == VA_SIZE_16 || size == VA_SIZE_32); - uint64_t v = (size == VA_SIZE_32 ? va_pack_widen_f32(I, S) : va_pack_swizzle_f16(I, S)); + uint64_t v = (size == VA_SIZE_32 ? va_pack_widen_f32(I, S) + : va_pack_swizzle_f16(I, S)); hex |= v << offs; } else if (src_info.widen) { unsigned offs = (i == 1) ? 26 : 36; - hex |= (uint64_t) va_pack_widen(I, src.swizzle, src_info.size) << offs; + hex |= (uint64_t)va_pack_widen(I, src.swizzle, src_info.size) << offs; } else if (src_info.lane) { - unsigned offs = (I->op == BI_OPCODE_MKVEC_V2I8) ? - ((i == 0) ? 38 : 36) : - 28; + unsigned offs = + (I->op == BI_OPCODE_MKVEC_V2I8) ? ((i == 0) ? 38 : 36) : 28; if (src_info.size == VA_SIZE_16) { hex |= (src.swizzle == BI_SWIZZLE_H11 ? 
1 : 0) << offs; } else if (I->op == BI_OPCODE_BRANCHZ_I16) { - hex |= ((uint64_t) va_pack_combine(I, src.swizzle) << 37); + hex |= ((uint64_t)va_pack_combine(I, src.swizzle) << 37); } else { pack_assert(I, src_info.size == VA_SIZE_8); unsigned comp = src.swizzle - BI_SWIZZLE_B0000; pack_assert(I, comp < 4); - hex |= (uint64_t) comp << offs; + hex |= (uint64_t)comp << offs; } } else if (src_info.lanes) { pack_assert(I, src_info.size == VA_SIZE_8); pack_assert(I, i == 1); - hex |= (uint64_t) va_pack_shift_lanes(I, src.swizzle) << 26; + hex |= (uint64_t)va_pack_shift_lanes(I, src.swizzle) << 26; } else if (src_info.combine) { /* Treat as swizzle, subgroup ops not yet supported */ pack_assert(I, src_info.size == VA_SIZE_32); pack_assert(I, i == 0); - hex |= (uint64_t) va_pack_widen_f32(I, src.swizzle) << 37; + hex |= (uint64_t)va_pack_widen_f32(I, src.swizzle) << 37; } else if (src_info.halfswizzle) { pack_assert(I, src_info.size == VA_SIZE_8); pack_assert(I, i == 0); - hex |= (uint64_t) va_pack_halfswizzle(I, src.swizzle) << 36; + hex |= (uint64_t)va_pack_halfswizzle(I, src.swizzle) << 36; } else if (src.swizzle != BI_SWIZZLE_H01) { invalid_instruction(I, "swizzle"); } } - if (info.saturate) hex |= (uint64_t) I->saturate << 30; - if (info.rhadd) hex |= va_pack_rhadd(I); - if (info.clamp) hex |= (uint64_t) I->clamp << 32; - if (info.round_mode) hex |= (uint64_t) I->round << 30; - if (info.condition) hex |= (uint64_t) I->cmpf << 32; - if (info.result_type) hex |= (uint64_t) I->result_type << 30; + if (info.saturate) + hex |= (uint64_t)I->saturate << 30; + if (info.rhadd) + hex |= va_pack_rhadd(I); + if (info.clamp) + hex |= (uint64_t)I->clamp << 32; + if (info.round_mode) + hex |= (uint64_t)I->round << 30; + if (info.condition) + hex |= (uint64_t)I->cmpf << 32; + if (info.result_type) + hex |= (uint64_t)I->result_type << 30; return hex; } @@ -541,37 +644,35 @@ static uint64_t va_pack_byte_offset(const bi_instr *I) { int16_t offset = I->byte_offset; - if (offset != I->byte_offset) invalid_instruction(I, "byte offset"); + if (offset != I->byte_offset) + invalid_instruction(I, "byte offset"); uint16_t offset_as_u16 = offset; - return ((uint64_t) offset_as_u16) << 8; + return ((uint64_t)offset_as_u16) << 8; } static uint64_t va_pack_byte_offset_8(const bi_instr *I) { uint8_t offset = I->byte_offset; - if (offset != I->byte_offset) invalid_instruction(I, "byte offset"); + if (offset != I->byte_offset) + invalid_instruction(I, "byte offset"); - return ((uint64_t) offset) << 8; + return ((uint64_t)offset) << 8; } static uint64_t va_pack_load(const bi_instr *I, bool buffer_descriptor) { const uint8_t load_lane_identity[8] = { - VA_LOAD_LANE_8_BIT_B0, - VA_LOAD_LANE_16_BIT_H0, - VA_LOAD_LANE_24_BIT_IDENTITY, - VA_LOAD_LANE_32_BIT_W0, - VA_LOAD_LANE_48_BIT_IDENTITY, - VA_LOAD_LANE_64_BIT_IDENTITY, - VA_LOAD_LANE_96_BIT_IDENTITY, - VA_LOAD_LANE_128_BIT_IDENTITY, + VA_LOAD_LANE_8_BIT_B0, VA_LOAD_LANE_16_BIT_H0, + VA_LOAD_LANE_24_BIT_IDENTITY, VA_LOAD_LANE_32_BIT_W0, + VA_LOAD_LANE_48_BIT_IDENTITY, VA_LOAD_LANE_64_BIT_IDENTITY, + VA_LOAD_LANE_96_BIT_IDENTITY, VA_LOAD_LANE_128_BIT_IDENTITY, }; unsigned memory_size = (valhall_opcodes[I->op].exact >> 27) & 0x7; - uint64_t hex = (uint64_t) load_lane_identity[memory_size] << 36; + uint64_t hex = (uint64_t)load_lane_identity[memory_size] << 36; // unsigned hex |= (1ull << 39); @@ -579,10 +680,10 @@ va_pack_load(const bi_instr *I, bool buffer_descriptor) if (!buffer_descriptor) hex |= va_pack_byte_offset(I); - hex |= (uint64_t) va_pack_src(I, 0) << 0; + hex |= 
(uint64_t)va_pack_src(I, 0) << 0; if (buffer_descriptor) - hex |= (uint64_t) va_pack_src(I, 1) << 8; + hex |= (uint64_t)va_pack_src(I, 1) << 8; return hex; } @@ -591,10 +692,14 @@ static uint64_t va_pack_memory_access(const bi_instr *I) { switch (I->seg) { - case BI_SEG_TL: return VA_MEMORY_ACCESS_FORCE; - case BI_SEG_POS: return VA_MEMORY_ACCESS_ISTREAM; - case BI_SEG_VARY: return VA_MEMORY_ACCESS_ESTREAM; - default: return VA_MEMORY_ACCESS_NONE; + case BI_SEG_TL: + return VA_MEMORY_ACCESS_FORCE; + case BI_SEG_POS: + return VA_MEMORY_ACCESS_ISTREAM; + case BI_SEG_VARY: + return VA_MEMORY_ACCESS_ESTREAM; + default: + return VA_MEMORY_ACCESS_NONE; } } @@ -604,7 +709,7 @@ va_pack_store(const bi_instr *I) uint64_t hex = va_pack_memory_access(I) << 24; va_validate_register_pair(I, 1); - hex |= (uint64_t) va_pack_src(I, 1) << 0; + hex |= (uint64_t)va_pack_src(I, 1) << 0; hex |= va_pack_byte_offset(I); @@ -615,11 +720,16 @@ static enum va_lod_mode va_pack_lod_mode(const bi_instr *I) { switch (I->va_lod_mode) { - case BI_VA_LOD_MODE_ZERO_LOD: return VA_LOD_MODE_ZERO; - case BI_VA_LOD_MODE_COMPUTED_LOD: return VA_LOD_MODE_COMPUTED; - case BI_VA_LOD_MODE_EXPLICIT: return VA_LOD_MODE_EXPLICIT; - case BI_VA_LOD_MODE_COMPUTED_BIAS: return VA_LOD_MODE_COMPUTED_BIAS; - case BI_VA_LOD_MODE_GRDESC: return VA_LOD_MODE_GRDESC; + case BI_VA_LOD_MODE_ZERO_LOD: + return VA_LOD_MODE_ZERO; + case BI_VA_LOD_MODE_COMPUTED_LOD: + return VA_LOD_MODE_COMPUTED; + case BI_VA_LOD_MODE_EXPLICIT: + return VA_LOD_MODE_EXPLICIT; + case BI_VA_LOD_MODE_COMPUTED_BIAS: + return VA_LOD_MODE_COMPUTED_BIAS; + case BI_VA_LOD_MODE_GRDESC: + return VA_LOD_MODE_GRDESC; } invalid_instruction(I, "LOD mode"); @@ -650,14 +760,22 @@ static enum va_register_format va_pack_register_format(const bi_instr *I) { switch (I->register_format) { - case BI_REGISTER_FORMAT_AUTO: return VA_REGISTER_FORMAT_AUTO; - case BI_REGISTER_FORMAT_F32: return VA_REGISTER_FORMAT_F32; - case BI_REGISTER_FORMAT_F16: return VA_REGISTER_FORMAT_F16; - case BI_REGISTER_FORMAT_S32: return VA_REGISTER_FORMAT_S32; - case BI_REGISTER_FORMAT_S16: return VA_REGISTER_FORMAT_S16; - case BI_REGISTER_FORMAT_U32: return VA_REGISTER_FORMAT_U32; - case BI_REGISTER_FORMAT_U16: return VA_REGISTER_FORMAT_U16; - default: invalid_instruction(I, "register format"); + case BI_REGISTER_FORMAT_AUTO: + return VA_REGISTER_FORMAT_AUTO; + case BI_REGISTER_FORMAT_F32: + return VA_REGISTER_FORMAT_F32; + case BI_REGISTER_FORMAT_F16: + return VA_REGISTER_FORMAT_F16; + case BI_REGISTER_FORMAT_S32: + return VA_REGISTER_FORMAT_S32; + case BI_REGISTER_FORMAT_S16: + return VA_REGISTER_FORMAT_S16; + case BI_REGISTER_FORMAT_U32: + return VA_REGISTER_FORMAT_U32; + case BI_REGISTER_FORMAT_U16: + return VA_REGISTER_FORMAT_U16; + default: + invalid_instruction(I, "register format"); } } @@ -666,35 +784,34 @@ va_pack_instr(const bi_instr *I) { struct va_opcode_info info = valhall_opcodes[I->op]; - uint64_t hex = info.exact | (((uint64_t) I->flow) << 59); - hex |= ((uint64_t) va_select_fau_page(I)) << 57; + uint64_t hex = info.exact | (((uint64_t)I->flow) << 59); + hex |= ((uint64_t)va_select_fau_page(I)) << 57; if (info.slot) - hex |= ((uint64_t) I->slot << 30); + hex |= ((uint64_t)I->slot << 30); if (info.sr_count) { bool read = bi_opcode_props[I->op].sr_read; bi_index sr = read ? I->src[0] : I->dest[0]; - unsigned count = read ? - bi_count_read_registers(I, 0) : - bi_count_write_registers(I, 0); + unsigned count = + read ? 
bi_count_read_registers(I, 0) : bi_count_write_registers(I, 0); - hex |= ((uint64_t) count << 33); - hex |= (uint64_t) va_pack_reg(I, sr) << 40; - hex |= ((uint64_t) info.sr_control << 46); + hex |= ((uint64_t)count << 33); + hex |= (uint64_t)va_pack_reg(I, sr) << 40; + hex |= ((uint64_t)info.sr_control << 46); } if (info.sr_write_count) { - hex |= ((uint64_t) bi_count_write_registers(I, 0) - 1) << 36; - hex |= ((uint64_t) va_pack_reg(I, I->dest[0])) << 16; + hex |= ((uint64_t)bi_count_write_registers(I, 0) - 1) << 36; + hex |= ((uint64_t)va_pack_reg(I, I->dest[0])) << 16; } if (info.vecsize) - hex |= ((uint64_t) I->vecsize << 28); + hex |= ((uint64_t)I->vecsize << 28); if (info.register_format) - hex |= ((uint64_t) va_pack_register_format(I)) << 24; + hex |= ((uint64_t)va_pack_register_format(I)) << 24; switch (I->op) { case BI_OPCODE_LOAD_I8: @@ -738,18 +855,18 @@ va_pack_instr(const bi_instr *I) /* 64-bit source */ va_validate_register_pair(I, 0); - hex |= (uint64_t) va_pack_src(I, 0) << 0; + hex |= (uint64_t)va_pack_src(I, 0) << 0; hex |= va_pack_byte_offset_8(I); - hex |= ((uint64_t) va_pack_atom_opc_1(I)) << 22; + hex |= ((uint64_t)va_pack_atom_opc_1(I)) << 22; break; case BI_OPCODE_ATOM_I32: case BI_OPCODE_ATOM_RETURN_I32: /* 64-bit source */ va_validate_register_pair(I, 1); - hex |= (uint64_t) va_pack_src(I, 1) << 0; + hex |= (uint64_t)va_pack_src(I, 1) << 0; hex |= va_pack_byte_offset_8(I); - hex |= ((uint64_t) va_pack_atom_opc(I)) << 22; + hex |= ((uint64_t)va_pack_atom_opc(I)) << 22; if (I->op == BI_OPCODE_ATOM_RETURN_I32) hex |= (0xc0ull << 40); // flags @@ -764,56 +881,61 @@ va_pack_instr(const bi_instr *I) hex |= va_pack_store(I); /* Conversion descriptor */ - hex |= (uint64_t) va_pack_src(I, 3) << 16; + hex |= (uint64_t)va_pack_src(I, 3) << 16; break; - case BI_OPCODE_BLEND: - { + case BI_OPCODE_BLEND: { /* Source 0 - Blend descriptor (64-bit) */ - hex |= ((uint64_t) va_pack_src(I, 2)) << 0; + hex |= ((uint64_t)va_pack_src(I, 2)) << 0; va_validate_register_pair(I, 2); /* Target */ - if (I->branch_offset & 0x7) invalid_instruction(I, "unaligned branch"); + if (I->branch_offset & 0x7) + invalid_instruction(I, "unaligned branch"); hex |= ((I->branch_offset >> 3) << 8); /* Source 2 - coverage mask */ - hex |= ((uint64_t) va_pack_reg(I, I->src[1])) << 16; + hex |= ((uint64_t)va_pack_reg(I, I->src[1])) << 16; /* Vector size */ unsigned vecsize = 4; - hex |= ((uint64_t) (vecsize - 1) << 28); + hex |= ((uint64_t)(vecsize - 1) << 28); break; } case BI_OPCODE_TEX_SINGLE: case BI_OPCODE_TEX_FETCH: - case BI_OPCODE_TEX_GATHER: - { + case BI_OPCODE_TEX_GATHER: { /* Image to read from */ - hex |= ((uint64_t) va_pack_src(I, 1)) << 0; + hex |= ((uint64_t)va_pack_src(I, 1)) << 0; if (I->op == BI_OPCODE_TEX_FETCH && I->shadow) invalid_instruction(I, "TEX_FETCH does not support .shadow"); - if (I->array_enable) hex |= (1ull << 10); - if (I->texel_offset) hex |= (1ull << 11); - if (I->shadow) hex |= (1ull << 12); - if (I->skip) hex |= (1ull << 39); - if (!bi_is_regfmt_16(I->register_format)) hex |= (1ull << 46); + if (I->array_enable) + hex |= (1ull << 10); + if (I->texel_offset) + hex |= (1ull << 11); + if (I->shadow) + hex |= (1ull << 12); + if (I->skip) + hex |= (1ull << 39); + if (!bi_is_regfmt_16(I->register_format)) + hex |= (1ull << 46); if (I->op == BI_OPCODE_TEX_SINGLE) - hex |= ((uint64_t) va_pack_lod_mode(I)) << 13; + hex |= ((uint64_t)va_pack_lod_mode(I)) << 13; if (I->op == BI_OPCODE_TEX_GATHER) { - if (I->integer_coordinates) hex |= (1 << 13); - hex |= ((uint64_t) 
I->fetch_component) << 14; + if (I->integer_coordinates) + hex |= (1 << 13); + hex |= ((uint64_t)I->fetch_component) << 14; } hex |= (I->write_mask << 22); - hex |= ((uint64_t) va_pack_register_type(I)) << 26; - hex |= ((uint64_t) I->dimension) << 28; + hex |= ((uint64_t)va_pack_register_type(I)) << 26; + hex |= ((uint64_t)I->dimension) << 28; break; } diff --git a/src/panfrost/bifrost/valhall/va_perf.c b/src/panfrost/bifrost/valhall/va_perf.c index 7175302bf25..29a1424162f 100644 --- a/src/panfrost/bifrost/valhall/va_perf.c +++ b/src/panfrost/bifrost/valhall/va_perf.c @@ -22,9 +22,9 @@ * SOFTWARE. */ +#include "bi_builder.h" #include "va_compiler.h" #include "valhall.h" -#include "bi_builder.h" void va_count_instr_stats(bi_instr *I, struct va_stats *stats) @@ -48,8 +48,8 @@ va_count_instr_stats(bi_instr *I, struct va_stats *stats) /* Varying is scaled by 16-bit components interpolated */ case VA_UNIT_V: - stats->v += (I->vecsize + 1) * - (bi_is_regfmt_16(I->register_format) ? 1 : 2); + stats->v += + (I->vecsize + 1) * (bi_is_regfmt_16(I->register_format) ? 1 : 2); return; /* We just count load/store and texturing for now */ diff --git a/src/panfrost/bifrost/valhall/va_validate.c b/src/panfrost/bifrost/valhall/va_validate.c index 847a92a0867..0afa3dfccac 100644 --- a/src/panfrost/bifrost/valhall/va_validate.c +++ b/src/panfrost/bifrost/valhall/va_validate.c @@ -21,15 +21,16 @@ * SOFTWARE. */ +#include "bi_builder.h" #include "va_compiler.h" #include "valhall.h" -#include "bi_builder.h" /* Valhall has limits on access to fast-access uniforms: * * An instruction may access no more than a single 64-bit uniform slot. - * An instruction may access no more than 64-bits of combined uniforms and constants. - * An instruction may access no more than a single special immediate (e.g. lane_id). + * An instruction may access no more than 64-bits of combined uniforms and + * constants. An instruction may access no more than a single special immediate + * (e.g. lane_id). * * We validate these constraints. 
* @@ -114,7 +115,7 @@ bool va_validate_fau(bi_instr *I) { bool valid = true; - struct fau_state fau = { .uniform_slot = -1 }; + struct fau_state fau = {.uniform_slot = -1}; unsigned fau_page = va_select_fau_page(I); bi_foreach_src(I, s) { @@ -127,7 +128,7 @@ va_validate_fau(bi_instr *I) void va_repair_fau(bi_builder *b, bi_instr *I) { - struct fau_state fau = { .uniform_slot = -1 }; + struct fau_state fau = {.uniform_slot = -1}; unsigned fau_page = va_select_fau_page(I); bi_foreach_src(I, s) { diff --git a/src/panfrost/bifrost/valhall/valhall.h b/src/panfrost/bifrost/valhall/valhall.h index f3fcc1ce435..14442946664 100644 --- a/src/panfrost/bifrost/valhall/valhall.h +++ b/src/panfrost/bifrost/valhall/valhall.h @@ -73,43 +73,42 @@ enum va_unit { }; struct va_src_info { - bool absneg : 1; - bool swizzle : 1; - bool notted : 1; - bool lane : 1; - bool lanes : 1; - bool halfswizzle : 1; - bool widen : 1; - bool combine : 1; + bool absneg : 1; + bool swizzle : 1; + bool notted : 1; + bool lane : 1; + bool lanes : 1; + bool halfswizzle : 1; + bool widen : 1; + bool combine : 1; enum va_size size : 2; } __attribute__((packed)); struct va_opcode_info { uint64_t exact; struct va_src_info srcs[4]; - uint8_t type_size : 8; - enum va_unit unit : 3; - unsigned nr_srcs : 3; - unsigned nr_staging_srcs : 2; + uint8_t type_size : 8; + enum va_unit unit : 3; + unsigned nr_srcs : 3; + unsigned nr_staging_srcs : 2; unsigned nr_staging_dests : 2; - bool has_dest : 1; - bool is_signed : 1; - bool clamp : 1; - bool saturate : 1; - bool rhadd : 1; - bool round_mode : 1; - bool condition : 1; - bool result_type : 1; - bool vecsize : 1; - bool register_format : 1; - bool slot : 1; - bool sr_count : 1; - bool sr_write_count : 1; - unsigned sr_control : 2; + bool has_dest : 1; + bool is_signed : 1; + bool clamp : 1; + bool saturate : 1; + bool rhadd : 1; + bool round_mode : 1; + bool condition : 1; + bool result_type : 1; + bool vecsize : 1; + bool register_format : 1; + bool slot : 1; + bool sr_count : 1; + bool sr_write_count : 1; + unsigned sr_control : 2; }; -extern const struct va_opcode_info -valhall_opcodes[BI_NUM_OPCODES]; +extern const struct va_opcode_info valhall_opcodes[BI_NUM_OPCODES]; /* Bifrost specifies the source of bitwise operations as (A, B, shift), but * Valhall specifies (A, shift, B). We follow Bifrost conventions in the diff --git a/src/panfrost/drm-shim/panfrost_noop.c b/src/panfrost/drm-shim/panfrost_noop.c index 1b3d50c5e5d..bf3e97d17fb 100644 --- a/src/panfrost/drm-shim/panfrost_noop.c +++ b/src/panfrost/drm-shim/panfrost_noop.c @@ -47,8 +47,7 @@ pan_ioctl_get_param(int fd, unsigned long request, void *arg) struct drm_panfrost_get_param *gp = arg; switch (gp->param) { - case DRM_PANFROST_PARAM_GPU_PROD_ID: - { + case DRM_PANFROST_PARAM_GPU_PROD_ID: { char *override_version = getenv("PAN_GPU_ID"); if (override_version) diff --git a/src/panfrost/ds/pan_pps_driver.h b/src/panfrost/ds/pan_pps_driver.h index f6476d9dee3..9392b9a5673 100644 --- a/src/panfrost/ds/pan_pps_driver.h +++ b/src/panfrost/ds/pan_pps_driver.h @@ -13,22 +13,21 @@ #include "pan_pps_perf.h" -namespace pps -{ +namespace pps { /// @brief Panfrost implementation of PPS driver. -/// This driver queries the GPU through `drm/panfrost_drm.h`, using performance counters ioctls, -/// which can be enabled by setting a kernel parameter: `modprobe panfrost unstable_ioctls=1`. -/// The ioctl needs a buffer to copy data from kernel to user space. 
-class PanfrostDriver : public Driver -{ - public: +/// This driver queries the GPU through `drm/panfrost_drm.h`, using performance +/// counters ioctls, which can be enabled by setting a kernel parameter: +/// `modprobe panfrost unstable_ioctls=1`. The ioctl needs a buffer to copy data +/// from kernel to user space. +class PanfrostDriver : public Driver { + public: static inline PanfrostDriver &into(Driver &dri); static inline const PanfrostDriver &into(const Driver &dri); /// @param A list of mali counter names /// @return A pair with two lists: counter groups and available counters - static std::pair<std::vector<CounterGroup>, std::vector<Counter>> create_available_counters( - const PanfrostPerf& perf); + static std::pair<std::vector<CounterGroup>, std::vector<Counter>> + create_available_counters(const PanfrostPerf &perf); PanfrostDriver(); ~PanfrostDriver(); @@ -50,12 +49,14 @@ class PanfrostDriver : public Driver std::unique_ptr<PanfrostPerf> perf = nullptr; }; -PanfrostDriver &PanfrostDriver::into(Driver &dri) +PanfrostDriver & +PanfrostDriver::into(Driver &dri) { return reinterpret_cast<PanfrostDriver &>(dri); } -const PanfrostDriver &PanfrostDriver::into(const Driver &dri) +const PanfrostDriver & +PanfrostDriver::into(const Driver &dri) { return reinterpret_cast<const PanfrostDriver &>(dri); } diff --git a/src/panfrost/ds/pan_pps_perf.h b/src/panfrost/ds/pan_pps_perf.h index 48ae2f58e53..c046e09b0df 100644 --- a/src/panfrost/ds/pan_pps_perf.h +++ b/src/panfrost/ds/pan_pps_perf.h @@ -10,35 +10,32 @@ struct panfrost_device; struct panfrost_perf; -namespace pps -{ -class PanfrostDevice -{ - public: +namespace pps { +class PanfrostDevice { + public: PanfrostDevice(int fd); ~PanfrostDevice(); PanfrostDevice(const PanfrostDevice &) = delete; PanfrostDevice &operator=(const PanfrostDevice &) = delete; - PanfrostDevice(PanfrostDevice&&); - PanfrostDevice& operator=(PanfrostDevice&&); + PanfrostDevice(PanfrostDevice &&); + PanfrostDevice &operator=(PanfrostDevice &&); void *ctx = nullptr; - struct panfrost_device* dev = nullptr; + struct panfrost_device *dev = nullptr; }; -class PanfrostPerf -{ - public: - PanfrostPerf(const PanfrostDevice& dev); +class PanfrostPerf { + public: + PanfrostPerf(const PanfrostDevice &dev); ~PanfrostPerf(); PanfrostPerf(const PanfrostPerf &) = delete; PanfrostPerf &operator=(const PanfrostPerf &) = delete; - PanfrostPerf(PanfrostPerf&&); - PanfrostPerf& operator=(PanfrostPerf&&); + PanfrostPerf(PanfrostPerf &&); + PanfrostPerf &operator=(PanfrostPerf &&); int enable() const; void disable() const; diff --git a/src/panfrost/include/panfrost-job.h b/src/panfrost/include/panfrost-job.h index 19e537fcebf..fe09389f630 100644 --- a/src/panfrost/include/panfrost-job.h +++ b/src/panfrost/include/panfrost-job.h @@ -28,11 +28,11 @@ #ifndef __PANFROST_JOB_H__ #define __PANFROST_JOB_H__ -#include -#include #include +#include +#include -typedef uint8_t u8; +typedef uint8_t u8; typedef uint16_t u16; typedef uint32_t u32; typedef uint64_t u64; @@ -68,13 +68,13 @@ typedef uint64_t mali_ptr; /* These formats seem to largely duplicate the others. They're used at least * for Bifrost framebuffer output. */ -#define MALI_FORMAT_SPECIAL2 (7 << 5) -#define MALI_EXTRACT_TYPE(fmt) ((fmt) & 0xe0) +#define MALI_FORMAT_SPECIAL2 (7 << 5) +#define MALI_EXTRACT_TYPE(fmt) ((fmt)&0xe0) /* If the high 3 bits are 3 to 6 these two bits say how many components * there are.
*/ -#define MALI_NR_CHANNELS(n) ((n - 1) << 3) +#define MALI_NR_CHANNELS(n) ((n - 1) << 3) #define MALI_EXTRACT_CHANNELS(fmt) ((((fmt) >> 3) & 3) + 1) /* If the high 3 bits are 3 to 6, then the low 3 bits say how big each @@ -93,7 +93,7 @@ typedef uint64_t mali_ptr; /* For MALI_FORMAT_SINT it means a half-float (e.g. RG16F). For * MALI_FORMAT_UNORM, it means a 32-bit float. */ -#define MALI_CHANNEL_FLOAT 7 +#define MALI_CHANNEL_FLOAT 7 #define MALI_EXTRACT_BITS(fmt) (fmt & 0x7) #define MALI_EXTRACT_INDEX(pixfmt) (((pixfmt) >> 12) & 0xFF) @@ -241,18 +241,18 @@ typedef uint64_t mali_ptr; /* Used for lod encoding. Thanks @urjaman for pointing out these routines can * be cleaned up a lot. */ -#define DECODE_FIXED_16(x) ((float) (x / 256.0)) +#define DECODE_FIXED_16(x) ((float)(x / 256.0)) static inline int16_t FIXED_16(float x, bool allow_negative) { - /* Clamp inputs, accounting for float error */ - float max_lod = (32.0 - (1.0 / 512.0)); - float min_lod = allow_negative ? -max_lod : 0.0; + /* Clamp inputs, accounting for float error */ + float max_lod = (32.0 - (1.0 / 512.0)); + float min_lod = allow_negative ? -max_lod : 0.0; - x = ((x > max_lod) ? max_lod : ((x < min_lod) ? min_lod : x)); + x = ((x > max_lod) ? max_lod : ((x < min_lod) ? min_lod : x)); - return (int) (x * 256.0); + return (int)(x * 256.0); } #endif /* __PANFROST_JOB_H__ */ diff --git a/src/panfrost/lib/genxml/decode.c b/src/panfrost/lib/genxml/decode.c index 9f942505e04..aa92af16d2e 100644 --- a/src/panfrost/lib/genxml/decode.c +++ b/src/panfrost/lib/genxml/decode.c @@ -23,73 +23,77 @@ * SOFTWARE. */ -#include +#include "decode.h" +#include +#include +#include +#include +#include #include #include -#include -#include -#include -#include -#include -#include "decode.h" +#include -#include "util/set.h" -#include "midgard/disassemble.h" #include "bifrost/disassemble.h" #include "bifrost/valhall/disassemble.h" +#include "midgard/disassemble.h" +#include "util/set.h" -#define DUMP_UNPACKED(T, var, ...) { \ - pandecode_log(__VA_ARGS__); \ - pan_print(pandecode_dump_stream, T, var, (pandecode_indent + 1) * 2); \ -} +#define DUMP_UNPACKED(T, var, ...) \ + { \ + pandecode_log(__VA_ARGS__); \ + pan_print(pandecode_dump_stream, T, var, (pandecode_indent + 1) * 2); \ + } -#define DUMP_CL(T, cl, ...) {\ - pan_unpack(cl, T, temp); \ - DUMP_UNPACKED(T, temp, __VA_ARGS__); \ -} +#define DUMP_CL(T, cl, ...) \ + { \ + pan_unpack(cl, T, temp); \ + DUMP_UNPACKED(T, temp, __VA_ARGS__); \ + } -#define DUMP_SECTION(A, S, cl, ...) { \ - pan_section_unpack(cl, A, S, temp); \ - pandecode_log(__VA_ARGS__); \ - pan_section_print(pandecode_dump_stream, A, S, temp, (pandecode_indent + 1) * 2); \ -} +#define DUMP_SECTION(A, S, cl, ...) \ + { \ + pan_section_unpack(cl, A, S, temp); \ + pandecode_log(__VA_ARGS__); \ + pan_section_print(pandecode_dump_stream, A, S, temp, \ + (pandecode_indent + 1) * 2); \ + } -#define MAP_ADDR(T, addr, cl) \ - const uint8_t *cl = pandecode_fetch_gpu_mem(addr, pan_size(T)); +#define MAP_ADDR(T, addr, cl) \ + const uint8_t *cl = pandecode_fetch_gpu_mem(addr, pan_size(T)); -#define DUMP_ADDR(T, addr, ...) {\ - MAP_ADDR(T, addr, cl) \ - DUMP_CL(T, cl, __VA_ARGS__); \ -} +#define DUMP_ADDR(T, addr, ...) 
\ + { \ + MAP_ADDR(T, addr, cl) \ + DUMP_CL(T, cl, __VA_ARGS__); \ + } static unsigned pandecode_indent = 0; static void pandecode_make_indent(void) { - for (unsigned i = 0; i < pandecode_indent; ++i) - fprintf(pandecode_dump_stream, " "); + for (unsigned i = 0; i < pandecode_indent; ++i) + fprintf(pandecode_dump_stream, " "); } -static void PRINTFLIKE(1, 2) -pandecode_log(const char *format, ...) +static void PRINTFLIKE(1, 2) pandecode_log(const char *format, ...) { - va_list ap; + va_list ap; - pandecode_make_indent(); - va_start(ap, format); - vfprintf(pandecode_dump_stream, format, ap); - va_end(ap); + pandecode_make_indent(); + va_start(ap, format); + vfprintf(pandecode_dump_stream, format, ap); + va_end(ap); } static void pandecode_log_cont(const char *format, ...) { - va_list ap; + va_list ap; - va_start(ap, format); - vfprintf(pandecode_dump_stream, format, ap); - va_end(ap); + va_start(ap, format); + vfprintf(pandecode_dump_stream, format, ap); + va_end(ap); } /* To check for memory safety issues, validates that the given pointer in GPU @@ -101,33 +105,33 @@ pandecode_log_cont(const char *format, ...) static void pandecode_validate_buffer(mali_ptr addr, size_t sz) { - if (!addr) { - pandecode_log("// XXX: null pointer deref\n"); - return; - } + if (!addr) { + pandecode_log("// XXX: null pointer deref\n"); + return; + } - /* Find a BO */ + /* Find a BO */ - struct pandecode_mapped_memory *bo = - pandecode_find_mapped_gpu_mem_containing(addr); + struct pandecode_mapped_memory *bo = + pandecode_find_mapped_gpu_mem_containing(addr); - if (!bo) { - pandecode_log("// XXX: invalid memory dereference\n"); - return; - } + if (!bo) { + pandecode_log("// XXX: invalid memory dereference\n"); + return; + } - /* Bounds check */ + /* Bounds check */ - unsigned offset = addr - bo->gpu_va; - unsigned total = offset + sz; + unsigned offset = addr - bo->gpu_va; + unsigned total = offset + sz; - if (total > bo->length) { - pandecode_log("// XXX: buffer overrun. " - "Chunk of size %zu at offset %d in buffer of size %zu. " - "Overrun by %zu bytes. \n", - sz, offset, bo->length, total - bo->length); - return; - } + if (total > bo->length) { + pandecode_log("// XXX: buffer overrun. " + "Chunk of size %zu at offset %d in buffer of size %zu. " + "Overrun by %zu bytes. 
\n", + sz, offset, bo->length, total - bo->length); + return; + } } #if PAN_ARCH <= 5 @@ -135,28 +139,27 @@ pandecode_validate_buffer(mali_ptr addr, size_t sz) * larger FBD */ static void -pandecode_midgard_tiler_descriptor( - const struct mali_tiler_context_packed *tp, - const struct mali_tiler_weights_packed *wp) +pandecode_midgard_tiler_descriptor(const struct mali_tiler_context_packed *tp, + const struct mali_tiler_weights_packed *wp) { - pan_unpack(tp, TILER_CONTEXT, t); - DUMP_UNPACKED(TILER_CONTEXT, t, "Tiler:\n"); + pan_unpack(tp, TILER_CONTEXT, t); + DUMP_UNPACKED(TILER_CONTEXT, t, "Tiler:\n"); - /* We've never seen weights used in practice, but they exist */ - pan_unpack(wp, TILER_WEIGHTS, w); - bool nonzero_weights = false; + /* We've never seen weights used in practice, but they exist */ + pan_unpack(wp, TILER_WEIGHTS, w); + bool nonzero_weights = false; - nonzero_weights |= w.weight0 != 0x0; - nonzero_weights |= w.weight1 != 0x0; - nonzero_weights |= w.weight2 != 0x0; - nonzero_weights |= w.weight3 != 0x0; - nonzero_weights |= w.weight4 != 0x0; - nonzero_weights |= w.weight5 != 0x0; - nonzero_weights |= w.weight6 != 0x0; - nonzero_weights |= w.weight7 != 0x0; + nonzero_weights |= w.weight0 != 0x0; + nonzero_weights |= w.weight1 != 0x0; + nonzero_weights |= w.weight2 != 0x0; + nonzero_weights |= w.weight3 != 0x0; + nonzero_weights |= w.weight4 != 0x0; + nonzero_weights |= w.weight5 != 0x0; + nonzero_weights |= w.weight6 != 0x0; + nonzero_weights |= w.weight7 != 0x0; - if (nonzero_weights) - DUMP_UNPACKED(TILER_WEIGHTS, w, "Tiler Weights:\n"); + if (nonzero_weights) + DUMP_UNPACKED(TILER_WEIGHTS, w, "Tiler Weights:\n"); } #endif @@ -164,25 +167,27 @@ pandecode_midgard_tiler_descriptor( static void pandecode_local_storage(uint64_t gpu_va) { - const struct mali_local_storage_packed *PANDECODE_PTR_VAR(s, (mali_ptr) gpu_va); - DUMP_CL(LOCAL_STORAGE, s, "Local Storage:\n"); + const struct mali_local_storage_packed *PANDECODE_PTR_VAR(s, + (mali_ptr)gpu_va); + DUMP_CL(LOCAL_STORAGE, s, "Local Storage:\n"); } static void pandecode_render_target(uint64_t gpu_va, unsigned gpu_id, const struct MALI_FRAMEBUFFER_PARAMETERS *fb) { - pandecode_log("Color Render Targets:\n"); - pandecode_indent++; + pandecode_log("Color Render Targets:\n"); + pandecode_indent++; - for (int i = 0; i < (fb->render_target_count); i++) { - mali_ptr rt_va = gpu_va + i * pan_size(RENDER_TARGET); - const struct mali_render_target_packed *PANDECODE_PTR_VAR(rtp, (mali_ptr) rt_va); - DUMP_CL(RENDER_TARGET, rtp, "Color Render Target %d:\n", i); - } + for (int i = 0; i < (fb->render_target_count); i++) { + mali_ptr rt_va = gpu_va + i * pan_size(RENDER_TARGET); + const struct mali_render_target_packed *PANDECODE_PTR_VAR( + rtp, (mali_ptr)rt_va); + DUMP_CL(RENDER_TARGET, rtp, "Color Render Target %d:\n", i); + } - pandecode_indent--; - pandecode_log("\n"); + pandecode_indent--; + pandecode_log("\n"); } #endif @@ -190,156 +195,158 @@ pandecode_render_target(uint64_t gpu_va, unsigned gpu_id, static void pandecode_sample_locations(const void *fb) { - pan_section_unpack(fb, FRAMEBUFFER, PARAMETERS, params); + pan_section_unpack(fb, FRAMEBUFFER, PARAMETERS, params); - const u16 *PANDECODE_PTR_VAR(samples, params.sample_locations); + const u16 *PANDECODE_PTR_VAR(samples, params.sample_locations); - pandecode_log("Sample locations:\n"); - for (int i = 0; i < 33; i++) { - pandecode_log(" (%d, %d),\n", - samples[2 * i] - 128, - samples[2 * i + 1] - 128); - } + pandecode_log("Sample locations:\n"); + for (int i = 0; i < 33; i++) { + 
pandecode_log(" (%d, %d),\n", samples[2 * i] - 128, + samples[2 * i + 1] - 128); + } } #endif -static void -pandecode_dcd(const struct MALI_DRAW *p, enum mali_job_type job_type, - unsigned gpu_id); +static void pandecode_dcd(const struct MALI_DRAW *p, + enum mali_job_type job_type, unsigned gpu_id); /* Information about the framebuffer passed back for additional analysis */ struct pandecode_fbd { - unsigned rt_count; - bool has_extra; + unsigned rt_count; + bool has_extra; }; static struct pandecode_fbd pandecode_fbd(uint64_t gpu_va, bool is_fragment, unsigned gpu_id) { #if PAN_ARCH >= 5 - /* We only see MFBDs on architectures that support them */ - assert(gpu_va & MALI_FBD_TAG_IS_MFBD); - gpu_va &= ~MALI_FBD_TAG_MASK; + /* We only see MFBDs on architectures that support them */ + assert(gpu_va & MALI_FBD_TAG_IS_MFBD); + gpu_va &= ~MALI_FBD_TAG_MASK; #endif - const void *PANDECODE_PTR_VAR(fb, (mali_ptr) gpu_va); - pan_section_unpack(fb, FRAMEBUFFER, PARAMETERS, params); + const void *PANDECODE_PTR_VAR(fb, (mali_ptr)gpu_va); + pan_section_unpack(fb, FRAMEBUFFER, PARAMETERS, params); #if PAN_ARCH >= 6 - pandecode_sample_locations(fb); + pandecode_sample_locations(fb); - unsigned dcd_size = pan_size(DRAW); + unsigned dcd_size = pan_size(DRAW); - if (params.pre_frame_0 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) { - const void *PANDECODE_PTR_VAR(dcd, params.frame_shader_dcds + (0 * dcd_size)); - pan_unpack(dcd, DRAW, draw); - pandecode_log("Pre frame 0:\n"); - pandecode_dcd(&draw, MALI_JOB_TYPE_FRAGMENT, gpu_id); - } + if (params.pre_frame_0 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) { + const void *PANDECODE_PTR_VAR(dcd, + params.frame_shader_dcds + (0 * dcd_size)); + pan_unpack(dcd, DRAW, draw); + pandecode_log("Pre frame 0:\n"); + pandecode_dcd(&draw, MALI_JOB_TYPE_FRAGMENT, gpu_id); + } - if (params.pre_frame_1 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) { - const void *PANDECODE_PTR_VAR(dcd, params.frame_shader_dcds + (1 * dcd_size)); - pan_unpack(dcd, DRAW, draw); - pandecode_log("Pre frame 1:\n"); - pandecode_dcd(&draw, MALI_JOB_TYPE_FRAGMENT, gpu_id); - } + if (params.pre_frame_1 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) { + const void *PANDECODE_PTR_VAR(dcd, + params.frame_shader_dcds + (1 * dcd_size)); + pan_unpack(dcd, DRAW, draw); + pandecode_log("Pre frame 1:\n"); + pandecode_dcd(&draw, MALI_JOB_TYPE_FRAGMENT, gpu_id); + } - if (params.post_frame != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) { - const void *PANDECODE_PTR_VAR(dcd, params.frame_shader_dcds + (2 * dcd_size)); - pan_unpack(dcd, DRAW, draw); - pandecode_log("Post frame:\n"); - pandecode_dcd(&draw, MALI_JOB_TYPE_FRAGMENT, gpu_id); - } + if (params.post_frame != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) { + const void *PANDECODE_PTR_VAR(dcd, + params.frame_shader_dcds + (2 * dcd_size)); + pan_unpack(dcd, DRAW, draw); + pandecode_log("Post frame:\n"); + pandecode_dcd(&draw, MALI_JOB_TYPE_FRAGMENT, gpu_id); + } #else - DUMP_SECTION(FRAMEBUFFER, LOCAL_STORAGE, fb, "Local Storage:\n"); + DUMP_SECTION(FRAMEBUFFER, LOCAL_STORAGE, fb, "Local Storage:\n"); - const void *t = pan_section_ptr(fb, FRAMEBUFFER, TILER); - const void *w = pan_section_ptr(fb, FRAMEBUFFER, TILER_WEIGHTS); - pandecode_midgard_tiler_descriptor(t, w); + const void *t = pan_section_ptr(fb, FRAMEBUFFER, TILER); + const void *w = pan_section_ptr(fb, FRAMEBUFFER, TILER_WEIGHTS); + pandecode_midgard_tiler_descriptor(t, w); #endif - pandecode_log("Framebuffer:\n"); - pandecode_indent++; + pandecode_log("Framebuffer:\n"); + pandecode_indent++; - 
DUMP_UNPACKED(FRAMEBUFFER_PARAMETERS, params, "Parameters:\n"); + DUMP_UNPACKED(FRAMEBUFFER_PARAMETERS, params, "Parameters:\n"); - pandecode_indent--; - pandecode_log("\n"); + pandecode_indent--; + pandecode_log("\n"); #if PAN_ARCH >= 5 - gpu_va += pan_size(FRAMEBUFFER); + gpu_va += pan_size(FRAMEBUFFER); - if (params.has_zs_crc_extension) { - const struct mali_zs_crc_extension_packed *PANDECODE_PTR_VAR(zs_crc, (mali_ptr)gpu_va); - DUMP_CL(ZS_CRC_EXTENSION, zs_crc, "ZS CRC Extension:\n"); - pandecode_log("\n"); + if (params.has_zs_crc_extension) { + const struct mali_zs_crc_extension_packed *PANDECODE_PTR_VAR( + zs_crc, (mali_ptr)gpu_va); + DUMP_CL(ZS_CRC_EXTENSION, zs_crc, "ZS CRC Extension:\n"); + pandecode_log("\n"); - gpu_va += pan_size(ZS_CRC_EXTENSION); - } + gpu_va += pan_size(ZS_CRC_EXTENSION); + } - if (is_fragment) - pandecode_render_target(gpu_va, gpu_id, &params); + if (is_fragment) + pandecode_render_target(gpu_va, gpu_id, &params); - return (struct pandecode_fbd) { - .rt_count = params.render_target_count, - .has_extra = params.has_zs_crc_extension, - }; + return (struct pandecode_fbd){ + .rt_count = params.render_target_count, + .has_extra = params.has_zs_crc_extension, + }; #else - /* Dummy unpack of the padding section to make sure all words are 0. - * No need to call print here since the section is supposed to be empty. - */ - pan_section_unpack(fb, FRAMEBUFFER, PADDING_1, padding1); - pan_section_unpack(fb, FRAMEBUFFER, PADDING_2, padding2); + /* Dummy unpack of the padding section to make sure all words are 0. + * No need to call print here since the section is supposed to be empty. + */ + pan_section_unpack(fb, FRAMEBUFFER, PADDING_1, padding1); + pan_section_unpack(fb, FRAMEBUFFER, PADDING_2, padding2); - return (struct pandecode_fbd) { - .rt_count = 1, - }; + return (struct pandecode_fbd){ + .rt_count = 1, + }; #endif } #if PAN_ARCH <= 7 static void -pandecode_attributes(mali_ptr addr, int count, - bool varying, enum mali_job_type job_type) +pandecode_attributes(mali_ptr addr, int count, bool varying, + enum mali_job_type job_type) { - char *prefix = varying ?
"Varying" : "Attribute"; + assert(addr); - if (!count) { - pandecode_log("// warn: No %s records\n", prefix); - return; - } + if (!count) { + pandecode_log("// warn: No %s records\n", prefix); + return; + } - MAP_ADDR(ATTRIBUTE_BUFFER, addr, cl); + MAP_ADDR(ATTRIBUTE_BUFFER, addr, cl); - for (int i = 0; i < count; ++i) { - pan_unpack(cl + i * pan_size(ATTRIBUTE_BUFFER), ATTRIBUTE_BUFFER, temp); - DUMP_UNPACKED(ATTRIBUTE_BUFFER, temp, "%s:\n", prefix); + for (int i = 0; i < count; ++i) { + pan_unpack(cl + i * pan_size(ATTRIBUTE_BUFFER), ATTRIBUTE_BUFFER, temp); + DUMP_UNPACKED(ATTRIBUTE_BUFFER, temp, "%s:\n", prefix); - switch (temp.type) { - case MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR_WRITE_REDUCTION: - case MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR: { - pan_unpack(cl + (i + 1) * pan_size(ATTRIBUTE_BUFFER), - ATTRIBUTE_BUFFER_CONTINUATION_NPOT, temp2); - pan_print(pandecode_dump_stream, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, - temp2, (pandecode_indent + 1) * 2); - i++; - break; - } - case MALI_ATTRIBUTE_TYPE_3D_LINEAR: - case MALI_ATTRIBUTE_TYPE_3D_INTERLEAVED: { - pan_unpack(cl + (i + 1) * pan_size(ATTRIBUTE_BUFFER_CONTINUATION_3D), - ATTRIBUTE_BUFFER_CONTINUATION_3D, temp2); - pan_print(pandecode_dump_stream, ATTRIBUTE_BUFFER_CONTINUATION_3D, - temp2, (pandecode_indent + 1) * 2); - i++; - break; - } - default: - break; - } - } - pandecode_log("\n"); + switch (temp.type) { + case MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR_WRITE_REDUCTION: + case MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR: { + pan_unpack(cl + (i + 1) * pan_size(ATTRIBUTE_BUFFER), + ATTRIBUTE_BUFFER_CONTINUATION_NPOT, temp2); + pan_print(pandecode_dump_stream, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, + temp2, (pandecode_indent + 1) * 2); + i++; + break; + } + case MALI_ATTRIBUTE_TYPE_3D_LINEAR: + case MALI_ATTRIBUTE_TYPE_3D_INTERLEAVED: { + pan_unpack(cl + (i + 1) * pan_size(ATTRIBUTE_BUFFER_CONTINUATION_3D), + ATTRIBUTE_BUFFER_CONTINUATION_3D, temp2); + pan_print(pandecode_dump_stream, ATTRIBUTE_BUFFER_CONTINUATION_3D, + temp2, (pandecode_indent + 1) * 2); + i++; + break; + } + default: + break; + } + } + pandecode_log("\n"); } #endif @@ -347,15 +354,15 @@ pandecode_attributes(mali_ptr addr, int count, static mali_ptr pandecode_blend(void *descs, int rt_no, mali_ptr frag_shader) { - pan_unpack(descs + (rt_no * pan_size(BLEND)), BLEND, b); - DUMP_UNPACKED(BLEND, b, "Blend RT %d:\n", rt_no); + pan_unpack(descs + (rt_no * pan_size(BLEND)), BLEND, b); + DUMP_UNPACKED(BLEND, b, "Blend RT %d:\n", rt_no); #if PAN_ARCH >= 6 - if (b.internal.mode != MALI_BLEND_MODE_SHADER) - return 0; + if (b.internal.mode != MALI_BLEND_MODE_SHADER) + return 0; - return (frag_shader & 0xFFFFFFFF00000000ULL) | b.internal.shader.pc; + return (frag_shader & 0xFFFFFFFF00000000ULL) | b.internal.shader.pc; #else - return b.blend_shader ? (b.shader_pc & ~0xf) : 0; + return b.blend_shader ? (b.shader_pc & ~0xf) : 0; #endif } #endif @@ -364,207 +371,219 @@ pandecode_blend(void *descs, int rt_no, mali_ptr frag_shader) static unsigned pandecode_attribute_meta(int count, mali_ptr attribute, bool varying) { - unsigned max = 0; + unsigned max = 0; - for (int i = 0; i < count; ++i, attribute += pan_size(ATTRIBUTE)) { - MAP_ADDR(ATTRIBUTE, attribute, cl); - pan_unpack(cl, ATTRIBUTE, a); - DUMP_UNPACKED(ATTRIBUTE, a, "%s:\n", varying ? "Varying" : "Attribute"); - max = MAX2(max, a.buffer_index); - } + for (int i = 0; i < count; ++i, attribute += pan_size(ATTRIBUTE)) { + MAP_ADDR(ATTRIBUTE, attribute, cl); + pan_unpack(cl, ATTRIBUTE, a); + DUMP_UNPACKED(ATTRIBUTE, a, "%s:\n", varying ? 
"Varying" : "Attribute"); + max = MAX2(max, a.buffer_index); + } - pandecode_log("\n"); - return MIN2(max + 1, 256); + pandecode_log("\n"); + return MIN2(max + 1, 256); } /* return bits [lo, hi) of word */ static u32 bits(u32 word, u32 lo, u32 hi) { - if (hi - lo >= 32) - return word; // avoid undefined behavior with the shift + if (hi - lo >= 32) + return word; // avoid undefined behavior with the shift - if (lo >= 32) - return 0; + if (lo >= 32) + return 0; - return (word >> lo) & ((1 << (hi - lo)) - 1); + return (word >> lo) & ((1 << (hi - lo)) - 1); } static void pandecode_invocation(const void *i) { - /* Decode invocation_count. See the comment before the definition of - * invocation_count for an explanation. - */ - pan_unpack(i, INVOCATION, invocation); + /* Decode invocation_count. See the comment before the definition of + * invocation_count for an explanation. + */ + pan_unpack(i, INVOCATION, invocation); - unsigned size_x = bits(invocation.invocations, 0, invocation.size_y_shift) + 1; - unsigned size_y = bits(invocation.invocations, invocation.size_y_shift, invocation.size_z_shift) + 1; - unsigned size_z = bits(invocation.invocations, invocation.size_z_shift, invocation.workgroups_x_shift) + 1; + unsigned size_x = + bits(invocation.invocations, 0, invocation.size_y_shift) + 1; + unsigned size_y = bits(invocation.invocations, invocation.size_y_shift, + invocation.size_z_shift) + + 1; + unsigned size_z = bits(invocation.invocations, invocation.size_z_shift, + invocation.workgroups_x_shift) + + 1; - unsigned groups_x = bits(invocation.invocations, invocation.workgroups_x_shift, invocation.workgroups_y_shift) + 1; - unsigned groups_y = bits(invocation.invocations, invocation.workgroups_y_shift, invocation.workgroups_z_shift) + 1; - unsigned groups_z = bits(invocation.invocations, invocation.workgroups_z_shift, 32) + 1; + unsigned groups_x = + bits(invocation.invocations, invocation.workgroups_x_shift, + invocation.workgroups_y_shift) + + 1; + unsigned groups_y = + bits(invocation.invocations, invocation.workgroups_y_shift, + invocation.workgroups_z_shift) + + 1; + unsigned groups_z = + bits(invocation.invocations, invocation.workgroups_z_shift, 32) + 1; - pandecode_log("Invocation (%d, %d, %d) x (%d, %d, %d)\n", - size_x, size_y, size_z, - groups_x, groups_y, groups_z); + pandecode_log("Invocation (%d, %d, %d) x (%d, %d, %d)\n", size_x, size_y, + size_z, groups_x, groups_y, groups_z); - DUMP_UNPACKED(INVOCATION, invocation, "Invocation:\n") + DUMP_UNPACKED(INVOCATION, invocation, "Invocation:\n") } #endif static void pandecode_primitive(const void *p) { - pan_unpack(p, PRIMITIVE, primitive); - DUMP_UNPACKED(PRIMITIVE, primitive, "Primitive:\n"); + pan_unpack(p, PRIMITIVE, primitive); + DUMP_UNPACKED(PRIMITIVE, primitive, "Primitive:\n"); #if PAN_ARCH <= 7 - /* Validate an index buffer is present if we need one. TODO: verify - * relationship between invocation_count and index_count */ + /* Validate an index buffer is present if we need one. TODO: verify + * relationship between invocation_count and index_count */ - if (primitive.indices) { - /* Grab the size */ - unsigned size = (primitive.index_type == MALI_INDEX_TYPE_UINT32) ? - sizeof(uint32_t) : primitive.index_type; + if (primitive.indices) { + /* Grab the size */ + unsigned size = (primitive.index_type == MALI_INDEX_TYPE_UINT32) + ? 
sizeof(uint32_t) + : primitive.index_type; - /* Ensure we got a size, and if so, validate the index buffer - * is large enough to hold a full set of indices of the given - * size */ + /* Ensure we got a size, and if so, validate the index buffer + * is large enough to hold a full set of indices of the given + * size */ - if (!size) - pandecode_log("// XXX: index size missing\n"); - else - pandecode_validate_buffer(primitive.indices, primitive.index_count * size); - } else if (primitive.index_type) - pandecode_log("// XXX: unexpected index size\n"); + if (!size) + pandecode_log("// XXX: index size missing\n"); + else + pandecode_validate_buffer(primitive.indices, + primitive.index_count * size); + } else if (primitive.index_type) + pandecode_log("// XXX: unexpected index size\n"); #endif } static void pandecode_primitive_size(const void *s, bool constant) { - pan_unpack(s, PRIMITIVE_SIZE, ps); - if (ps.size_array == 0x0) - return; + pan_unpack(s, PRIMITIVE_SIZE, ps); + if (ps.size_array == 0x0) + return; - DUMP_UNPACKED(PRIMITIVE_SIZE, ps, "Primitive Size:\n") + DUMP_UNPACKED(PRIMITIVE_SIZE, ps, "Primitive Size:\n") } #if PAN_ARCH <= 7 static void pandecode_uniform_buffers(mali_ptr pubufs, int ubufs_count) { - uint64_t *PANDECODE_PTR_VAR(ubufs, pubufs); + uint64_t *PANDECODE_PTR_VAR(ubufs, pubufs); - for (int i = 0; i < ubufs_count; i++) { - mali_ptr addr = (ubufs[i] >> 10) << 2; - unsigned size = addr ? (((ubufs[i] & ((1 << 10) - 1)) + 1) * 16) : 0; + for (int i = 0; i < ubufs_count; i++) { + mali_ptr addr = (ubufs[i] >> 10) << 2; + unsigned size = addr ? (((ubufs[i] & ((1 << 10) - 1)) + 1) * 16) : 0; - pandecode_validate_buffer(addr, size); + pandecode_validate_buffer(addr, size); - char *ptr = pointer_as_memory_reference(addr); - pandecode_log("ubuf_%d[%u] = %s;\n", i, size, ptr); - free(ptr); - } + char *ptr = pointer_as_memory_reference(addr); + pandecode_log("ubuf_%d[%u] = %s;\n", i, size, ptr); + free(ptr); + } - pandecode_log("\n"); + pandecode_log("\n"); } static void pandecode_uniforms(mali_ptr uniforms, unsigned uniform_count) { - pandecode_validate_buffer(uniforms, uniform_count * 16); + pandecode_validate_buffer(uniforms, uniform_count * 16); - char *ptr = pointer_as_memory_reference(uniforms); - pandecode_log("vec4 uniforms[%u] = %s;\n", uniform_count, ptr); - free(ptr); - pandecode_log("\n"); + char *ptr = pointer_as_memory_reference(uniforms); + pandecode_log("vec4 uniforms[%u] = %s;\n", uniform_count, ptr); + free(ptr); + pandecode_log("\n"); } #endif static void pandecode_shader_disassemble(mali_ptr shader_ptr, int type, unsigned gpu_id) { - uint8_t *PANDECODE_PTR_VAR(code, shader_ptr); + uint8_t *PANDECODE_PTR_VAR(code, shader_ptr); - /* Compute maximum possible size */ - struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(shader_ptr); - size_t sz = mem->length - (shader_ptr - mem->gpu_va); + /* Compute maximum possible size */ + struct pandecode_mapped_memory *mem = + pandecode_find_mapped_gpu_mem_containing(shader_ptr); + size_t sz = mem->length - (shader_ptr - mem->gpu_va); - /* Print some boilerplate to clearly denote the assembly (which doesn't - * obey indentation rules), and actually do the disassembly! */ + /* Print some boilerplate to clearly denote the assembly (which doesn't + * obey indentation rules), and actually do the disassembly! 
*/ - pandecode_log_cont("\n\n"); + pandecode_log_cont("\n\n"); #if PAN_ARCH >= 9 - disassemble_valhall(pandecode_dump_stream, (const uint64_t *) code, sz, true); + disassemble_valhall(pandecode_dump_stream, (const uint64_t *)code, sz, true); #elif PAN_ARCH >= 6 && PAN_ARCH <= 7 - disassemble_bifrost(pandecode_dump_stream, code, sz, false); + disassemble_bifrost(pandecode_dump_stream, code, sz, false); #else - disassemble_midgard(pandecode_dump_stream, code, sz, gpu_id, true); + disassemble_midgard(pandecode_dump_stream, code, sz, gpu_id, true); #endif - pandecode_log_cont("\n\n"); + pandecode_log_cont("\n\n"); } #if PAN_ARCH <= 7 static void -pandecode_texture_payload(mali_ptr payload, - enum mali_texture_dimension dim, - enum mali_texture_layout layout, - bool manual_stride, - uint8_t levels, - uint16_t nr_samples, +pandecode_texture_payload(mali_ptr payload, enum mali_texture_dimension dim, + enum mali_texture_layout layout, bool manual_stride, + uint8_t levels, uint16_t nr_samples, uint16_t array_size) { - pandecode_log(".payload = {\n"); - pandecode_indent++; + pandecode_log(".payload = {\n"); + pandecode_indent++; - /* A bunch of bitmap pointers follow. - * We work out the correct number, - * based on the mipmap/cubemap - * properties, but dump extra - * possibilities to futureproof */ + /* A bunch of bitmap pointers follow. + * We work out the correct number, + * based on the mipmap/cubemap + * properties, but dump extra + * possibilities to futureproof */ - int bitmap_count = levels; + int bitmap_count = levels; - /* Miptree for each face */ - if (dim == MALI_TEXTURE_DIMENSION_CUBE) - bitmap_count *= 6; + /* Miptree for each face */ + if (dim == MALI_TEXTURE_DIMENSION_CUBE) + bitmap_count *= 6; - /* Array of layers */ - bitmap_count *= nr_samples; + /* Array of layers */ + bitmap_count *= nr_samples; - /* Array of textures */ - bitmap_count *= array_size; + /* Array of textures */ + bitmap_count *= array_size; - /* Stride for each element */ - if (manual_stride) - bitmap_count *= 2; + /* Stride for each element */ + if (manual_stride) + bitmap_count *= 2; - mali_ptr *pointers_and_strides = pandecode_fetch_gpu_mem(payload, - sizeof(mali_ptr) * bitmap_count); - for (int i = 0; i < bitmap_count; ++i) { - /* How we dump depends if this is a stride or a pointer */ + mali_ptr *pointers_and_strides = + pandecode_fetch_gpu_mem(payload, sizeof(mali_ptr) * bitmap_count); + for (int i = 0; i < bitmap_count; ++i) { + /* How we dump depends if this is a stride or a pointer */ - if (manual_stride && (i & 1)) { - /* signed 32-bit snuck in as a 64-bit pointer */ - uint64_t stride_set = pointers_and_strides[i]; - int32_t row_stride = stride_set; - int32_t surface_stride = stride_set >> 32; - pandecode_log("(mali_ptr) %d /* surface stride */ %d /* row stride */, \n", - surface_stride, row_stride); - } else { - char *a = pointer_as_memory_reference(pointers_and_strides[i]); - pandecode_log("%s, \n", a); - free(a); - } - } + if (manual_stride && (i & 1)) { + /* signed 32-bit snuck in as a 64-bit pointer */ + uint64_t stride_set = pointers_and_strides[i]; + int32_t row_stride = stride_set; + int32_t surface_stride = stride_set >> 32; + pandecode_log( + "(mali_ptr) %d /* surface stride */ %d /* row stride */, \n", + surface_stride, row_stride); + } else { + char *a = pointer_as_memory_reference(pointers_and_strides[i]); + pandecode_log("%s, \n", a); + free(a); + } + } - pandecode_indent--; - pandecode_log("},\n"); + pandecode_indent--; + pandecode_log("},\n"); } #endif @@ -572,45 +591,45 @@ 
pandecode_texture_payload(mali_ptr payload, static void pandecode_texture(mali_ptr u, unsigned tex) { - const uint8_t *cl = pandecode_fetch_gpu_mem(u, pan_size(TEXTURE)); + const uint8_t *cl = pandecode_fetch_gpu_mem(u, pan_size(TEXTURE)); - pan_unpack(cl, TEXTURE, temp); - DUMP_UNPACKED(TEXTURE, temp, "Texture:\n") + pan_unpack(cl, TEXTURE, temp); + DUMP_UNPACKED(TEXTURE, temp, "Texture:\n") - pandecode_indent++; - unsigned nr_samples = temp.dimension == MALI_TEXTURE_DIMENSION_3D ? - 1 : temp.sample_count; - pandecode_texture_payload(u + pan_size(TEXTURE), - temp.dimension, temp.texel_ordering, temp.manual_stride, - temp.levels, nr_samples, temp.array_size); - pandecode_indent--; + pandecode_indent++; + unsigned nr_samples = + temp.dimension == MALI_TEXTURE_DIMENSION_3D ? 1 : temp.sample_count; + pandecode_texture_payload(u + pan_size(TEXTURE), temp.dimension, + temp.texel_ordering, temp.manual_stride, + temp.levels, nr_samples, temp.array_size); + pandecode_indent--; } #else static void pandecode_texture(const void *cl, unsigned tex) { - pan_unpack(cl, TEXTURE, temp); - DUMP_UNPACKED(TEXTURE, temp, "Texture:\n") + pan_unpack(cl, TEXTURE, temp); + DUMP_UNPACKED(TEXTURE, temp, "Texture:\n") - pandecode_indent++; + pandecode_indent++; #if PAN_ARCH >= 9 - int plane_count = temp.levels * temp.array_size; + int plane_count = temp.levels * temp.array_size; - /* Miptree for each face */ - if (temp.dimension == MALI_TEXTURE_DIMENSION_CUBE) - plane_count *= 6; + /* Miptree for each face */ + if (temp.dimension == MALI_TEXTURE_DIMENSION_CUBE) + plane_count *= 6; - for (unsigned i = 0; i < plane_count; ++i) - DUMP_ADDR(PLANE, temp.surfaces + i * pan_size(PLANE), "Plane %u:\n", i); + for (unsigned i = 0; i < plane_count; ++i) + DUMP_ADDR(PLANE, temp.surfaces + i * pan_size(PLANE), "Plane %u:\n", i); #else - unsigned nr_samples = temp.dimension == MALI_TEXTURE_DIMENSION_3D ? - 1 : temp.sample_count; + unsigned nr_samples = + temp.dimension == MALI_TEXTURE_DIMENSION_3D ? 
1 : temp.sample_count; - pandecode_texture_payload(temp.surfaces, temp.dimension, temp.texel_ordering, - true, temp.levels, nr_samples, temp.array_size); + pandecode_texture_payload(temp.surfaces, temp.dimension, temp.texel_ordering, + true, temp.levels, nr_samples, temp.array_size); #endif - pandecode_indent--; + pandecode_indent--; } #endif @@ -618,49 +637,50 @@ pandecode_texture(const void *cl, unsigned tex) static void pandecode_textures(mali_ptr textures, unsigned texture_count) { - if (!textures) - return; + if (!textures) + return; - pandecode_log("Textures %"PRIx64":\n", textures); - pandecode_indent++; + pandecode_log("Textures %" PRIx64 ":\n", textures); + pandecode_indent++; #if PAN_ARCH >= 6 - const void *cl = pandecode_fetch_gpu_mem(textures, pan_size(TEXTURE) * - texture_count); + const void *cl = + pandecode_fetch_gpu_mem(textures, pan_size(TEXTURE) * texture_count); - for (unsigned tex = 0; tex < texture_count; ++tex) - pandecode_texture(cl + pan_size(TEXTURE) * tex, tex); + for (unsigned tex = 0; tex < texture_count; ++tex) + pandecode_texture(cl + pan_size(TEXTURE) * tex, tex); #else - mali_ptr *PANDECODE_PTR_VAR(u, textures); + mali_ptr *PANDECODE_PTR_VAR(u, textures); - for (int tex = 0; tex < texture_count; ++tex) { - mali_ptr *PANDECODE_PTR_VAR(u, textures + tex * sizeof(mali_ptr)); - char *a = pointer_as_memory_reference(*u); - pandecode_log("%s,\n", a); - free(a); - } + for (int tex = 0; tex < texture_count; ++tex) { + mali_ptr *PANDECODE_PTR_VAR(u, textures + tex * sizeof(mali_ptr)); + char *a = pointer_as_memory_reference(*u); + pandecode_log("%s,\n", a); + free(a); + } - /* Now, finally, descend down into the texture descriptor */ - for (unsigned tex = 0; tex < texture_count; ++tex) { - mali_ptr *PANDECODE_PTR_VAR(u, textures + tex * sizeof(mali_ptr)); - pandecode_texture(*u, tex); - } + /* Now, finally, descend down into the texture descriptor */ + for (unsigned tex = 0; tex < texture_count; ++tex) { + mali_ptr *PANDECODE_PTR_VAR(u, textures + tex * sizeof(mali_ptr)); + pandecode_texture(*u, tex); + } #endif - pandecode_indent--; - pandecode_log("\n"); + pandecode_indent--; + pandecode_log("\n"); } static void pandecode_samplers(mali_ptr samplers, unsigned sampler_count) { - pandecode_log("Samplers %"PRIx64":\n", samplers); - pandecode_indent++; + pandecode_log("Samplers %" PRIx64 ":\n", samplers); + pandecode_indent++; - for (int i = 0; i < sampler_count; ++i) - DUMP_ADDR(SAMPLER, samplers + (pan_size(SAMPLER) * i), "Sampler %d:\n", i); + for (int i = 0; i < sampler_count; ++i) + DUMP_ADDR(SAMPLER, samplers + (pan_size(SAMPLER) * i), "Sampler %d:\n", + i); - pandecode_indent--; - pandecode_log("\n"); + pandecode_indent--; + pandecode_log("\n"); } static void @@ -668,144 +688,147 @@ pandecode_dcd(const struct MALI_DRAW *p, enum mali_job_type job_type, unsigned gpu_id) { #if PAN_ARCH >= 5 - struct pandecode_fbd fbd_info = { - .rt_count = 1 - }; + struct pandecode_fbd fbd_info = {.rt_count = 1}; #endif - if (PAN_ARCH >= 6 || (PAN_ARCH == 5 && job_type != MALI_JOB_TYPE_TILER)) { + if (PAN_ARCH >= 6 || (PAN_ARCH == 5 && job_type != MALI_JOB_TYPE_TILER)) { #if PAN_ARCH >= 5 - pandecode_local_storage(p->thread_storage & ~1); + pandecode_local_storage(p->thread_storage & ~1); #endif - } else { + } else { #if PAN_ARCH <= 5 - pandecode_fbd(p->fbd, false, gpu_id); + pandecode_fbd(p->fbd, false, gpu_id); #endif - } + } - int varying_count = 0, attribute_count = 0, uniform_count = 0, uniform_buffer_count = 0; - int texture_count = 0, sampler_count = 0; + int varying_count = 
0, attribute_count = 0, uniform_count = 0, + uniform_buffer_count = 0; + int texture_count = 0, sampler_count = 0; - if (p->state) { - uint32_t *cl = pandecode_fetch_gpu_mem(p->state, pan_size(RENDERER_STATE)); + if (p->state) { + uint32_t *cl = + pandecode_fetch_gpu_mem(p->state, pan_size(RENDERER_STATE)); - pan_unpack(cl, RENDERER_STATE, state); + pan_unpack(cl, RENDERER_STATE, state); - if (state.shader.shader & ~0xF) - pandecode_shader_disassemble(state.shader.shader & ~0xF, job_type, gpu_id); + if (state.shader.shader & ~0xF) + pandecode_shader_disassemble(state.shader.shader & ~0xF, job_type, + gpu_id); #if PAN_ARCH >= 6 - bool idvs = (job_type == MALI_JOB_TYPE_INDEXED_VERTEX); + bool idvs = (job_type == MALI_JOB_TYPE_INDEXED_VERTEX); - if (idvs && state.secondary_shader) - pandecode_shader_disassemble(state.secondary_shader, job_type, gpu_id); + if (idvs && state.secondary_shader) + pandecode_shader_disassemble(state.secondary_shader, job_type, gpu_id); #endif - DUMP_UNPACKED(RENDERER_STATE, state, "State:\n"); - pandecode_indent++; + DUMP_UNPACKED(RENDERER_STATE, state, "State:\n"); + pandecode_indent++; - /* Save for dumps */ - attribute_count = state.shader.attribute_count; - varying_count = state.shader.varying_count; - texture_count = state.shader.texture_count; - sampler_count = state.shader.sampler_count; - uniform_buffer_count = state.properties.uniform_buffer_count; + /* Save for dumps */ + attribute_count = state.shader.attribute_count; + varying_count = state.shader.varying_count; + texture_count = state.shader.texture_count; + sampler_count = state.shader.sampler_count; + uniform_buffer_count = state.properties.uniform_buffer_count; #if PAN_ARCH >= 6 - uniform_count = state.preload.uniform_count; + uniform_count = state.preload.uniform_count; #else - uniform_count = state.properties.uniform_count; + uniform_count = state.properties.uniform_count; #endif #if PAN_ARCH == 4 - mali_ptr shader = state.blend_shader & ~0xF; - if (state.multisample_misc.blend_shader && shader) - pandecode_shader_disassemble(shader, job_type, gpu_id); + mali_ptr shader = state.blend_shader & ~0xF; + if (state.multisample_misc.blend_shader && shader) + pandecode_shader_disassemble(shader, job_type, gpu_id); #endif - pandecode_indent--; - pandecode_log("\n"); + pandecode_indent--; + pandecode_log("\n"); - /* MRT blend fields are used whenever MFBD is used, with - * per-RT descriptors */ + /* MRT blend fields are used whenever MFBD is used, with + * per-RT descriptors */ #if PAN_ARCH >= 5 - if ((job_type == MALI_JOB_TYPE_TILER || job_type == MALI_JOB_TYPE_FRAGMENT) && - (PAN_ARCH >= 6 || p->thread_storage & MALI_FBD_TAG_IS_MFBD)) { - void* blend_base = ((void *) cl) + pan_size(RENDERER_STATE); + if ((job_type == MALI_JOB_TYPE_TILER || + job_type == MALI_JOB_TYPE_FRAGMENT) && + (PAN_ARCH >= 6 || p->thread_storage & MALI_FBD_TAG_IS_MFBD)) { + void *blend_base = ((void *)cl) + pan_size(RENDERER_STATE); - for (unsigned i = 0; i < fbd_info.rt_count; i++) { - mali_ptr shader = - pandecode_blend(blend_base, i, - state.shader.shader); - if (shader & ~0xF) - pandecode_shader_disassemble(shader, job_type, - gpu_id); - } - } + for (unsigned i = 0; i < fbd_info.rt_count; i++) { + mali_ptr shader = + pandecode_blend(blend_base, i, state.shader.shader); + if (shader & ~0xF) + pandecode_shader_disassemble(shader, job_type, gpu_id); + } + } #endif - } else - pandecode_log("// XXX: missing shader descriptor\n"); + } else + pandecode_log("// XXX: missing shader descriptor\n"); - if (p->viewport) { - 
DUMP_ADDR(VIEWPORT, p->viewport, "Viewport:\n"); - pandecode_log("\n"); - } + if (p->viewport) { + DUMP_ADDR(VIEWPORT, p->viewport, "Viewport:\n"); + pandecode_log("\n"); + } - unsigned max_attr_index = 0; + unsigned max_attr_index = 0; - if (p->attributes) - max_attr_index = pandecode_attribute_meta(attribute_count, p->attributes, false); + if (p->attributes) + max_attr_index = + pandecode_attribute_meta(attribute_count, p->attributes, false); - if (p->attribute_buffers) - pandecode_attributes(p->attribute_buffers, max_attr_index, false, job_type); + if (p->attribute_buffers) + pandecode_attributes(p->attribute_buffers, max_attr_index, false, + job_type); - if (p->varyings) { - varying_count = pandecode_attribute_meta(varying_count, p->varyings, true); - } + if (p->varyings) { + varying_count = + pandecode_attribute_meta(varying_count, p->varyings, true); + } - if (p->varying_buffers) - pandecode_attributes(p->varying_buffers, varying_count, true, job_type); + if (p->varying_buffers) + pandecode_attributes(p->varying_buffers, varying_count, true, job_type); - if (p->uniform_buffers) { - if (uniform_buffer_count) - pandecode_uniform_buffers(p->uniform_buffers, uniform_buffer_count); - else - pandecode_log("// warn: UBOs specified but not referenced\n"); - } else if (uniform_buffer_count) - pandecode_log("// XXX: UBOs referenced but not specified\n"); + if (p->uniform_buffers) { + if (uniform_buffer_count) + pandecode_uniform_buffers(p->uniform_buffers, uniform_buffer_count); + else + pandecode_log("// warn: UBOs specified but not referenced\n"); + } else if (uniform_buffer_count) + pandecode_log("// XXX: UBOs referenced but not specified\n"); - /* We don't want to actually dump uniforms, but we do need to validate - * that the counts we were given are sane */ + /* We don't want to actually dump uniforms, but we do need to validate + * that the counts we were given are sane */ - if (p->push_uniforms) { - if (uniform_count) - pandecode_uniforms(p->push_uniforms, uniform_count); - else - pandecode_log("// warn: Uniforms specified but not referenced\n"); - } else if (uniform_count) - pandecode_log("// XXX: Uniforms referenced but not specified\n"); + if (p->push_uniforms) { + if (uniform_count) + pandecode_uniforms(p->push_uniforms, uniform_count); + else + pandecode_log("// warn: Uniforms specified but not referenced\n"); + } else if (uniform_count) + pandecode_log("// XXX: Uniforms referenced but not specified\n"); - if (p->textures) - pandecode_textures(p->textures, texture_count); + if (p->textures) + pandecode_textures(p->textures, texture_count); - if (p->samplers) - pandecode_samplers(p->samplers, sampler_count); + if (p->samplers) + pandecode_samplers(p->samplers, sampler_count); } static void pandecode_vertex_compute_geometry_job(const struct MALI_JOB_HEADER *h, mali_ptr job, unsigned gpu_id) { - struct mali_compute_job_packed *PANDECODE_PTR_VAR(p, job); - pan_section_unpack(p, COMPUTE_JOB, DRAW, draw); - pandecode_dcd(&draw, h->type, gpu_id); + struct mali_compute_job_packed *PANDECODE_PTR_VAR(p, job); + pan_section_unpack(p, COMPUTE_JOB, DRAW, draw); + pandecode_dcd(&draw, h->type, gpu_id); - pandecode_log("Vertex Job Payload:\n"); - pandecode_indent++; - pandecode_invocation(pan_section_ptr(p, COMPUTE_JOB, INVOCATION)); - DUMP_SECTION(COMPUTE_JOB, PARAMETERS, p, "Vertex Job Parameters:\n"); - DUMP_UNPACKED(DRAW, draw, "Draw:\n"); - pandecode_indent--; - pandecode_log("\n"); + pandecode_log("Vertex Job Payload:\n"); + pandecode_indent++; + pandecode_invocation(pan_section_ptr(p, 
COMPUTE_JOB, INVOCATION)); + DUMP_SECTION(COMPUTE_JOB, PARAMETERS, p, "Vertex Job Parameters:\n"); + DUMP_UNPACKED(DRAW, draw, "Draw:\n"); + pandecode_indent--; + pandecode_log("\n"); } #endif @@ -813,318 +836,324 @@ pandecode_vertex_compute_geometry_job(const struct MALI_JOB_HEADER *h, static void pandecode_tiler(mali_ptr gpu_va) { - pan_unpack(PANDECODE_PTR(gpu_va, void), TILER_CONTEXT, t); + pan_unpack(PANDECODE_PTR(gpu_va, void), TILER_CONTEXT, t); - if (t.heap) { - pan_unpack(PANDECODE_PTR(t.heap, void), TILER_HEAP, h); - DUMP_UNPACKED(TILER_HEAP, h, "Tiler Heap:\n"); - } + if (t.heap) { + pan_unpack(PANDECODE_PTR(t.heap, void), TILER_HEAP, h); + DUMP_UNPACKED(TILER_HEAP, h, "Tiler Heap:\n"); + } - DUMP_UNPACKED(TILER_CONTEXT, t, "Tiler:\n"); + DUMP_UNPACKED(TILER_CONTEXT, t, "Tiler:\n"); } #if PAN_ARCH <= 7 static void -pandecode_indexed_vertex_job(const struct MALI_JOB_HEADER *h, - mali_ptr job, unsigned gpu_id) +pandecode_indexed_vertex_job(const struct MALI_JOB_HEADER *h, mali_ptr job, + unsigned gpu_id) { - struct mali_indexed_vertex_job_packed *PANDECODE_PTR_VAR(p, job); + struct mali_indexed_vertex_job_packed *PANDECODE_PTR_VAR(p, job); - pandecode_log("Vertex:\n"); - pan_section_unpack(p, INDEXED_VERTEX_JOB, VERTEX_DRAW, vert_draw); - pandecode_dcd(&vert_draw, h->type, gpu_id); - DUMP_UNPACKED(DRAW, vert_draw, "Vertex Draw:\n"); + pandecode_log("Vertex:\n"); + pan_section_unpack(p, INDEXED_VERTEX_JOB, VERTEX_DRAW, vert_draw); + pandecode_dcd(&vert_draw, h->type, gpu_id); + DUMP_UNPACKED(DRAW, vert_draw, "Vertex Draw:\n"); - pandecode_log("Fragment:\n"); - pan_section_unpack(p, INDEXED_VERTEX_JOB, FRAGMENT_DRAW, frag_draw); - pandecode_dcd(&frag_draw, MALI_JOB_TYPE_FRAGMENT, gpu_id); - DUMP_UNPACKED(DRAW, frag_draw, "Fragment Draw:\n"); + pandecode_log("Fragment:\n"); + pan_section_unpack(p, INDEXED_VERTEX_JOB, FRAGMENT_DRAW, frag_draw); + pandecode_dcd(&frag_draw, MALI_JOB_TYPE_FRAGMENT, gpu_id); + DUMP_UNPACKED(DRAW, frag_draw, "Fragment Draw:\n"); - pan_section_unpack(p, INDEXED_VERTEX_JOB, TILER, tiler_ptr); - pandecode_log("Tiler Job Payload:\n"); - pandecode_indent++; - pandecode_tiler(tiler_ptr.address); - pandecode_indent--; + pan_section_unpack(p, INDEXED_VERTEX_JOB, TILER, tiler_ptr); + pandecode_log("Tiler Job Payload:\n"); + pandecode_indent++; + pandecode_tiler(tiler_ptr.address); + pandecode_indent--; - pandecode_invocation(pan_section_ptr(p, INDEXED_VERTEX_JOB, INVOCATION)); - pandecode_primitive(pan_section_ptr(p, INDEXED_VERTEX_JOB, PRIMITIVE)); + pandecode_invocation(pan_section_ptr(p, INDEXED_VERTEX_JOB, INVOCATION)); + pandecode_primitive(pan_section_ptr(p, INDEXED_VERTEX_JOB, PRIMITIVE)); - /* TODO: gl_PointSize on Bifrost */ - pandecode_primitive_size(pan_section_ptr(p, INDEXED_VERTEX_JOB, PRIMITIVE_SIZE), true); + /* TODO: gl_PointSize on Bifrost */ + pandecode_primitive_size( + pan_section_ptr(p, INDEXED_VERTEX_JOB, PRIMITIVE_SIZE), true); - pan_section_unpack(p, INDEXED_VERTEX_JOB, PADDING, padding); + pan_section_unpack(p, INDEXED_VERTEX_JOB, PADDING, padding); } #endif #endif static void -pandecode_tiler_job(const struct MALI_JOB_HEADER *h, - mali_ptr job, unsigned gpu_id) +pandecode_tiler_job(const struct MALI_JOB_HEADER *h, mali_ptr job, + unsigned gpu_id) { - struct mali_tiler_job_packed *PANDECODE_PTR_VAR(p, job); - pan_section_unpack(p, TILER_JOB, DRAW, draw); - pandecode_dcd(&draw, h->type, gpu_id); - pandecode_log("Tiler Job Payload:\n"); - pandecode_indent++; + struct mali_tiler_job_packed *PANDECODE_PTR_VAR(p, job); + pan_section_unpack(p, 
TILER_JOB, DRAW, draw); + pandecode_dcd(&draw, h->type, gpu_id); + pandecode_log("Tiler Job Payload:\n"); + pandecode_indent++; #if PAN_ARCH <= 7 - pandecode_invocation(pan_section_ptr(p, TILER_JOB, INVOCATION)); + pandecode_invocation(pan_section_ptr(p, TILER_JOB, INVOCATION)); #endif - pandecode_primitive(pan_section_ptr(p, TILER_JOB, PRIMITIVE)); - DUMP_UNPACKED(DRAW, draw, "Draw:\n"); + pandecode_primitive(pan_section_ptr(p, TILER_JOB, PRIMITIVE)); + DUMP_UNPACKED(DRAW, draw, "Draw:\n"); #if PAN_ARCH >= 6 - pan_section_unpack(p, TILER_JOB, TILER, tiler_ptr); - pandecode_tiler(tiler_ptr.address); + pan_section_unpack(p, TILER_JOB, TILER, tiler_ptr); + pandecode_tiler(tiler_ptr.address); - /* TODO: gl_PointSize on Bifrost */ - pandecode_primitive_size(pan_section_ptr(p, TILER_JOB, PRIMITIVE_SIZE), true); + /* TODO: gl_PointSize on Bifrost */ + pandecode_primitive_size(pan_section_ptr(p, TILER_JOB, PRIMITIVE_SIZE), + true); #if PAN_ARCH >= 9 - DUMP_SECTION(TILER_JOB, INSTANCE_COUNT, p, "Instance count:\n"); - DUMP_SECTION(TILER_JOB, VERTEX_COUNT, p, "Vertex count:\n"); - DUMP_SECTION(TILER_JOB, SCISSOR, p, "Scissor:\n"); - DUMP_SECTION(TILER_JOB, INDICES, p, "Indices:\n"); + DUMP_SECTION(TILER_JOB, INSTANCE_COUNT, p, "Instance count:\n"); + DUMP_SECTION(TILER_JOB, VERTEX_COUNT, p, "Vertex count:\n"); + DUMP_SECTION(TILER_JOB, SCISSOR, p, "Scissor:\n"); + DUMP_SECTION(TILER_JOB, INDICES, p, "Indices:\n"); #else - pan_section_unpack(p, TILER_JOB, PADDING, padding); + pan_section_unpack(p, TILER_JOB, PADDING, padding); #endif #else - pan_section_unpack(p, TILER_JOB, PRIMITIVE, primitive); - pandecode_primitive_size(pan_section_ptr(p, TILER_JOB, PRIMITIVE_SIZE), - primitive.point_size_array_format == MALI_POINT_SIZE_ARRAY_FORMAT_NONE); + pan_section_unpack(p, TILER_JOB, PRIMITIVE, primitive); + pandecode_primitive_size( + pan_section_ptr(p, TILER_JOB, PRIMITIVE_SIZE), + primitive.point_size_array_format == MALI_POINT_SIZE_ARRAY_FORMAT_NONE); #endif - pandecode_indent--; - pandecode_log("\n"); + pandecode_indent--; + pandecode_log("\n"); } static void pandecode_fragment_job(mali_ptr job, unsigned gpu_id) { - struct mali_fragment_job_packed *PANDECODE_PTR_VAR(p, job); - pan_section_unpack(p, FRAGMENT_JOB, PAYLOAD, s); + struct mali_fragment_job_packed *PANDECODE_PTR_VAR(p, job); + pan_section_unpack(p, FRAGMENT_JOB, PAYLOAD, s); - UNUSED struct pandecode_fbd info = pandecode_fbd(s.framebuffer, true, gpu_id); + UNUSED struct pandecode_fbd info = + pandecode_fbd(s.framebuffer, true, gpu_id); #if PAN_ARCH >= 5 - unsigned expected_tag = 0; + unsigned expected_tag = 0; - /* Compute the tag for the tagged pointer. This contains the type of - * FBD (MFBD/SFBD), and in the case of an MFBD, information about which - * additional structures follow the MFBD header (an extra payload or - * not, as well as a count of render targets) */ + /* Compute the tag for the tagged pointer. 
This contains the type of + * FBD (MFBD/SFBD), and in the case of an MFBD, information about which + * additional structures follow the MFBD header (an extra payload or + * not, as well as a count of render targets) */ - expected_tag = MALI_FBD_TAG_IS_MFBD; - if (info.has_extra) - expected_tag |= MALI_FBD_TAG_HAS_ZS_RT; + expected_tag = MALI_FBD_TAG_IS_MFBD; + if (info.has_extra) + expected_tag |= MALI_FBD_TAG_HAS_ZS_RT; - expected_tag |= MALI_FBD_TAG_IS_MFBD | (MALI_POSITIVE(info.rt_count) << 2); + expected_tag |= MALI_FBD_TAG_IS_MFBD | (MALI_POSITIVE(info.rt_count) << 2); #endif - DUMP_UNPACKED(FRAGMENT_JOB_PAYLOAD, s, "Fragment Job Payload:\n"); + DUMP_UNPACKED(FRAGMENT_JOB_PAYLOAD, s, "Fragment Job Payload:\n"); #if PAN_ARCH >= 5 - /* The FBD is a tagged pointer */ + /* The FBD is a tagged pointer */ - unsigned tag = (s.framebuffer & MALI_FBD_TAG_MASK); + unsigned tag = (s.framebuffer & MALI_FBD_TAG_MASK); - if (tag != expected_tag) - pandecode_log("// XXX: expected FBD tag %X but got %X\n", expected_tag, tag); + if (tag != expected_tag) + pandecode_log("// XXX: expected FBD tag %X but got %X\n", expected_tag, + tag); #endif - pandecode_log("\n"); + pandecode_log("\n"); } static void pandecode_write_value_job(mali_ptr job) { - struct mali_write_value_job_packed *PANDECODE_PTR_VAR(p, job); - pan_section_unpack(p, WRITE_VALUE_JOB, PAYLOAD, u); - DUMP_SECTION(WRITE_VALUE_JOB, PAYLOAD, p, "Write Value Payload:\n"); - pandecode_log("\n"); + struct mali_write_value_job_packed *PANDECODE_PTR_VAR(p, job); + pan_section_unpack(p, WRITE_VALUE_JOB, PAYLOAD, u); + DUMP_SECTION(WRITE_VALUE_JOB, PAYLOAD, p, "Write Value Payload:\n"); + pandecode_log("\n"); } static void pandecode_cache_flush_job(mali_ptr job) { - struct mali_cache_flush_job_packed *PANDECODE_PTR_VAR(p, job); - pan_section_unpack(p, CACHE_FLUSH_JOB, PAYLOAD, u); - DUMP_SECTION(CACHE_FLUSH_JOB, PAYLOAD, p, "Cache Flush Payload:\n"); - pandecode_log("\n"); + struct mali_cache_flush_job_packed *PANDECODE_PTR_VAR(p, job); + pan_section_unpack(p, CACHE_FLUSH_JOB, PAYLOAD, u); + DUMP_SECTION(CACHE_FLUSH_JOB, PAYLOAD, p, "Cache Flush Payload:\n"); + pandecode_log("\n"); } #if PAN_ARCH >= 9 static void dump_fau(mali_ptr addr, unsigned count, const char *name) { - const uint32_t *PANDECODE_PTR_VAR(raw, addr); + const uint32_t *PANDECODE_PTR_VAR(raw, addr); - pandecode_validate_buffer(addr, count * 8); + pandecode_validate_buffer(addr, count * 8); - fprintf(pandecode_dump_stream, "%s:\n", name); - for (unsigned i = 0; i < count; ++i) { - fprintf(pandecode_dump_stream, " %08X %08X\n", - raw[2*i], raw[2*i + 1]); - } - fprintf(pandecode_dump_stream, "\n"); + fprintf(pandecode_dump_stream, "%s:\n", name); + for (unsigned i = 0; i < count; ++i) { + fprintf(pandecode_dump_stream, " %08X %08X\n", raw[2 * i], + raw[2 * i + 1]); + } + fprintf(pandecode_dump_stream, "\n"); } static mali_ptr pandecode_shader(mali_ptr addr, const char *label, unsigned gpu_id) { - MAP_ADDR(SHADER_PROGRAM, addr, cl); - pan_unpack(cl, SHADER_PROGRAM, desc); + MAP_ADDR(SHADER_PROGRAM, addr, cl); + pan_unpack(cl, SHADER_PROGRAM, desc); - assert(desc.type == 8); + assert(desc.type == 8); - DUMP_UNPACKED(SHADER_PROGRAM, desc, "%s Shader:\n", label); - pandecode_shader_disassemble(desc.binary, 0, gpu_id); - return desc.binary; + DUMP_UNPACKED(SHADER_PROGRAM, desc, "%s Shader:\n", label); + pandecode_shader_disassemble(desc.binary, 0, gpu_id); + return desc.binary; } static void pandecode_resources(mali_ptr addr, unsigned size) { - const uint8_t *cl = pandecode_fetch_gpu_mem(addr, 
size); - assert((size % 0x20) == 0); + const uint8_t *cl = pandecode_fetch_gpu_mem(addr, size); + assert((size % 0x20) == 0); - for (unsigned i = 0; i < size; i += 0x20) { - unsigned type = (cl[i] & 0xF); + for (unsigned i = 0; i < size; i += 0x20) { + unsigned type = (cl[i] & 0xF); - switch (type) { - case MALI_DESCRIPTOR_TYPE_SAMPLER: - DUMP_CL(SAMPLER, cl + i, "Sampler:\n"); - break; - case MALI_DESCRIPTOR_TYPE_TEXTURE: - pandecode_texture(cl + i, i); - break; - case MALI_DESCRIPTOR_TYPE_ATTRIBUTE: - DUMP_CL(ATTRIBUTE, cl + i, "Attribute:\n"); - break; - case MALI_DESCRIPTOR_TYPE_BUFFER: - DUMP_CL(BUFFER, cl + i, "Buffer:\n"); - break; - default: - fprintf(pandecode_dump_stream, "Unknown descriptor type %X\n", type); - break; - } - } + switch (type) { + case MALI_DESCRIPTOR_TYPE_SAMPLER: + DUMP_CL(SAMPLER, cl + i, "Sampler:\n"); + break; + case MALI_DESCRIPTOR_TYPE_TEXTURE: + pandecode_texture(cl + i, i); + break; + case MALI_DESCRIPTOR_TYPE_ATTRIBUTE: + DUMP_CL(ATTRIBUTE, cl + i, "Attribute:\n"); + break; + case MALI_DESCRIPTOR_TYPE_BUFFER: + DUMP_CL(BUFFER, cl + i, "Buffer:\n"); + break; + default: + fprintf(pandecode_dump_stream, "Unknown descriptor type %X\n", type); + break; + } + } } static void pandecode_resource_tables(mali_ptr addr, const char *label) { - unsigned count = addr & 0x3F; - addr = addr & ~0x3F; + unsigned count = addr & 0x3F; + addr = addr & ~0x3F; - const uint8_t *cl = pandecode_fetch_gpu_mem(addr, MALI_RESOURCE_LENGTH * count); + const uint8_t *cl = + pandecode_fetch_gpu_mem(addr, MALI_RESOURCE_LENGTH * count); - for (unsigned i = 0; i < count; ++i) { - pan_unpack(cl + i * MALI_RESOURCE_LENGTH, RESOURCE, entry); - DUMP_UNPACKED(RESOURCE, entry, "Entry %u:\n", i); + for (unsigned i = 0; i < count; ++i) { + pan_unpack(cl + i * MALI_RESOURCE_LENGTH, RESOURCE, entry); + DUMP_UNPACKED(RESOURCE, entry, "Entry %u:\n", i); - pandecode_indent += 2; - if (entry.address) - pandecode_resources(entry.address, entry.size); - pandecode_indent -= 2; - } + pandecode_indent += 2; + if (entry.address) + pandecode_resources(entry.address, entry.size); + pandecode_indent -= 2; + } } static void pandecode_depth_stencil(mali_ptr addr) { - MAP_ADDR(DEPTH_STENCIL, addr, cl); - pan_unpack(cl, DEPTH_STENCIL, desc); - DUMP_UNPACKED(DEPTH_STENCIL, desc, "Depth/stencil"); + MAP_ADDR(DEPTH_STENCIL, addr, cl); + pan_unpack(cl, DEPTH_STENCIL, desc); + DUMP_UNPACKED(DEPTH_STENCIL, desc, "Depth/stencil"); } static void pandecode_shader_environment(const struct MALI_SHADER_ENVIRONMENT *p, unsigned gpu_id) { - if (p->shader) - pandecode_shader(p->shader, "Shader", gpu_id); + if (p->shader) + pandecode_shader(p->shader, "Shader", gpu_id); - if (p->resources) - pandecode_resource_tables(p->resources, "Resources"); + if (p->resources) + pandecode_resource_tables(p->resources, "Resources"); - if (p->thread_storage) - pandecode_local_storage(p->thread_storage); + if (p->thread_storage) + pandecode_local_storage(p->thread_storage); - if (p->fau) - dump_fau(p->fau, p->fau_count, "FAU"); + if (p->fau) + dump_fau(p->fau, p->fau_count, "FAU"); } static void pandecode_dcd(const struct MALI_DRAW *p, enum mali_job_type job_type, unsigned gpu_id) { - mali_ptr frag_shader = 0; + mali_ptr frag_shader = 0; - pandecode_depth_stencil(p->depth_stencil); + pandecode_depth_stencil(p->depth_stencil); - for (unsigned i = 0; i < p->blend_count; ++i) { - struct mali_blend_packed *PANDECODE_PTR_VAR(blend_descs, p->blend); + for (unsigned i = 0; i < p->blend_count; ++i) { + struct mali_blend_packed 
*PANDECODE_PTR_VAR(blend_descs, p->blend); - mali_ptr blend_shader = pandecode_blend(blend_descs, i, frag_shader); - if (blend_shader) { - fprintf(pandecode_dump_stream, "Blend shader %u", i); - pandecode_shader_disassemble(blend_shader, 0, gpu_id); - } - } + mali_ptr blend_shader = pandecode_blend(blend_descs, i, frag_shader); + if (blend_shader) { + fprintf(pandecode_dump_stream, "Blend shader %u", i); + pandecode_shader_disassemble(blend_shader, 0, gpu_id); + } + } - pandecode_shader_environment(&p->shader, gpu_id); - DUMP_UNPACKED(DRAW, *p, "Draw:\n"); + pandecode_shader_environment(&p->shader, gpu_id); + DUMP_UNPACKED(DRAW, *p, "Draw:\n"); } static void pandecode_malloc_vertex_job(mali_ptr job, unsigned gpu_id) { - struct mali_malloc_vertex_job_packed *PANDECODE_PTR_VAR(p, job); + struct mali_malloc_vertex_job_packed *PANDECODE_PTR_VAR(p, job); - DUMP_SECTION(MALLOC_VERTEX_JOB, PRIMITIVE, p, "Primitive:\n"); - DUMP_SECTION(MALLOC_VERTEX_JOB, INSTANCE_COUNT, p, "Instance count:\n"); - DUMP_SECTION(MALLOC_VERTEX_JOB, ALLOCATION, p, "Allocation:\n"); - DUMP_SECTION(MALLOC_VERTEX_JOB, TILER, p, "Tiler:\n"); - DUMP_SECTION(MALLOC_VERTEX_JOB, SCISSOR, p, "Scissor:\n"); - DUMP_SECTION(MALLOC_VERTEX_JOB, PRIMITIVE_SIZE, p, "Primitive Size:\n"); - DUMP_SECTION(MALLOC_VERTEX_JOB, INDICES, p, "Indices:\n"); + DUMP_SECTION(MALLOC_VERTEX_JOB, PRIMITIVE, p, "Primitive:\n"); + DUMP_SECTION(MALLOC_VERTEX_JOB, INSTANCE_COUNT, p, "Instance count:\n"); + DUMP_SECTION(MALLOC_VERTEX_JOB, ALLOCATION, p, "Allocation:\n"); + DUMP_SECTION(MALLOC_VERTEX_JOB, TILER, p, "Tiler:\n"); + DUMP_SECTION(MALLOC_VERTEX_JOB, SCISSOR, p, "Scissor:\n"); + DUMP_SECTION(MALLOC_VERTEX_JOB, PRIMITIVE_SIZE, p, "Primitive Size:\n"); + DUMP_SECTION(MALLOC_VERTEX_JOB, INDICES, p, "Indices:\n"); - pan_section_unpack(p, MALLOC_VERTEX_JOB, DRAW, dcd); + pan_section_unpack(p, MALLOC_VERTEX_JOB, DRAW, dcd); - pan_section_unpack(p, MALLOC_VERTEX_JOB, TILER, tiler_ptr); - pandecode_log("Tiler Job Payload:\n"); - pandecode_indent++; - if (tiler_ptr.address) - pandecode_tiler(tiler_ptr.address); - else - pandecode_log("\n"); - pandecode_indent--; + pan_section_unpack(p, MALLOC_VERTEX_JOB, TILER, tiler_ptr); + pandecode_log("Tiler Job Payload:\n"); + pandecode_indent++; + if (tiler_ptr.address) + pandecode_tiler(tiler_ptr.address); + else + pandecode_log("\n"); + pandecode_indent--; - pandecode_dcd(&dcd, 0, gpu_id); + pandecode_dcd(&dcd, 0, gpu_id); - pan_section_unpack(p, MALLOC_VERTEX_JOB, POSITION, position); - pan_section_unpack(p, MALLOC_VERTEX_JOB, VARYING, varying); - pandecode_shader_environment(&position, gpu_id); - pandecode_shader_environment(&varying, gpu_id); + pan_section_unpack(p, MALLOC_VERTEX_JOB, POSITION, position); + pan_section_unpack(p, MALLOC_VERTEX_JOB, VARYING, varying); + pandecode_shader_environment(&position, gpu_id); + pandecode_shader_environment(&varying, gpu_id); } static void pandecode_compute_job(mali_ptr job, unsigned gpu_id) { - struct mali_compute_job_packed *PANDECODE_PTR_VAR(p, job); - pan_section_unpack(p, COMPUTE_JOB, PAYLOAD, payload); + struct mali_compute_job_packed *PANDECODE_PTR_VAR(p, job); + pan_section_unpack(p, COMPUTE_JOB, PAYLOAD, payload); - pandecode_shader(payload.compute.shader, "Shader", gpu_id); - if (payload.compute.thread_storage) - pandecode_local_storage(payload.compute.thread_storage); - if (payload.compute.fau) - dump_fau(payload.compute.fau, payload.compute.fau_count, "FAU"); - if (payload.compute.resources) - pandecode_resource_tables(payload.compute.resources, "Resources"); 
+ pandecode_shader(payload.compute.shader, "Shader", gpu_id); + if (payload.compute.thread_storage) + pandecode_local_storage(payload.compute.thread_storage); + if (payload.compute.fau) + dump_fau(payload.compute.fau, payload.compute.fau_count, "FAU"); + if (payload.compute.resources) + pandecode_resource_tables(payload.compute.resources, "Resources"); - DUMP_UNPACKED(COMPUTE_PAYLOAD, payload, "Compute:\n"); + DUMP_UNPACKED(COMPUTE_PAYLOAD, payload, "Compute:\n"); } #endif @@ -1136,99 +1165,99 @@ pandecode_compute_job(mali_ptr job, unsigned gpu_id) void GENX(pandecode_jc)(mali_ptr jc_gpu_va, unsigned gpu_id) { - pandecode_dump_file_open(); + pandecode_dump_file_open(); - struct set *va_set = _mesa_pointer_set_create(NULL); - struct set_entry *entry = NULL; + struct set *va_set = _mesa_pointer_set_create(NULL); + struct set_entry *entry = NULL; - mali_ptr next_job = 0; + mali_ptr next_job = 0; - do { - struct mali_job_header_packed *hdr = - PANDECODE_PTR(jc_gpu_va, struct mali_job_header_packed); + do { + struct mali_job_header_packed *hdr = + PANDECODE_PTR(jc_gpu_va, struct mali_job_header_packed); - entry = _mesa_set_search(va_set, hdr); - if (entry != NULL) { - fprintf(stdout, "Job list has a cycle\n"); - break; - } + entry = _mesa_set_search(va_set, hdr); + if (entry != NULL) { + fprintf(stdout, "Job list has a cycle\n"); + break; + } - pan_unpack(hdr, JOB_HEADER, h); - next_job = h.next; + pan_unpack(hdr, JOB_HEADER, h); + next_job = h.next; - DUMP_UNPACKED(JOB_HEADER, h, "Job Header (%" PRIx64 "):\n", jc_gpu_va); - pandecode_log("\n"); + DUMP_UNPACKED(JOB_HEADER, h, "Job Header (%" PRIx64 "):\n", jc_gpu_va); + pandecode_log("\n"); - switch (h.type) { - case MALI_JOB_TYPE_WRITE_VALUE: - pandecode_write_value_job(jc_gpu_va); - break; + switch (h.type) { + case MALI_JOB_TYPE_WRITE_VALUE: + pandecode_write_value_job(jc_gpu_va); + break; - case MALI_JOB_TYPE_CACHE_FLUSH: - pandecode_cache_flush_job(jc_gpu_va); - break; + case MALI_JOB_TYPE_CACHE_FLUSH: + pandecode_cache_flush_job(jc_gpu_va); + break; - case MALI_JOB_TYPE_TILER: - pandecode_tiler_job(&h, jc_gpu_va, gpu_id); - break; + case MALI_JOB_TYPE_TILER: + pandecode_tiler_job(&h, jc_gpu_va, gpu_id); + break; #if PAN_ARCH <= 7 - case MALI_JOB_TYPE_VERTEX: - case MALI_JOB_TYPE_COMPUTE: - pandecode_vertex_compute_geometry_job(&h, jc_gpu_va, gpu_id); - break; + case MALI_JOB_TYPE_VERTEX: + case MALI_JOB_TYPE_COMPUTE: + pandecode_vertex_compute_geometry_job(&h, jc_gpu_va, gpu_id); + break; #if PAN_ARCH >= 6 - case MALI_JOB_TYPE_INDEXED_VERTEX: - pandecode_indexed_vertex_job(&h, jc_gpu_va, gpu_id); - break; + case MALI_JOB_TYPE_INDEXED_VERTEX: + pandecode_indexed_vertex_job(&h, jc_gpu_va, gpu_id); + break; #endif #else - case MALI_JOB_TYPE_COMPUTE: - pandecode_compute_job(jc_gpu_va, gpu_id); - break; + case MALI_JOB_TYPE_COMPUTE: + pandecode_compute_job(jc_gpu_va, gpu_id); + break; - case MALI_JOB_TYPE_MALLOC_VERTEX: - pandecode_malloc_vertex_job(jc_gpu_va, gpu_id); - break; + case MALI_JOB_TYPE_MALLOC_VERTEX: + pandecode_malloc_vertex_job(jc_gpu_va, gpu_id); + break; #endif - case MALI_JOB_TYPE_FRAGMENT: - pandecode_fragment_job(jc_gpu_va, gpu_id); - break; + case MALI_JOB_TYPE_FRAGMENT: + pandecode_fragment_job(jc_gpu_va, gpu_id); + break; - default: - break; - } + default: + break; + } - /* Track the latest visited job CPU VA to detect cycles */ - _mesa_set_add(va_set, hdr); + /* Track the latest visited job CPU VA to detect cycles */ + _mesa_set_add(va_set, hdr); - } while ((jc_gpu_va = next_job)); + } while ((jc_gpu_va = next_job)); - 
_mesa_set_destroy(va_set, NULL); + _mesa_set_destroy(va_set, NULL); - fflush(pandecode_dump_stream); - pandecode_map_read_write(); + fflush(pandecode_dump_stream); + pandecode_map_read_write(); } void GENX(pandecode_abort_on_fault)(mali_ptr jc_gpu_va) { - mali_ptr next_job = 0; + mali_ptr next_job = 0; - do { - pan_unpack(PANDECODE_PTR(jc_gpu_va, struct mali_job_header_packed), - JOB_HEADER, h); - next_job = h.next; + do { + pan_unpack(PANDECODE_PTR(jc_gpu_va, struct mali_job_header_packed), + JOB_HEADER, h); + next_job = h.next; - /* Ensure the job is marked COMPLETE */ - if (h.exception_status != 0x1) { - fprintf(stderr, "Incomplete job or timeout\n"); - fflush(NULL); - abort(); - } - } while ((jc_gpu_va = next_job)); + /* Ensure the job is marked COMPLETE */ + if (h.exception_status != 0x1) { + fprintf(stderr, "Incomplete job or timeout\n"); + fflush(NULL); + abort(); + } + } while ((jc_gpu_va = next_job)); - pandecode_map_read_write(); + pandecode_map_read_write(); } diff --git a/src/panfrost/lib/genxml/decode.h b/src/panfrost/lib/genxml/decode.h index 6fa6014eb0e..862532b2d44 100644 --- a/src/panfrost/lib/genxml/decode.h +++ b/src/panfrost/lib/genxml/decode.h @@ -36,54 +36,54 @@ extern FILE *pandecode_dump_stream; void pandecode_dump_file_open(void); struct pandecode_mapped_memory { - struct rb_node node; - size_t length; - void *addr; - uint64_t gpu_va; - bool ro; - char name[32]; + struct rb_node node; + size_t length; + void *addr; + uint64_t gpu_va; + bool ro; + char name[32]; }; char *pointer_as_memory_reference(uint64_t ptr); -struct pandecode_mapped_memory *pandecode_find_mapped_gpu_mem_containing(uint64_t addr); +struct pandecode_mapped_memory * +pandecode_find_mapped_gpu_mem_containing(uint64_t addr); void pandecode_map_read_write(void); void pandecode_dump_mappings(void); static inline void * -__pandecode_fetch_gpu_mem(uint64_t gpu_va, size_t size, - int line, const char *filename) +__pandecode_fetch_gpu_mem(uint64_t gpu_va, size_t size, int line, + const char *filename) { - const struct pandecode_mapped_memory *mem = - pandecode_find_mapped_gpu_mem_containing(gpu_va); + const struct pandecode_mapped_memory *mem = + pandecode_find_mapped_gpu_mem_containing(gpu_va); - if (!mem) { - fprintf(stderr, "Access to unknown memory %" PRIx64 " in %s:%d\n", - gpu_va, filename, line); - assert(0); - } + if (!mem) { + fprintf(stderr, "Access to unknown memory %" PRIx64 " in %s:%d\n", gpu_va, + filename, line); + assert(0); + } - assert(size + (gpu_va - mem->gpu_va) <= mem->length); + assert(size + (gpu_va - mem->gpu_va) <= mem->length); - return mem->addr + gpu_va - mem->gpu_va; + return mem->addr + gpu_va - mem->gpu_va; } -#define pandecode_fetch_gpu_mem(gpu_va, size) \ - __pandecode_fetch_gpu_mem(gpu_va, size, __LINE__, __FILE__) +#define pandecode_fetch_gpu_mem(gpu_va, size) \ + __pandecode_fetch_gpu_mem(gpu_va, size, __LINE__, __FILE__) /* Returns a validated pointer to mapped GPU memory with the given pointer type, * size automatically determined from the pointer type */ -#define PANDECODE_PTR(gpu_va, type) \ - ((type*)(__pandecode_fetch_gpu_mem(gpu_va, sizeof(type), \ - __LINE__, __FILE__))) +#define PANDECODE_PTR(gpu_va, type) \ + ((type *)(__pandecode_fetch_gpu_mem(gpu_va, sizeof(type), __LINE__, \ + __FILE__))) /* Usage: PANDECODE_PTR_VAR(name, gpu_va) */ -#define PANDECODE_PTR_VAR(name, gpu_va) \ - name = __pandecode_fetch_gpu_mem(gpu_va, sizeof(*name), \ - __LINE__, __FILE__) +#define PANDECODE_PTR_VAR(name, gpu_va) \ + name = __pandecode_fetch_gpu_mem(gpu_va, sizeof(*name), 
__LINE__, __FILE__) /* Forward declare for all supported gens to permit thunking */ void pandecode_jc_v4(mali_ptr jc_gpu_va, unsigned gpu_id); @@ -101,44 +101,44 @@ void pandecode_abort_on_fault_v9(mali_ptr jc_gpu_va); static inline void pan_hexdump(FILE *fp, const uint8_t *hex, size_t cnt, bool with_strings) { - for (unsigned i = 0; i < cnt; ++i) { - if ((i & 0xF) == 0) - fprintf(fp, "%06X ", i); + for (unsigned i = 0; i < cnt; ++i) { + if ((i & 0xF) == 0) + fprintf(fp, "%06X ", i); - uint8_t v = hex[i]; + uint8_t v = hex[i]; - if (v == 0 && (i & 0xF) == 0) { - /* Check if we're starting an aligned run of zeroes */ - unsigned zero_count = 0; + if (v == 0 && (i & 0xF) == 0) { + /* Check if we're starting an aligned run of zeroes */ + unsigned zero_count = 0; - for (unsigned j = i; j < cnt; ++j) { - if (hex[j] == 0) - zero_count++; - else - break; - } + for (unsigned j = i; j < cnt; ++j) { + if (hex[j] == 0) + zero_count++; + else + break; + } - if (zero_count >= 32) { - fprintf(fp, "*\n"); - i += (zero_count & ~0xF) - 1; - continue; - } - } + if (zero_count >= 32) { + fprintf(fp, "*\n"); + i += (zero_count & ~0xF) - 1; + continue; + } + } - fprintf(fp, "%02X ", hex[i]); - if ((i & 0xF) == 0xF && with_strings) { - fprintf(fp, " | "); - for (unsigned j = i & ~0xF; j <= i; ++j) { - uint8_t c = hex[j]; - fputc((c < 32 || c > 128) ? '.' : c, fp); - } - } + fprintf(fp, "%02X ", hex[i]); + if ((i & 0xF) == 0xF && with_strings) { + fprintf(fp, " | "); + for (unsigned j = i & ~0xF; j <= i; ++j) { + uint8_t c = hex[j]; + fputc((c < 32 || c > 128) ? '.' : c, fp); + } + } - if ((i & 0xF) == 0xF) - fprintf(fp, "\n"); - } + if ((i & 0xF) == 0xF) + fprintf(fp, "\n"); + } - fprintf(fp, "\n"); + fprintf(fp, "\n"); } #endif /* __MMAP_TRACE_H__ */ diff --git a/src/panfrost/lib/genxml/decode_common.c b/src/panfrost/lib/genxml/decode_common.c index ecc02387175..76cec531ed9 100644 --- a/src/panfrost/lib/genxml/decode_common.c +++ b/src/panfrost/lib/genxml/decode_common.c @@ -23,18 +23,18 @@ * SOFTWARE. 
*/ -#include -#include #include #include +#include +#include #include #include -#include "decode.h" #include "util/macros.h" +#include "util/simple_mtx.h" #include "util/u_debug.h" #include "util/u_dynarray.h" -#include "util/simple_mtx.h" +#include "decode.h" FILE *pandecode_dump_stream; @@ -46,8 +46,8 @@ static struct util_dynarray ro_mappings; static simple_mtx_t pandecode_lock = SIMPLE_MTX_INITIALIZER; -#define to_mapped_memory(x) \ - rb_node_data(struct pandecode_mapped_memory, x, node) +#define to_mapped_memory(x) \ + rb_node_data(struct pandecode_mapped_memory, x, node) /* * Compare a GPU VA to a node, considering a GPU VA to be equal to a node if it @@ -57,147 +57,147 @@ static simple_mtx_t pandecode_lock = SIMPLE_MTX_INITIALIZER; static int pandecode_cmp_key(const struct rb_node *lhs, const void *key) { - struct pandecode_mapped_memory *mem = to_mapped_memory(lhs); - uint64_t *gpu_va = (uint64_t *) key; + struct pandecode_mapped_memory *mem = to_mapped_memory(lhs); + uint64_t *gpu_va = (uint64_t *)key; - if (mem->gpu_va <= *gpu_va && *gpu_va < (mem->gpu_va + mem->length)) - return 0; - else - return mem->gpu_va - *gpu_va; + if (mem->gpu_va <= *gpu_va && *gpu_va < (mem->gpu_va + mem->length)) + return 0; + else + return mem->gpu_va - *gpu_va; } static int pandecode_cmp(const struct rb_node *lhs, const struct rb_node *rhs) { - return to_mapped_memory(lhs)->gpu_va - to_mapped_memory(rhs)->gpu_va; + return to_mapped_memory(lhs)->gpu_va - to_mapped_memory(rhs)->gpu_va; } static struct pandecode_mapped_memory * pandecode_find_mapped_gpu_mem_containing_rw(uint64_t addr) { - simple_mtx_assert_locked(&pandecode_lock); + simple_mtx_assert_locked(&pandecode_lock); - struct rb_node *node = rb_tree_search(&mmap_tree, &addr, pandecode_cmp_key); + struct rb_node *node = rb_tree_search(&mmap_tree, &addr, pandecode_cmp_key); - return to_mapped_memory(node); + return to_mapped_memory(node); } struct pandecode_mapped_memory * pandecode_find_mapped_gpu_mem_containing(uint64_t addr) { - simple_mtx_assert_locked(&pandecode_lock); + simple_mtx_assert_locked(&pandecode_lock); - struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing_rw(addr); + struct pandecode_mapped_memory *mem = + pandecode_find_mapped_gpu_mem_containing_rw(addr); - if (mem && mem->addr && !mem->ro) { - mprotect(mem->addr, mem->length, PROT_READ); - mem->ro = true; - util_dynarray_append(&ro_mappings, struct pandecode_mapped_memory *, mem); - } + if (mem && mem->addr && !mem->ro) { + mprotect(mem->addr, mem->length, PROT_READ); + mem->ro = true; + util_dynarray_append(&ro_mappings, struct pandecode_mapped_memory *, mem); + } - return mem; + return mem; } void pandecode_map_read_write(void) { - simple_mtx_assert_locked(&pandecode_lock); + simple_mtx_assert_locked(&pandecode_lock); - util_dynarray_foreach(&ro_mappings, struct pandecode_mapped_memory *, mem) { - (*mem)->ro = false; - mprotect((*mem)->addr, (*mem)->length, PROT_READ | PROT_WRITE); - } - util_dynarray_clear(&ro_mappings); + util_dynarray_foreach(&ro_mappings, struct pandecode_mapped_memory *, mem) { + (*mem)->ro = false; + mprotect((*mem)->addr, (*mem)->length, PROT_READ | PROT_WRITE); + } + util_dynarray_clear(&ro_mappings); } static void -pandecode_add_name(struct pandecode_mapped_memory *mem, uint64_t gpu_va, const char *name) +pandecode_add_name(struct pandecode_mapped_memory *mem, uint64_t gpu_va, + const char *name) { - simple_mtx_assert_locked(&pandecode_lock); + simple_mtx_assert_locked(&pandecode_lock); - if (!name) { - /* If we don't have a 
name, assign one */ + if (!name) { + /* If we don't have a name, assign one */ - snprintf(mem->name, sizeof(mem->name) - 1, - "memory_%" PRIx64, gpu_va); - } else { - assert((strlen(name) + 1) < sizeof(mem->name)); - memcpy(mem->name, name, strlen(name) + 1); - } + snprintf(mem->name, sizeof(mem->name) - 1, "memory_%" PRIx64, gpu_va); + } else { + assert((strlen(name) + 1) < sizeof(mem->name)); + memcpy(mem->name, name, strlen(name) + 1); + } } void pandecode_inject_mmap(uint64_t gpu_va, void *cpu, unsigned sz, const char *name) { - simple_mtx_lock(&pandecode_lock); + simple_mtx_lock(&pandecode_lock); - /* First, search if we already mapped this and are just updating an address */ + /* First, search if we already mapped this and are just updating an address */ - struct pandecode_mapped_memory *existing = - pandecode_find_mapped_gpu_mem_containing_rw(gpu_va); + struct pandecode_mapped_memory *existing = + pandecode_find_mapped_gpu_mem_containing_rw(gpu_va); - if (existing && existing->gpu_va == gpu_va) { - existing->length = sz; - existing->addr = cpu; - pandecode_add_name(existing, gpu_va, name); - } else { - /* Otherwise, add a fresh mapping */ - struct pandecode_mapped_memory *mapped_mem = NULL; + if (existing && existing->gpu_va == gpu_va) { + existing->length = sz; + existing->addr = cpu; + pandecode_add_name(existing, gpu_va, name); + } else { + /* Otherwise, add a fresh mapping */ + struct pandecode_mapped_memory *mapped_mem = NULL; - mapped_mem = calloc(1, sizeof(*mapped_mem)); - mapped_mem->gpu_va = gpu_va; - mapped_mem->length = sz; - mapped_mem->addr = cpu; - pandecode_add_name(mapped_mem, gpu_va, name); + mapped_mem = calloc(1, sizeof(*mapped_mem)); + mapped_mem->gpu_va = gpu_va; + mapped_mem->length = sz; + mapped_mem->addr = cpu; + pandecode_add_name(mapped_mem, gpu_va, name); - /* Add it to the tree */ - rb_tree_insert(&mmap_tree, &mapped_mem->node, pandecode_cmp); - } + /* Add it to the tree */ + rb_tree_insert(&mmap_tree, &mapped_mem->node, pandecode_cmp); + } - simple_mtx_unlock(&pandecode_lock); + simple_mtx_unlock(&pandecode_lock); } void pandecode_inject_free(uint64_t gpu_va, unsigned sz) { - simple_mtx_lock(&pandecode_lock); + simple_mtx_lock(&pandecode_lock); - struct pandecode_mapped_memory *mem = - pandecode_find_mapped_gpu_mem_containing_rw(gpu_va); + struct pandecode_mapped_memory *mem = + pandecode_find_mapped_gpu_mem_containing_rw(gpu_va); - if (mem) { - assert(mem->gpu_va == gpu_va); - assert(mem->length == sz); + if (mem) { + assert(mem->gpu_va == gpu_va); + assert(mem->length == sz); - rb_tree_remove(&mmap_tree, &mem->node); - free(mem); - } + rb_tree_remove(&mmap_tree, &mem->node); + free(mem); + } - simple_mtx_unlock(&pandecode_lock); + simple_mtx_unlock(&pandecode_lock); } char * pointer_as_memory_reference(uint64_t ptr) { - simple_mtx_assert_locked(&pandecode_lock); + simple_mtx_assert_locked(&pandecode_lock); - struct pandecode_mapped_memory *mapped; - char *out = malloc(128); + struct pandecode_mapped_memory *mapped; + char *out = malloc(128); - /* Try to find the corresponding mapped zone */ + /* Try to find the corresponding mapped zone */ - mapped = pandecode_find_mapped_gpu_mem_containing_rw(ptr); + mapped = pandecode_find_mapped_gpu_mem_containing_rw(ptr); - if (mapped) { - snprintf(out, 128, "%s + %d", mapped->name, (int) (ptr - mapped->gpu_va)); - return out; - } + if (mapped) { + snprintf(out, 128, "%s + %d", mapped->name, (int)(ptr - mapped->gpu_va)); + return out; + } - /* Just use the raw address if other options are exhausted */ - - 
snprintf(out, 128, "0x%" PRIx64, ptr); - return out; + /* Just use the raw address if other options are exhausted */ + snprintf(out, 128, "0x%" PRIx64, ptr); + return out; } static int pandecode_dump_frame_count = 0; @@ -207,129 +207,153 @@ static bool force_stderr = false; void pandecode_dump_file_open(void) { - simple_mtx_assert_locked(&pandecode_lock); + simple_mtx_assert_locked(&pandecode_lock); - if (pandecode_dump_stream) - return; + if (pandecode_dump_stream) + return; - /* This does a getenv every frame, so it is possible to use - * setenv to change the base at runtime. - */ - const char *dump_file_base = debug_get_option("PANDECODE_DUMP_FILE", "pandecode.dump"); - if (force_stderr || !strcmp(dump_file_base, "stderr")) - pandecode_dump_stream = stderr; - else { - char buffer[1024]; - snprintf(buffer, sizeof(buffer), "%s.%04d", dump_file_base, pandecode_dump_frame_count); - printf("pandecode: dump command stream to file %s\n", buffer); - pandecode_dump_stream = fopen(buffer, "w"); - if (!pandecode_dump_stream) - fprintf(stderr, - "pandecode: failed to open command stream log file %s\n", - buffer); - } + /* This does a getenv every frame, so it is possible to use + * setenv to change the base at runtime. + */ + const char *dump_file_base = + debug_get_option("PANDECODE_DUMP_FILE", "pandecode.dump"); + if (force_stderr || !strcmp(dump_file_base, "stderr")) + pandecode_dump_stream = stderr; + else { + char buffer[1024]; + snprintf(buffer, sizeof(buffer), "%s.%04d", dump_file_base, + pandecode_dump_frame_count); + printf("pandecode: dump command stream to file %s\n", buffer); + pandecode_dump_stream = fopen(buffer, "w"); + if (!pandecode_dump_stream) + fprintf(stderr, + "pandecode: failed to open command stream log file %s\n", + buffer); + } } static void pandecode_dump_file_close(void) { - simple_mtx_assert_locked(&pandecode_lock); + simple_mtx_assert_locked(&pandecode_lock); - if (pandecode_dump_stream && pandecode_dump_stream != stderr) { - if (fclose(pandecode_dump_stream)) - perror("pandecode: dump file"); + if (pandecode_dump_stream && pandecode_dump_stream != stderr) { + if (fclose(pandecode_dump_stream)) + perror("pandecode: dump file"); - pandecode_dump_stream = NULL; - } + pandecode_dump_stream = NULL; + } } void pandecode_initialize(bool to_stderr) { - force_stderr = to_stderr; - rb_tree_init(&mmap_tree); - util_dynarray_init(&ro_mappings, NULL); + force_stderr = to_stderr; + rb_tree_init(&mmap_tree); + util_dynarray_init(&ro_mappings, NULL); } void pandecode_next_frame(void) { - simple_mtx_lock(&pandecode_lock); + simple_mtx_lock(&pandecode_lock); - pandecode_dump_file_close(); - pandecode_dump_frame_count++; + pandecode_dump_file_close(); + pandecode_dump_frame_count++; - simple_mtx_unlock(&pandecode_lock); + simple_mtx_unlock(&pandecode_lock); } void pandecode_close(void) { - simple_mtx_lock(&pandecode_lock); + simple_mtx_lock(&pandecode_lock); - rb_tree_foreach_safe(struct pandecode_mapped_memory, it, &mmap_tree, node) { - rb_tree_remove(&mmap_tree, &it->node); - free(it); - } + rb_tree_foreach_safe(struct pandecode_mapped_memory, it, &mmap_tree, node) { + rb_tree_remove(&mmap_tree, &it->node); + free(it); + } - util_dynarray_fini(&ro_mappings); - pandecode_dump_file_close(); + util_dynarray_fini(&ro_mappings); + pandecode_dump_file_close(); - simple_mtx_unlock(&pandecode_lock); + simple_mtx_unlock(&pandecode_lock); } void pandecode_dump_mappings(void) { - simple_mtx_lock(&pandecode_lock); + simple_mtx_lock(&pandecode_lock); - pandecode_dump_file_open(); + 
pandecode_dump_file_open(); - rb_tree_foreach(struct pandecode_mapped_memory, it, &mmap_tree, node) { - if (!it->addr || !it->length) - continue; + rb_tree_foreach(struct pandecode_mapped_memory, it, &mmap_tree, node) { + if (!it->addr || !it->length) + continue; - fprintf(pandecode_dump_stream, "Buffer: %s gpu %" PRIx64 "\n\n", - it->name, it->gpu_va); + fprintf(pandecode_dump_stream, "Buffer: %s gpu %" PRIx64 "\n\n", it->name, + it->gpu_va); - pan_hexdump(pandecode_dump_stream, it->addr, it->length, false); - fprintf(pandecode_dump_stream, "\n"); - } + pan_hexdump(pandecode_dump_stream, it->addr, it->length, false); + fprintf(pandecode_dump_stream, "\n"); + } - fflush(pandecode_dump_stream); - simple_mtx_unlock(&pandecode_lock); + fflush(pandecode_dump_stream); + simple_mtx_unlock(&pandecode_lock); } void pandecode_abort_on_fault(mali_ptr jc_gpu_va, unsigned gpu_id) { - simple_mtx_lock(&pandecode_lock); + simple_mtx_lock(&pandecode_lock); - switch (pan_arch(gpu_id)) { - case 4: pandecode_abort_on_fault_v4(jc_gpu_va); break; - case 5: pandecode_abort_on_fault_v5(jc_gpu_va); break; - case 6: pandecode_abort_on_fault_v6(jc_gpu_va); break; - case 7: pandecode_abort_on_fault_v7(jc_gpu_va); break; - case 9: pandecode_abort_on_fault_v9(jc_gpu_va); break; - default: unreachable("Unsupported architecture"); - } + switch (pan_arch(gpu_id)) { + case 4: + pandecode_abort_on_fault_v4(jc_gpu_va); + break; + case 5: + pandecode_abort_on_fault_v5(jc_gpu_va); + break; + case 6: + pandecode_abort_on_fault_v6(jc_gpu_va); + break; + case 7: + pandecode_abort_on_fault_v7(jc_gpu_va); + break; + case 9: + pandecode_abort_on_fault_v9(jc_gpu_va); + break; + default: + unreachable("Unsupported architecture"); + } - simple_mtx_unlock(&pandecode_lock); + simple_mtx_unlock(&pandecode_lock); } void pandecode_jc(mali_ptr jc_gpu_va, unsigned gpu_id) { - simple_mtx_lock(&pandecode_lock); + simple_mtx_lock(&pandecode_lock); - switch (pan_arch(gpu_id)) { - case 4: pandecode_jc_v4(jc_gpu_va, gpu_id); break; - case 5: pandecode_jc_v5(jc_gpu_va, gpu_id); break; - case 6: pandecode_jc_v6(jc_gpu_va, gpu_id); break; - case 7: pandecode_jc_v7(jc_gpu_va, gpu_id); break; - case 9: pandecode_jc_v9(jc_gpu_va, gpu_id); break; - default: unreachable("Unsupported architecture"); - } + switch (pan_arch(gpu_id)) { + case 4: + pandecode_jc_v4(jc_gpu_va, gpu_id); + break; + case 5: + pandecode_jc_v5(jc_gpu_va, gpu_id); + break; + case 6: + pandecode_jc_v6(jc_gpu_va, gpu_id); + break; + case 7: + pandecode_jc_v7(jc_gpu_va, gpu_id); + break; + case 9: + pandecode_jc_v9(jc_gpu_va, gpu_id); + break; + default: + unreachable("Unsupported architecture"); + } - simple_mtx_unlock(&pandecode_lock); + simple_mtx_unlock(&pandecode_lock); } diff --git a/src/panfrost/lib/genxml/gen_macros.h b/src/panfrost/lib/genxml/gen_macros.h index 1ef4b53a508..b15f52c4181 100644 --- a/src/panfrost/lib/genxml/gen_macros.h +++ b/src/panfrost/lib/genxml/gen_macros.h @@ -56,45 +56,45 @@ static inline unsigned pan_arch(unsigned gpu_id) { - switch (gpu_id) { - case 0x600: - case 0x620: - case 0x720: - return 4; - case 0x750: - case 0x820: - case 0x830: - case 0x860: - case 0x880: - return 5; - default: - return gpu_id >> 12; - } + switch (gpu_id) { + case 0x600: + case 0x620: + case 0x720: + return 4; + case 0x750: + case 0x820: + case 0x830: + case 0x860: + case 0x880: + return 5; + default: + return gpu_id >> 12; + } } /* Base macro defined on the command line. 
*/ #ifndef PAN_ARCH -# include "genxml/common_pack.h" +#include "genxml/common_pack.h" #else /* Suffixing macros */ #if (PAN_ARCH == 4) -# define GENX(X) X##_v4 -# include "genxml/v4_pack.h" +#define GENX(X) X##_v4 +#include "genxml/v4_pack.h" #elif (PAN_ARCH == 5) -# define GENX(X) X##_v5 -# include "genxml/v5_pack.h" +#define GENX(X) X##_v5 +#include "genxml/v5_pack.h" #elif (PAN_ARCH == 6) -# define GENX(X) X##_v6 -# include "genxml/v6_pack.h" +#define GENX(X) X##_v6 +#include "genxml/v6_pack.h" #elif (PAN_ARCH == 7) -# define GENX(X) X##_v7 -# include "genxml/v7_pack.h" +#define GENX(X) X##_v7 +#include "genxml/v7_pack.h" #elif (PAN_ARCH == 9) -# define GENX(X) X##_v9 -# include "genxml/v9_pack.h" +#define GENX(X) X##_v9 +#include "genxml/v9_pack.h" #else -# error "Need to add suffixing macro for this architecture" +#error "Need to add suffixing macro for this architecture" #endif #endif /* PAN_ARCH */ diff --git a/src/panfrost/lib/pan_afbc.c b/src/panfrost/lib/pan_afbc.c index 151725ded90..97ff6dc70e5 100644 --- a/src/panfrost/lib/pan_afbc.c +++ b/src/panfrost/lib/pan_afbc.c @@ -50,8 +50,8 @@ * must also be cache-line aligned, so there can sometimes be a bit of padding * between the header and body. * - * As an example, a 64x64 RGBA framebuffer contains 64/16 = 4 tiles horizontally and - * 4 tiles vertically. There are 4*4=16 tiles in total, each containing 16 + * As an example, a 64x64 RGBA framebuffer contains 64/16 = 4 tiles horizontally + * and 4 tiles vertically. There are 4*4=16 tiles in total, each containing 16 * bytes of metadata, so there is a 16*16=256 byte header. 64x64 is already * tile aligned, so the body is 64*64 * 4 bytes per pixel = 16384 bytes of * body. @@ -69,45 +69,45 @@ static enum pipe_format unswizzled_format(enum pipe_format format) { - switch (format) { - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_I8_UNORM: - return PIPE_FORMAT_R8_UNORM; + switch (format) { + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_I8_UNORM: + return PIPE_FORMAT_R8_UNORM; - case PIPE_FORMAT_L8A8_UNORM: - return PIPE_FORMAT_R8G8_UNORM; + case PIPE_FORMAT_L8A8_UNORM: + return PIPE_FORMAT_R8G8_UNORM; - case PIPE_FORMAT_B8G8R8_UNORM: - return PIPE_FORMAT_R8G8B8_UNORM; + case PIPE_FORMAT_B8G8R8_UNORM: + return PIPE_FORMAT_R8G8B8_UNORM; - case PIPE_FORMAT_R8G8B8X8_UNORM: - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_X8R8G8B8_UNORM: - case PIPE_FORMAT_X8B8G8R8_UNORM: - case PIPE_FORMAT_A8B8G8R8_UNORM: - return PIPE_FORMAT_R8G8B8A8_UNORM; + case PIPE_FORMAT_R8G8B8X8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_X8B8G8R8_UNORM: + case PIPE_FORMAT_A8B8G8R8_UNORM: + return PIPE_FORMAT_R8G8B8A8_UNORM; - case PIPE_FORMAT_B5G6R5_UNORM: - return PIPE_FORMAT_R5G6B5_UNORM; + case PIPE_FORMAT_B5G6R5_UNORM: + return PIPE_FORMAT_R5G6B5_UNORM; - case PIPE_FORMAT_B5G5R5A1_UNORM: - return PIPE_FORMAT_R5G5B5A1_UNORM; + case PIPE_FORMAT_B5G5R5A1_UNORM: + return PIPE_FORMAT_R5G5B5A1_UNORM; - case PIPE_FORMAT_R10G10B10X2_UNORM: - case PIPE_FORMAT_B10G10R10A2_UNORM: - case PIPE_FORMAT_B10G10R10X2_UNORM: - return PIPE_FORMAT_R10G10B10A2_UNORM; + case PIPE_FORMAT_R10G10B10X2_UNORM: + case PIPE_FORMAT_B10G10R10A2_UNORM: + case PIPE_FORMAT_B10G10R10X2_UNORM: + return PIPE_FORMAT_R10G10B10A2_UNORM; - case PIPE_FORMAT_A4B4G4R4_UNORM: - case 
PIPE_FORMAT_B4G4R4A4_UNORM: - return PIPE_FORMAT_R4G4B4A4_UNORM; + case PIPE_FORMAT_A4B4G4R4_UNORM: + case PIPE_FORMAT_B4G4R4A4_UNORM: + return PIPE_FORMAT_R4G4B4A4_UNORM; - default: - return format; - } + default: + return format; + } } /* AFBC supports compressing a few canonical formats. Additional formats are @@ -118,29 +118,29 @@ unswizzled_format(enum pipe_format format) enum pan_afbc_mode panfrost_afbc_format(unsigned arch, enum pipe_format format) { - /* Luminance-alpha not supported for AFBC on v7+ */ - switch (format) { - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_I8_UNORM: - case PIPE_FORMAT_L8A8_UNORM: - if (arch >= 7) - return PAN_AFBC_MODE_INVALID; - else - break; - default: - break; - } + /* Luminance-alpha not supported for AFBC on v7+ */ + switch (format) { + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_L8A8_UNORM: + if (arch >= 7) + return PAN_AFBC_MODE_INVALID; + else + break; + default: + break; + } - /* sRGB does not change the pixel format itself, only the - * interpretation. The interpretation is handled by conversion hardware - * independent to the compression hardware, so we can compress sRGB - * formats by using the corresponding linear format. - */ - format = util_format_linear(format); + /* sRGB does not change the pixel format itself, only the + * interpretation. The interpretation is handled by conversion hardware + * independent to the compression hardware, so we can compress sRGB + * formats by using the corresponding linear format. + */ + format = util_format_linear(format); - /* We handle swizzling orthogonally to AFBC */ - format = unswizzled_format(format); + /* We handle swizzling orthogonally to AFBC */ + format = unswizzled_format(format); /* clang-format off */ switch (format) { @@ -166,9 +166,10 @@ panfrost_afbc_format(unsigned arch, enum pipe_format format) /* A format may be compressed as AFBC if it has an AFBC internal format */ bool -panfrost_format_supports_afbc(const struct panfrost_device *dev, enum pipe_format format) +panfrost_format_supports_afbc(const struct panfrost_device *dev, + enum pipe_format format) { - return panfrost_afbc_format(dev->arch, format) != PAN_AFBC_MODE_INVALID; + return panfrost_afbc_format(dev->arch, format) != PAN_AFBC_MODE_INVALID; } /* The lossless colour transform (AFBC_FORMAT_MOD_YTR) requires RGB. 
*/ @@ -176,15 +177,14 @@ panfrost_format_supports_afbc(const struct panfrost_device *dev, enum pipe_forma bool panfrost_afbc_can_ytr(enum pipe_format format) { - const struct util_format_description *desc = - util_format_description(format); + const struct util_format_description *desc = util_format_description(format); - /* YTR is only defined for RGB(A) */ - if (desc->nr_channels != 3 && desc->nr_channels != 4) - return false; + /* YTR is only defined for RGB(A) */ + if (desc->nr_channels != 3 && desc->nr_channels != 4) + return false; - /* The fourth channel if it exists doesn't matter */ - return desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB; + /* The fourth channel if it exists doesn't matter */ + return desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB; } /* @@ -194,5 +194,5 @@ panfrost_afbc_can_ytr(enum pipe_format format) bool panfrost_afbc_can_tile(const struct panfrost_device *dev) { - return (dev->arch >= 7); + return (dev->arch >= 7); } diff --git a/src/panfrost/lib/pan_attributes.c b/src/panfrost/lib/pan_attributes.c index 5c57f050dda..b1669bf8d65 100644 --- a/src/panfrost/lib/pan_attributes.c +++ b/src/panfrost/lib/pan_attributes.c @@ -39,91 +39,92 @@ static unsigned panfrost_small_padded_vertex_count(unsigned idx) { - if (idx < 10) - return idx; - else - return (idx + 1) & ~1; + if (idx < 10) + return idx; + else + return (idx + 1) & ~1; } static unsigned panfrost_large_padded_vertex_count(uint32_t vertex_count) { - /* First, we have to find the highest set one */ - unsigned highest = 32 - __builtin_clz(vertex_count); + /* First, we have to find the highest set one */ + unsigned highest = 32 - __builtin_clz(vertex_count); - /* Using that, we mask out the highest 4-bits */ - unsigned n = highest - 4; - unsigned nibble = (vertex_count >> n) & 0xF; + /* Using that, we mask out the highest 4-bits */ + unsigned n = highest - 4; + unsigned nibble = (vertex_count >> n) & 0xF; - /* Great, we have the nibble. Now we can just try possibilities. Note - * that we don't care about the bottom most bit in most cases, and we - * know the top bit must be 1 */ + /* Great, we have the nibble. Now we can just try possibilities. Note + * that we don't care about the bottom most bit in most cases, and we + * know the top bit must be 1 */ - unsigned middle_two = (nibble >> 1) & 0x3; + unsigned middle_two = (nibble >> 1) & 0x3; - switch (middle_two) { - case 0b00: - if (!(nibble & 1)) - return (1 << n) * 9; - else - return (1 << (n + 1)) * 5; - case 0b01: - return (1 << (n + 2)) * 3; - case 0b10: - return (1 << (n + 1)) * 7; - case 0b11: - return (1 << (n + 4)); - default: - return 0; /* unreachable */ - } + switch (middle_two) { + case 0b00: + if (!(nibble & 1)) + return (1 << n) * 9; + else + return (1 << (n + 1)) * 5; + case 0b01: + return (1 << (n + 2)) * 3; + case 0b10: + return (1 << (n + 1)) * 7; + case 0b11: + return (1 << (n + 4)); + default: + return 0; /* unreachable */ + } } unsigned panfrost_padded_vertex_count(unsigned vertex_count) { - if (vertex_count < 20) - return panfrost_small_padded_vertex_count(vertex_count); - else - return panfrost_large_padded_vertex_count(vertex_count); + if (vertex_count < 20) + return panfrost_small_padded_vertex_count(vertex_count); + else + return panfrost_large_padded_vertex_count(vertex_count); } /* The much, much more irritating case -- instancing is enabled. 
See * panfrost_job.h for notes on how this works */ unsigned -panfrost_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift, unsigned *extra_flags) +panfrost_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift, + unsigned *extra_flags) { - /* We have a NPOT divisor. Here's the fun one (multipling by - * the inverse and shifting) */ + /* We have a NPOT divisor. Here's the fun one (multipling by + * the inverse and shifting) */ - /* floor(log2(d)) */ - unsigned shift = util_logbase2(hw_divisor); + /* floor(log2(d)) */ + unsigned shift = util_logbase2(hw_divisor); - /* m = ceil(2^(32 + shift) / d) */ - uint64_t shift_hi = 32 + shift; - uint64_t t = 1ll << shift_hi; - double t_f = t; - double hw_divisor_d = hw_divisor; - double m_f = ceil(t_f / hw_divisor_d); - unsigned m = m_f; + /* m = ceil(2^(32 + shift) / d) */ + uint64_t shift_hi = 32 + shift; + uint64_t t = 1ll << shift_hi; + double t_f = t; + double hw_divisor_d = hw_divisor; + double m_f = ceil(t_f / hw_divisor_d); + unsigned m = m_f; - /* Default case */ - uint32_t magic_divisor = m; + /* Default case */ + uint32_t magic_divisor = m; - /* e = 2^(shift + 32) % d */ - uint64_t e = t % hw_divisor; + /* e = 2^(shift + 32) % d */ + uint64_t e = t % hw_divisor; - /* Apply round-down algorithm? e <= 2^shift?. XXX: The blob - * seems to use a different condition */ - if (e <= (1ll << shift)) { - magic_divisor = m - 1; - *extra_flags = 1; - } + /* Apply round-down algorithm? e <= 2^shift?. XXX: The blob + * seems to use a different condition */ + if (e <= (1ll << shift)) { + magic_divisor = m - 1; + *extra_flags = 1; + } - /* Top flag implicitly set */ - assert(magic_divisor & (1u << 31)); - magic_divisor &= ~(1u << 31); - *o_shift = shift; + /* Top flag implicitly set */ + assert(magic_divisor & (1u << 31)); + magic_divisor &= ~(1u << 31); + *o_shift = shift; - return magic_divisor; + return magic_divisor; } diff --git a/src/panfrost/lib/pan_blend.c b/src/panfrost/lib/pan_blend.c index e5673a0ead8..768f49f720e 100644 --- a/src/panfrost/lib/pan_blend.c +++ b/src/panfrost/lib/pan_blend.c @@ -28,13 +28,13 @@ #include "pan_shader.h" #endif -#include "pan_texture.h" -#include "panfrost/util/pan_lower_framebuffer.h" -#include "util/format/u_format.h" #include "compiler/nir/nir.h" #include "compiler/nir/nir_builder.h" #include "compiler/nir/nir_conversion_builder.h" #include "compiler/nir/nir_lower_blend.h" +#include "panfrost/util/pan_lower_framebuffer.h" +#include "util/format/u_format.h" +#include "pan_texture.h" #ifndef PAN_ARCH @@ -43,9 +43,9 @@ static bool factor_is_supported(enum blend_factor factor) { - return factor != BLEND_FACTOR_SRC_ALPHA_SATURATE && - factor != BLEND_FACTOR_SRC1_COLOR && - factor != BLEND_FACTOR_SRC1_ALPHA; + return factor != BLEND_FACTOR_SRC_ALPHA_SATURATE && + factor != BLEND_FACTOR_SRC1_COLOR && + factor != BLEND_FACTOR_SRC1_ALPHA; } /* OpenGL allows encoding (src*dest + dest*src) which is incompatiblle with @@ -54,71 +54,62 @@ factor_is_supported(enum blend_factor factor) * + dest * (2*src) wih the new source_2 value of C. Detect this case. 
*/ static bool -is_2srcdest(enum blend_func blend_func, - enum blend_factor src_factor, - bool invert_src, - enum blend_factor dest_factor, - bool invert_dest, +is_2srcdest(enum blend_func blend_func, enum blend_factor src_factor, + bool invert_src, enum blend_factor dest_factor, bool invert_dest, bool is_alpha) { - return (blend_func == BLEND_FUNC_ADD) && - ((src_factor == BLEND_FACTOR_DST_COLOR) || - ((src_factor == BLEND_FACTOR_DST_ALPHA) && is_alpha)) && - ((dest_factor == BLEND_FACTOR_SRC_COLOR) || - ((dest_factor == BLEND_FACTOR_SRC_ALPHA) && is_alpha)) && - !invert_src && !invert_dest; + return (blend_func == BLEND_FUNC_ADD) && + ((src_factor == BLEND_FACTOR_DST_COLOR) || + ((src_factor == BLEND_FACTOR_DST_ALPHA) && is_alpha)) && + ((dest_factor == BLEND_FACTOR_SRC_COLOR) || + ((dest_factor == BLEND_FACTOR_SRC_ALPHA) && is_alpha)) && + !invert_src && !invert_dest; } static bool can_fixed_function_equation(enum blend_func blend_func, - enum blend_factor src_factor, - bool invert_src, - enum blend_factor dest_factor, - bool invert_dest, - bool is_alpha, - bool supports_2src) + enum blend_factor src_factor, bool invert_src, + enum blend_factor dest_factor, bool invert_dest, + bool is_alpha, bool supports_2src) { - if (is_2srcdest(blend_func, src_factor, invert_src, - dest_factor, invert_dest, is_alpha)) { + if (is_2srcdest(blend_func, src_factor, invert_src, dest_factor, invert_dest, + is_alpha)) { - return supports_2src; - } + return supports_2src; + } - if (blend_func != BLEND_FUNC_ADD && - blend_func != BLEND_FUNC_SUBTRACT && - blend_func != BLEND_FUNC_REVERSE_SUBTRACT) - return false; + if (blend_func != BLEND_FUNC_ADD && blend_func != BLEND_FUNC_SUBTRACT && + blend_func != BLEND_FUNC_REVERSE_SUBTRACT) + return false; - if (!factor_is_supported(src_factor) || - !factor_is_supported(dest_factor)) - return false; + if (!factor_is_supported(src_factor) || !factor_is_supported(dest_factor)) + return false; - if (src_factor != dest_factor && - src_factor != BLEND_FACTOR_ZERO && - dest_factor != BLEND_FACTOR_ZERO) - return false; + if (src_factor != dest_factor && src_factor != BLEND_FACTOR_ZERO && + dest_factor != BLEND_FACTOR_ZERO) + return false; - return true; + return true; } static unsigned blend_factor_constant_mask(enum blend_factor factor) { - if (factor == BLEND_FACTOR_CONSTANT_COLOR) - return 0b0111; /* RGB */ - else if (factor == BLEND_FACTOR_CONSTANT_ALPHA) - return 0b1000; /* A */ - else - return 0b0000; /* - */ + if (factor == BLEND_FACTOR_CONSTANT_COLOR) + return 0b0111; /* RGB */ + else if (factor == BLEND_FACTOR_CONSTANT_ALPHA) + return 0b1000; /* A */ + else + return 0b0000; /* - */ } unsigned pan_blend_constant_mask(const struct pan_blend_equation eq) { - return blend_factor_constant_mask(eq.rgb_src_factor) | - blend_factor_constant_mask(eq.rgb_dst_factor) | - blend_factor_constant_mask(eq.alpha_src_factor) | - blend_factor_constant_mask(eq.alpha_dst_factor); + return blend_factor_constant_mask(eq.rgb_src_factor) | + blend_factor_constant_mask(eq.rgb_dst_factor) | + blend_factor_constant_mask(eq.alpha_src_factor) | + blend_factor_constant_mask(eq.alpha_dst_factor); } /* Only "homogenous" (scalar or vector with all components equal) constants are @@ -127,14 +118,14 @@ pan_blend_constant_mask(const struct pan_blend_equation eq) bool pan_blend_is_homogenous_constant(unsigned mask, const float *constants) { - float constant = pan_blend_get_constant(mask, constants); + float constant = pan_blend_get_constant(mask, constants); - u_foreach_bit(i, mask) { - if (constants[i] 
!= constant) - return false; - } + u_foreach_bit(i, mask) { + if (constants[i] != constant) + return false; + } - return true; + return true; } /* Determines if an equation can run in fixed function */ @@ -143,167 +134,161 @@ bool pan_blend_can_fixed_function(const struct pan_blend_equation equation, bool supports_2src) { - return !equation.blend_enable || - (can_fixed_function_equation(equation.rgb_func, - equation.rgb_src_factor, - equation.rgb_invert_src_factor, - equation.rgb_dst_factor, - equation.rgb_invert_dst_factor, - false, supports_2src) && - can_fixed_function_equation(equation.alpha_func, - equation.alpha_src_factor, - equation.alpha_invert_src_factor, - equation.alpha_dst_factor, - equation.alpha_invert_dst_factor, - true, supports_2src)); + return !equation.blend_enable || + (can_fixed_function_equation( + equation.rgb_func, equation.rgb_src_factor, + equation.rgb_invert_src_factor, equation.rgb_dst_factor, + equation.rgb_invert_dst_factor, false, supports_2src) && + can_fixed_function_equation( + equation.alpha_func, equation.alpha_src_factor, + equation.alpha_invert_src_factor, equation.alpha_dst_factor, + equation.alpha_invert_dst_factor, true, supports_2src)); } static enum mali_blend_operand_c to_c_factor(enum blend_factor factor) { - switch (factor) { - case BLEND_FACTOR_ZERO: - return MALI_BLEND_OPERAND_C_ZERO; + switch (factor) { + case BLEND_FACTOR_ZERO: + return MALI_BLEND_OPERAND_C_ZERO; - case BLEND_FACTOR_SRC_ALPHA: - return MALI_BLEND_OPERAND_C_SRC_ALPHA; + case BLEND_FACTOR_SRC_ALPHA: + return MALI_BLEND_OPERAND_C_SRC_ALPHA; - case BLEND_FACTOR_DST_ALPHA: - return MALI_BLEND_OPERAND_C_DEST_ALPHA; + case BLEND_FACTOR_DST_ALPHA: + return MALI_BLEND_OPERAND_C_DEST_ALPHA; - case BLEND_FACTOR_SRC_COLOR: - return MALI_BLEND_OPERAND_C_SRC; + case BLEND_FACTOR_SRC_COLOR: + return MALI_BLEND_OPERAND_C_SRC; - case BLEND_FACTOR_DST_COLOR: - return MALI_BLEND_OPERAND_C_DEST; + case BLEND_FACTOR_DST_COLOR: + return MALI_BLEND_OPERAND_C_DEST; - case BLEND_FACTOR_CONSTANT_COLOR: - case BLEND_FACTOR_CONSTANT_ALPHA: - return MALI_BLEND_OPERAND_C_CONSTANT; + case BLEND_FACTOR_CONSTANT_COLOR: + case BLEND_FACTOR_CONSTANT_ALPHA: + return MALI_BLEND_OPERAND_C_CONSTANT; - default: - unreachable("Unsupported blend factor"); - } + default: + unreachable("Unsupported blend factor"); + } } static void -to_panfrost_function(enum blend_func blend_func, - enum blend_factor src_factor, - bool invert_src, - enum blend_factor dest_factor, - bool invert_dest, - bool is_alpha, +to_panfrost_function(enum blend_func blend_func, enum blend_factor src_factor, + bool invert_src, enum blend_factor dest_factor, + bool invert_dest, bool is_alpha, struct MALI_BLEND_FUNCTION *function) { - assert(can_fixed_function_equation(blend_func, src_factor, invert_src, - dest_factor, invert_dest, is_alpha, true)); + assert(can_fixed_function_equation(blend_func, src_factor, invert_src, + dest_factor, invert_dest, is_alpha, + true)); - if (src_factor == BLEND_FACTOR_ZERO && !invert_src) { - function->a = MALI_BLEND_OPERAND_A_ZERO; - function->b = MALI_BLEND_OPERAND_B_DEST; - if (blend_func == BLEND_FUNC_SUBTRACT) - function->negate_b = true; - function->invert_c = invert_dest; - function->c = to_c_factor(dest_factor); - } else if (src_factor == BLEND_FACTOR_ZERO && invert_src) { - function->a = MALI_BLEND_OPERAND_A_SRC; - function->b = MALI_BLEND_OPERAND_B_DEST; - if (blend_func == BLEND_FUNC_SUBTRACT) - function->negate_b = true; - else if (blend_func == BLEND_FUNC_REVERSE_SUBTRACT) - function->negate_a = true; - 
function->invert_c = invert_dest; - function->c = to_c_factor(dest_factor); - } else if (dest_factor == BLEND_FACTOR_ZERO && !invert_dest) { - function->a = MALI_BLEND_OPERAND_A_ZERO; - function->b = MALI_BLEND_OPERAND_B_SRC; - if (blend_func == BLEND_FUNC_REVERSE_SUBTRACT) - function->negate_b = true; - function->invert_c = invert_src; - function->c = to_c_factor(src_factor); - } else if (dest_factor == BLEND_FACTOR_ZERO && invert_dest) { - function->a = MALI_BLEND_OPERAND_A_DEST; - function->b = MALI_BLEND_OPERAND_B_SRC; - if (blend_func == BLEND_FUNC_SUBTRACT) - function->negate_a = true; - else if (blend_func == BLEND_FUNC_REVERSE_SUBTRACT) - function->negate_b = true; - function->invert_c = invert_src; - function->c = to_c_factor(src_factor); - } else if (src_factor == dest_factor && invert_src == invert_dest) { - function->a = MALI_BLEND_OPERAND_A_ZERO; - function->invert_c = invert_src; - function->c = to_c_factor(src_factor); + if (src_factor == BLEND_FACTOR_ZERO && !invert_src) { + function->a = MALI_BLEND_OPERAND_A_ZERO; + function->b = MALI_BLEND_OPERAND_B_DEST; + if (blend_func == BLEND_FUNC_SUBTRACT) + function->negate_b = true; + function->invert_c = invert_dest; + function->c = to_c_factor(dest_factor); + } else if (src_factor == BLEND_FACTOR_ZERO && invert_src) { + function->a = MALI_BLEND_OPERAND_A_SRC; + function->b = MALI_BLEND_OPERAND_B_DEST; + if (blend_func == BLEND_FUNC_SUBTRACT) + function->negate_b = true; + else if (blend_func == BLEND_FUNC_REVERSE_SUBTRACT) + function->negate_a = true; + function->invert_c = invert_dest; + function->c = to_c_factor(dest_factor); + } else if (dest_factor == BLEND_FACTOR_ZERO && !invert_dest) { + function->a = MALI_BLEND_OPERAND_A_ZERO; + function->b = MALI_BLEND_OPERAND_B_SRC; + if (blend_func == BLEND_FUNC_REVERSE_SUBTRACT) + function->negate_b = true; + function->invert_c = invert_src; + function->c = to_c_factor(src_factor); + } else if (dest_factor == BLEND_FACTOR_ZERO && invert_dest) { + function->a = MALI_BLEND_OPERAND_A_DEST; + function->b = MALI_BLEND_OPERAND_B_SRC; + if (blend_func == BLEND_FUNC_SUBTRACT) + function->negate_a = true; + else if (blend_func == BLEND_FUNC_REVERSE_SUBTRACT) + function->negate_b = true; + function->invert_c = invert_src; + function->c = to_c_factor(src_factor); + } else if (src_factor == dest_factor && invert_src == invert_dest) { + function->a = MALI_BLEND_OPERAND_A_ZERO; + function->invert_c = invert_src; + function->c = to_c_factor(src_factor); - switch (blend_func) { - case BLEND_FUNC_ADD: - function->b = MALI_BLEND_OPERAND_B_SRC_PLUS_DEST; - break; - case BLEND_FUNC_REVERSE_SUBTRACT: - function->negate_b = true; - FALLTHROUGH; - case BLEND_FUNC_SUBTRACT: - function->b = MALI_BLEND_OPERAND_B_SRC_MINUS_DEST; - break; - default: - unreachable("Invalid blend function"); - } - } else if (is_2srcdest(blend_func, src_factor, invert_src, dest_factor, - invert_dest, is_alpha)) { - /* src*dest + dest*src = 2*src*dest = 0 + dest*(2*src) */ - function->a = MALI_BLEND_OPERAND_A_ZERO; - function->b = MALI_BLEND_OPERAND_B_DEST; - function->c = MALI_BLEND_OPERAND_C_SRC_X_2; - } else { - assert(src_factor == dest_factor && invert_src != invert_dest); + switch (blend_func) { + case BLEND_FUNC_ADD: + function->b = MALI_BLEND_OPERAND_B_SRC_PLUS_DEST; + break; + case BLEND_FUNC_REVERSE_SUBTRACT: + function->negate_b = true; + FALLTHROUGH; + case BLEND_FUNC_SUBTRACT: + function->b = MALI_BLEND_OPERAND_B_SRC_MINUS_DEST; + break; + default: + unreachable("Invalid blend function"); + } + } else if 
(is_2srcdest(blend_func, src_factor, invert_src, dest_factor, + invert_dest, is_alpha)) { + /* src*dest + dest*src = 2*src*dest = 0 + dest*(2*src) */ + function->a = MALI_BLEND_OPERAND_A_ZERO; + function->b = MALI_BLEND_OPERAND_B_DEST; + function->c = MALI_BLEND_OPERAND_C_SRC_X_2; + } else { + assert(src_factor == dest_factor && invert_src != invert_dest); - function->a = MALI_BLEND_OPERAND_A_DEST; - function->invert_c = invert_src; - function->c = to_c_factor(src_factor); + function->a = MALI_BLEND_OPERAND_A_DEST; + function->invert_c = invert_src; + function->c = to_c_factor(src_factor); - switch (blend_func) { - case BLEND_FUNC_ADD: - function->b = MALI_BLEND_OPERAND_B_SRC_MINUS_DEST; - break; - case BLEND_FUNC_REVERSE_SUBTRACT: - function->b = MALI_BLEND_OPERAND_B_SRC_PLUS_DEST; - function->negate_b = true; - break; - case BLEND_FUNC_SUBTRACT: - function->b = MALI_BLEND_OPERAND_B_SRC_PLUS_DEST; - function->negate_a = true; - break; - default: - unreachable("Invalid blend function\n"); - } - } + switch (blend_func) { + case BLEND_FUNC_ADD: + function->b = MALI_BLEND_OPERAND_B_SRC_MINUS_DEST; + break; + case BLEND_FUNC_REVERSE_SUBTRACT: + function->b = MALI_BLEND_OPERAND_B_SRC_PLUS_DEST; + function->negate_b = true; + break; + case BLEND_FUNC_SUBTRACT: + function->b = MALI_BLEND_OPERAND_B_SRC_PLUS_DEST; + function->negate_a = true; + break; + default: + unreachable("Invalid blend function\n"); + } + } } bool pan_blend_is_opaque(const struct pan_blend_equation equation) { - /* If a channel is masked out, we can't use opaque mode even if - * blending is disabled, since we need a tilebuffer read in there */ - if (equation.color_mask != 0xF) - return false; + /* If a channel is masked out, we can't use opaque mode even if + * blending is disabled, since we need a tilebuffer read in there */ + if (equation.color_mask != 0xF) + return false; - /* With nothing masked out, disabled bledning is opaque */ - if (!equation.blend_enable) - return true; + /* With nothing masked out, disabled bledning is opaque */ + if (!equation.blend_enable) + return true; - /* Also detect open-coded opaque blending */ - return equation.rgb_src_factor == BLEND_FACTOR_ZERO && - equation.rgb_invert_src_factor && - equation.rgb_dst_factor == BLEND_FACTOR_ZERO && - !equation.rgb_invert_dst_factor && - (equation.rgb_func == BLEND_FUNC_ADD || - equation.rgb_func == BLEND_FUNC_SUBTRACT) && - equation.alpha_src_factor == BLEND_FACTOR_ZERO && - equation.alpha_invert_src_factor && - equation.alpha_dst_factor == BLEND_FACTOR_ZERO && - !equation.alpha_invert_dst_factor && - (equation.alpha_func == BLEND_FUNC_ADD || - equation.alpha_func == BLEND_FUNC_SUBTRACT); + /* Also detect open-coded opaque blending */ + return equation.rgb_src_factor == BLEND_FACTOR_ZERO && + equation.rgb_invert_src_factor && + equation.rgb_dst_factor == BLEND_FACTOR_ZERO && + !equation.rgb_invert_dst_factor && + (equation.rgb_func == BLEND_FUNC_ADD || + equation.rgb_func == BLEND_FUNC_SUBTRACT) && + equation.alpha_src_factor == BLEND_FACTOR_ZERO && + equation.alpha_invert_src_factor && + equation.alpha_dst_factor == BLEND_FACTOR_ZERO && + !equation.alpha_invert_dst_factor && + (equation.alpha_func == BLEND_FUNC_ADD || + equation.alpha_func == BLEND_FUNC_SUBTRACT); } /* Check if (factor, invert) represents a constant value of val, assuming @@ -313,11 +298,11 @@ pan_blend_is_opaque(const struct pan_blend_equation equation) static inline bool is_factor_01(unsigned factor, bool invert, unsigned val, unsigned srca) { - assert(val == 0 || val == 1); - 
assert(srca == 0 || srca == 1); + assert(val == 0 || val == 1); + assert(srca == 0 || srca == 1); - return ((invert ^ !val) && factor == BLEND_FACTOR_ZERO) || - ((invert ^ srca ^ !val) && factor == BLEND_FACTOR_SRC_ALPHA); + return ((invert ^ !val) && factor == BLEND_FACTOR_ZERO) || + ((invert ^ srca ^ !val) && factor == BLEND_FACTOR_SRC_ALPHA); } /* Returns if src alpha = 0 implies the blended colour equals the destination @@ -340,24 +325,24 @@ is_factor_01(unsigned factor, bool invert, unsigned val, unsigned srca) bool pan_blend_alpha_zero_nop(const struct pan_blend_equation eq) { - if (eq.rgb_func != BLEND_FUNC_ADD && - eq.rgb_func != BLEND_FUNC_REVERSE_SUBTRACT) - return false; + if (eq.rgb_func != BLEND_FUNC_ADD && + eq.rgb_func != BLEND_FUNC_REVERSE_SUBTRACT) + return false; - if (eq.color_mask & 0x8) { - if (!is_factor_01(eq.alpha_dst_factor, eq.alpha_invert_dst_factor, 1, 0)) - return false; - } + if (eq.color_mask & 0x8) { + if (!is_factor_01(eq.alpha_dst_factor, eq.alpha_invert_dst_factor, 1, 0)) + return false; + } - if (eq.color_mask & 0x7) { - if (!is_factor_01(eq.rgb_dst_factor, eq.rgb_invert_dst_factor, 1, 0)) - return false; + if (eq.color_mask & 0x7) { + if (!is_factor_01(eq.rgb_dst_factor, eq.rgb_invert_dst_factor, 1, 0)) + return false; - if (!is_factor_01(eq.rgb_src_factor, eq.rgb_invert_src_factor, 0, 0)) - return false; - } + if (!is_factor_01(eq.rgb_src_factor, eq.rgb_invert_src_factor, 0, 0)) + return false; + } - return true; + return true; } /* Returns if src alpha = 1 implies the blended colour equals the source @@ -378,25 +363,24 @@ pan_blend_alpha_zero_nop(const struct pan_blend_equation eq) bool pan_blend_alpha_one_store(const struct pan_blend_equation eq) { - if (eq.rgb_func != BLEND_FUNC_ADD && - eq.rgb_func != BLEND_FUNC_SUBTRACT) - return false; + if (eq.rgb_func != BLEND_FUNC_ADD && eq.rgb_func != BLEND_FUNC_SUBTRACT) + return false; - if (eq.color_mask != 0xf) - return false; + if (eq.color_mask != 0xf) + return false; - return is_factor_01(eq.rgb_src_factor, eq.rgb_invert_src_factor, 1, 1) && - is_factor_01(eq.alpha_src_factor, eq.alpha_invert_src_factor, 1, 1) && - is_factor_01(eq.rgb_dst_factor, eq.rgb_invert_dst_factor, 0, 1) && - is_factor_01(eq.alpha_dst_factor, eq.alpha_invert_dst_factor, 0, 1); + return is_factor_01(eq.rgb_src_factor, eq.rgb_invert_src_factor, 1, 1) && + is_factor_01(eq.alpha_src_factor, eq.alpha_invert_src_factor, 1, 1) && + is_factor_01(eq.rgb_dst_factor, eq.rgb_invert_dst_factor, 0, 1) && + is_factor_01(eq.alpha_dst_factor, eq.alpha_invert_dst_factor, 0, 1); } static bool is_dest_factor(enum blend_factor factor, bool alpha) { - return factor == BLEND_FACTOR_DST_ALPHA || - factor == BLEND_FACTOR_DST_COLOR || - (factor == BLEND_FACTOR_SRC_ALPHA_SATURATE && !alpha); + return factor == BLEND_FACTOR_DST_ALPHA || + factor == BLEND_FACTOR_DST_COLOR || + (factor == BLEND_FACTOR_SRC_ALPHA_SATURATE && !alpha); } /* Determines if a blend equation reads back the destination. 
This can occur by @@ -406,13 +390,13 @@ is_dest_factor(enum blend_factor factor, bool alpha) bool pan_blend_reads_dest(const struct pan_blend_equation equation) { - return (equation.color_mask && equation.color_mask != 0xF) || - is_dest_factor(equation.rgb_src_factor, false) || - is_dest_factor(equation.alpha_src_factor, true) || - equation.rgb_dst_factor != BLEND_FACTOR_ZERO || - equation.rgb_invert_dst_factor || - equation.alpha_dst_factor != BLEND_FACTOR_ZERO || - equation.alpha_invert_dst_factor; + return (equation.color_mask && equation.color_mask != 0xF) || + is_dest_factor(equation.rgb_src_factor, false) || + is_dest_factor(equation.alpha_src_factor, true) || + equation.rgb_dst_factor != BLEND_FACTOR_ZERO || + equation.rgb_invert_dst_factor || + equation.alpha_dst_factor != BLEND_FACTOR_ZERO || + equation.alpha_invert_dst_factor; } /* Create the descriptor for a fixed blend mode given the corresponding API @@ -422,72 +406,68 @@ void pan_blend_to_fixed_function_equation(const struct pan_blend_equation equation, struct MALI_BLEND_EQUATION *out) { - /* If no blending is enabled, default back on `replace` mode */ - if (!equation.blend_enable) { - out->color_mask = equation.color_mask; - out->rgb.a = MALI_BLEND_OPERAND_A_SRC; - out->rgb.b = MALI_BLEND_OPERAND_B_SRC; - out->rgb.c = MALI_BLEND_OPERAND_C_ZERO; - out->alpha.a = MALI_BLEND_OPERAND_A_SRC; - out->alpha.b = MALI_BLEND_OPERAND_B_SRC; - out->alpha.c = MALI_BLEND_OPERAND_C_ZERO; - return; - } + /* If no blending is enabled, default back on `replace` mode */ + if (!equation.blend_enable) { + out->color_mask = equation.color_mask; + out->rgb.a = MALI_BLEND_OPERAND_A_SRC; + out->rgb.b = MALI_BLEND_OPERAND_B_SRC; + out->rgb.c = MALI_BLEND_OPERAND_C_ZERO; + out->alpha.a = MALI_BLEND_OPERAND_A_SRC; + out->alpha.b = MALI_BLEND_OPERAND_B_SRC; + out->alpha.c = MALI_BLEND_OPERAND_C_ZERO; + return; + } - /* Compile the fixed-function blend */ - to_panfrost_function(equation.rgb_func, - equation.rgb_src_factor, - equation.rgb_invert_src_factor, - equation.rgb_dst_factor, - equation.rgb_invert_dst_factor, - false, &out->rgb); + /* Compile the fixed-function blend */ + to_panfrost_function(equation.rgb_func, equation.rgb_src_factor, + equation.rgb_invert_src_factor, equation.rgb_dst_factor, + equation.rgb_invert_dst_factor, false, &out->rgb); - to_panfrost_function(equation.alpha_func, - equation.alpha_src_factor, - equation.alpha_invert_src_factor, - equation.alpha_dst_factor, - equation.alpha_invert_dst_factor, - true, &out->alpha); - out->color_mask = equation.color_mask; + to_panfrost_function(equation.alpha_func, equation.alpha_src_factor, + equation.alpha_invert_src_factor, + equation.alpha_dst_factor, + equation.alpha_invert_dst_factor, true, &out->alpha); + out->color_mask = equation.color_mask; } uint32_t pan_pack_blend(const struct pan_blend_equation equation) { - STATIC_ASSERT(sizeof(uint32_t) == MALI_BLEND_EQUATION_LENGTH); + STATIC_ASSERT(sizeof(uint32_t) == MALI_BLEND_EQUATION_LENGTH); - uint32_t out = 0; + uint32_t out = 0; - pan_pack(&out, BLEND_EQUATION, cfg) { - pan_blend_to_fixed_function_equation(equation, &cfg); - } + pan_pack(&out, BLEND_EQUATION, cfg) { + pan_blend_to_fixed_function_equation(equation, &cfg); + } - return out; + return out; } -static uint32_t pan_blend_shader_key_hash(const void *key) +static uint32_t +pan_blend_shader_key_hash(const void *key) { - return _mesa_hash_data(key, sizeof(struct pan_blend_shader_key)); + return _mesa_hash_data(key, sizeof(struct pan_blend_shader_key)); } -static bool 
pan_blend_shader_key_equal(const void *a, const void *b) +static bool +pan_blend_shader_key_equal(const void *a, const void *b) { - return !memcmp(a, b, sizeof(struct pan_blend_shader_key)); + return !memcmp(a, b, sizeof(struct pan_blend_shader_key)); } void pan_blend_shaders_init(struct panfrost_device *dev) { - dev->blend_shaders.shaders = - _mesa_hash_table_create(NULL, pan_blend_shader_key_hash, - pan_blend_shader_key_equal); - pthread_mutex_init(&dev->blend_shaders.lock, NULL); + dev->blend_shaders.shaders = _mesa_hash_table_create( + NULL, pan_blend_shader_key_hash, pan_blend_shader_key_equal); + pthread_mutex_init(&dev->blend_shaders.lock, NULL); } void pan_blend_shaders_cleanup(struct panfrost_device *dev) { - _mesa_hash_table_destroy(dev->blend_shaders.shaders, NULL); + _mesa_hash_table_destroy(dev->blend_shaders.shaders, NULL); } #else /* ifndef PAN_ARCH */ @@ -495,231 +475,248 @@ pan_blend_shaders_cleanup(struct panfrost_device *dev) static const char * logicop_str(enum pipe_logicop logicop) { - switch (logicop) { - case PIPE_LOGICOP_CLEAR: return "clear"; - case PIPE_LOGICOP_NOR: return "nor"; - case PIPE_LOGICOP_AND_INVERTED: return "and-inverted"; - case PIPE_LOGICOP_COPY_INVERTED: return "copy-inverted"; - case PIPE_LOGICOP_AND_REVERSE: return "and-reverse"; - case PIPE_LOGICOP_INVERT: return "invert"; - case PIPE_LOGICOP_XOR: return "xor"; - case PIPE_LOGICOP_NAND: return "nand"; - case PIPE_LOGICOP_AND: return "and"; - case PIPE_LOGICOP_EQUIV: return "equiv"; - case PIPE_LOGICOP_NOOP: return "noop"; - case PIPE_LOGICOP_OR_INVERTED: return "or-inverted"; - case PIPE_LOGICOP_COPY: return "copy"; - case PIPE_LOGICOP_OR_REVERSE: return "or-reverse"; - case PIPE_LOGICOP_OR: return "or"; - case PIPE_LOGICOP_SET: return "set"; - default: unreachable("Invalid logicop\n"); - } + switch (logicop) { + case PIPE_LOGICOP_CLEAR: + return "clear"; + case PIPE_LOGICOP_NOR: + return "nor"; + case PIPE_LOGICOP_AND_INVERTED: + return "and-inverted"; + case PIPE_LOGICOP_COPY_INVERTED: + return "copy-inverted"; + case PIPE_LOGICOP_AND_REVERSE: + return "and-reverse"; + case PIPE_LOGICOP_INVERT: + return "invert"; + case PIPE_LOGICOP_XOR: + return "xor"; + case PIPE_LOGICOP_NAND: + return "nand"; + case PIPE_LOGICOP_AND: + return "and"; + case PIPE_LOGICOP_EQUIV: + return "equiv"; + case PIPE_LOGICOP_NOOP: + return "noop"; + case PIPE_LOGICOP_OR_INVERTED: + return "or-inverted"; + case PIPE_LOGICOP_COPY: + return "copy"; + case PIPE_LOGICOP_OR_REVERSE: + return "or-reverse"; + case PIPE_LOGICOP_OR: + return "or"; + case PIPE_LOGICOP_SET: + return "set"; + default: + unreachable("Invalid logicop\n"); + } } static void -get_equation_str(const struct pan_blend_rt_state *rt_state, - char *str, unsigned len) +get_equation_str(const struct pan_blend_rt_state *rt_state, char *str, + unsigned len) { - const char *funcs[] = { - "add", "sub", "reverse_sub", "min", "max", - }; - const char *factors[] = { - "zero", "src_color", "src1_color", "dst_color", - "src_alpha", "src1_alpha", "dst_alpha", - "const_color", "const_alpha", "src_alpha_sat", - }; - int ret; + const char *funcs[] = { + "add", "sub", "reverse_sub", "min", "max", + }; + const char *factors[] = { + "zero", "src_color", "src1_color", "dst_color", "src_alpha", + "src1_alpha", "dst_alpha", "const_color", "const_alpha", "src_alpha_sat", + }; + int ret; - if (!rt_state->equation.blend_enable) { - ret = snprintf(str, len, "replace(%s%s%s%s)", - (rt_state->equation.color_mask & 1) ? "R" : "", - (rt_state->equation.color_mask & 2) ? 
"G" : "", - (rt_state->equation.color_mask & 4) ? "B" : "", - (rt_state->equation.color_mask & 8) ? "A" : ""); - assert(ret > 0); - return; - } + if (!rt_state->equation.blend_enable) { + ret = snprintf(str, len, "replace(%s%s%s%s)", + (rt_state->equation.color_mask & 1) ? "R" : "", + (rt_state->equation.color_mask & 2) ? "G" : "", + (rt_state->equation.color_mask & 4) ? "B" : "", + (rt_state->equation.color_mask & 8) ? "A" : ""); + assert(ret > 0); + return; + } - if (rt_state->equation.color_mask & 7) { - assert(rt_state->equation.rgb_func < ARRAY_SIZE(funcs)); - assert(rt_state->equation.rgb_src_factor < ARRAY_SIZE(factors)); - assert(rt_state->equation.rgb_dst_factor < ARRAY_SIZE(factors)); - ret = snprintf(str, len, "%s%s%s(func=%s,src_factor=%s%s,dst_factor=%s%s)%s", - (rt_state->equation.color_mask & 1) ? "R" : "", - (rt_state->equation.color_mask & 2) ? "G" : "", - (rt_state->equation.color_mask & 4) ? "B" : "", - funcs[rt_state->equation.rgb_func], - rt_state->equation.rgb_invert_src_factor ? "-" : "", - factors[rt_state->equation.rgb_src_factor], - rt_state->equation.rgb_invert_dst_factor ? "-" : "", - factors[rt_state->equation.rgb_dst_factor], - rt_state->equation.color_mask & 8 ? ";" : ""); - assert(ret > 0); - str += ret; - len -= ret; - } + if (rt_state->equation.color_mask & 7) { + assert(rt_state->equation.rgb_func < ARRAY_SIZE(funcs)); + assert(rt_state->equation.rgb_src_factor < ARRAY_SIZE(factors)); + assert(rt_state->equation.rgb_dst_factor < ARRAY_SIZE(factors)); + ret = + snprintf(str, len, "%s%s%s(func=%s,src_factor=%s%s,dst_factor=%s%s)%s", + (rt_state->equation.color_mask & 1) ? "R" : "", + (rt_state->equation.color_mask & 2) ? "G" : "", + (rt_state->equation.color_mask & 4) ? "B" : "", + funcs[rt_state->equation.rgb_func], + rt_state->equation.rgb_invert_src_factor ? "-" : "", + factors[rt_state->equation.rgb_src_factor], + rt_state->equation.rgb_invert_dst_factor ? "-" : "", + factors[rt_state->equation.rgb_dst_factor], + rt_state->equation.color_mask & 8 ? ";" : ""); + assert(ret > 0); + str += ret; + len -= ret; + } - if (rt_state->equation.color_mask & 8) { - assert(rt_state->equation.alpha_func < ARRAY_SIZE(funcs)); - assert(rt_state->equation.alpha_src_factor < ARRAY_SIZE(factors)); - assert(rt_state->equation.alpha_dst_factor < ARRAY_SIZE(factors)); - ret = snprintf(str, len, "A(func=%s,src_factor=%s%s,dst_factor=%s%s)", - funcs[rt_state->equation.alpha_func], - rt_state->equation.alpha_invert_src_factor ? "-" : "", - factors[rt_state->equation.alpha_src_factor], - rt_state->equation.alpha_invert_dst_factor ? "-" : "", - factors[rt_state->equation.alpha_dst_factor]); - assert(ret > 0); - str += ret; - len -= ret; - } + if (rt_state->equation.color_mask & 8) { + assert(rt_state->equation.alpha_func < ARRAY_SIZE(funcs)); + assert(rt_state->equation.alpha_src_factor < ARRAY_SIZE(factors)); + assert(rt_state->equation.alpha_dst_factor < ARRAY_SIZE(factors)); + ret = snprintf(str, len, "A(func=%s,src_factor=%s%s,dst_factor=%s%s)", + funcs[rt_state->equation.alpha_func], + rt_state->equation.alpha_invert_src_factor ? "-" : "", + factors[rt_state->equation.alpha_src_factor], + rt_state->equation.alpha_invert_dst_factor ? 
"-" : "", + factors[rt_state->equation.alpha_dst_factor]); + assert(ret > 0); + str += ret; + len -= ret; + } } static bool pan_inline_blend_constants(nir_builder *b, nir_instr *instr, void *data) { - if (instr->type != nir_instr_type_intrinsic) - return false; + if (instr->type != nir_instr_type_intrinsic) + return false; - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - if (intr->intrinsic != nir_intrinsic_load_blend_const_color_rgba) - return false; + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic != nir_intrinsic_load_blend_const_color_rgba) + return false; - float *floats = data; - const nir_const_value constants[4] = { - nir_const_value_for_float(floats[0], 32), - nir_const_value_for_float(floats[1], 32), - nir_const_value_for_float(floats[2], 32), - nir_const_value_for_float(floats[3], 32) - }; + float *floats = data; + const nir_const_value constants[4] = { + nir_const_value_for_float(floats[0], 32), + nir_const_value_for_float(floats[1], 32), + nir_const_value_for_float(floats[2], 32), + nir_const_value_for_float(floats[3], 32)}; - b->cursor = nir_after_instr(instr); - nir_ssa_def *constant = nir_build_imm(b, 4, 32, constants); - nir_ssa_def_rewrite_uses(&intr->dest.ssa, constant); - nir_instr_remove(instr); - return true; + b->cursor = nir_after_instr(instr); + nir_ssa_def *constant = nir_build_imm(b, 4, 32, constants); + nir_ssa_def_rewrite_uses(&intr->dest.ssa, constant); + nir_instr_remove(instr); + return true; } nir_shader * GENX(pan_blend_create_shader)(const struct panfrost_device *dev, const struct pan_blend_state *state, - nir_alu_type src0_type, - nir_alu_type src1_type, + nir_alu_type src0_type, nir_alu_type src1_type, unsigned rt) { - const struct pan_blend_rt_state *rt_state = &state->rts[rt]; - char equation_str[128] = { 0 }; + const struct pan_blend_rt_state *rt_state = &state->rts[rt]; + char equation_str[128] = {0}; - get_equation_str(rt_state, equation_str, sizeof(equation_str)); + get_equation_str(rt_state, equation_str, sizeof(equation_str)); - nir_builder b = - nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, - GENX(pan_shader_get_compiler_options)(), - "pan_blend(rt=%d,fmt=%s,nr_samples=%d,%s=%s)", - rt, util_format_name(rt_state->format), - rt_state->nr_samples, - state->logicop_enable ? "logicop" : "equation", - state->logicop_enable ? - logicop_str(state->logicop_func) : equation_str); + nir_builder b = nir_builder_init_simple_shader( + MESA_SHADER_FRAGMENT, GENX(pan_shader_get_compiler_options)(), + "pan_blend(rt=%d,fmt=%s,nr_samples=%d,%s=%s)", rt, + util_format_name(rt_state->format), rt_state->nr_samples, + state->logicop_enable ? "logicop" : "equation", + state->logicop_enable ? logicop_str(state->logicop_func) : equation_str); - const struct util_format_description *format_desc = - util_format_description(rt_state->format); - nir_alu_type nir_type = pan_unpacked_type_for_format(format_desc); + const struct util_format_description *format_desc = + util_format_description(rt_state->format); + nir_alu_type nir_type = pan_unpacked_type_for_format(format_desc); - /* Bifrost/Valhall support 16-bit and 32-bit register formats for - * LD_TILE/ST_TILE/BLEND, but do not support 8-bit. Rather than making - * the fragment output 8-bit and inserting extra conversions in the - * compiler, promote the output to 16-bit. The larger size is still - * compatible with correct conversion semantics. 
- */ - if (PAN_ARCH >= 6 && nir_alu_type_get_type_size(nir_type) == 8) - nir_type = nir_alu_type_get_base_type(nir_type) | 16; + /* Bifrost/Valhall support 16-bit and 32-bit register formats for + * LD_TILE/ST_TILE/BLEND, but do not support 8-bit. Rather than making + * the fragment output 8-bit and inserting extra conversions in the + * compiler, promote the output to 16-bit. The larger size is still + * compatible with correct conversion semantics. + */ + if (PAN_ARCH >= 6 && nir_alu_type_get_type_size(nir_type) == 8) + nir_type = nir_alu_type_get_base_type(nir_type) | 16; - enum glsl_base_type glsl_type = nir_get_glsl_base_type_for_nir_type(nir_type); + enum glsl_base_type glsl_type = + nir_get_glsl_base_type_for_nir_type(nir_type); - nir_lower_blend_options options = { - .logicop_enable = state->logicop_enable, - .logicop_func = state->logicop_func, - .rt[0].colormask = rt_state->equation.color_mask, - .format[0] = rt_state->format, - }; + nir_lower_blend_options options = { + .logicop_enable = state->logicop_enable, + .logicop_func = state->logicop_func, + .rt[0].colormask = rt_state->equation.color_mask, + .format[0] = rt_state->format, + }; - if (!rt_state->equation.blend_enable) { - static const nir_lower_blend_channel replace = { - .func = BLEND_FUNC_ADD, - .src_factor = BLEND_FACTOR_ZERO, - .invert_src_factor = true, - .dst_factor = BLEND_FACTOR_ZERO, - .invert_dst_factor = false, - }; + if (!rt_state->equation.blend_enable) { + static const nir_lower_blend_channel replace = { + .func = BLEND_FUNC_ADD, + .src_factor = BLEND_FACTOR_ZERO, + .invert_src_factor = true, + .dst_factor = BLEND_FACTOR_ZERO, + .invert_dst_factor = false, + }; - options.rt[0].rgb = replace; - options.rt[0].alpha = replace; - } else { - options.rt[0].rgb.func = rt_state->equation.rgb_func; - options.rt[0].rgb.src_factor = rt_state->equation.rgb_src_factor; - options.rt[0].rgb.invert_src_factor = rt_state->equation.rgb_invert_src_factor; - options.rt[0].rgb.dst_factor = rt_state->equation.rgb_dst_factor; - options.rt[0].rgb.invert_dst_factor = rt_state->equation.rgb_invert_dst_factor; - options.rt[0].alpha.func = rt_state->equation.alpha_func; - options.rt[0].alpha.src_factor = rt_state->equation.alpha_src_factor; - options.rt[0].alpha.invert_src_factor = rt_state->equation.alpha_invert_src_factor; - options.rt[0].alpha.dst_factor = rt_state->equation.alpha_dst_factor; - options.rt[0].alpha.invert_dst_factor = rt_state->equation.alpha_invert_dst_factor; - } + options.rt[0].rgb = replace; + options.rt[0].alpha = replace; + } else { + options.rt[0].rgb.func = rt_state->equation.rgb_func; + options.rt[0].rgb.src_factor = rt_state->equation.rgb_src_factor; + options.rt[0].rgb.invert_src_factor = + rt_state->equation.rgb_invert_src_factor; + options.rt[0].rgb.dst_factor = rt_state->equation.rgb_dst_factor; + options.rt[0].rgb.invert_dst_factor = + rt_state->equation.rgb_invert_dst_factor; + options.rt[0].alpha.func = rt_state->equation.alpha_func; + options.rt[0].alpha.src_factor = rt_state->equation.alpha_src_factor; + options.rt[0].alpha.invert_src_factor = + rt_state->equation.alpha_invert_src_factor; + options.rt[0].alpha.dst_factor = rt_state->equation.alpha_dst_factor; + options.rt[0].alpha.invert_dst_factor = + rt_state->equation.alpha_invert_dst_factor; + } - nir_alu_type src_types[] = { src0_type ?: nir_type_float32, src1_type ?: nir_type_float32 }; + nir_alu_type src_types[] = {src0_type ?: nir_type_float32, + src1_type ?: nir_type_float32}; - /* HACK: workaround buggy TGSI shaders (u_blitter) */ - for 
(unsigned i = 0; i < ARRAY_SIZE(src_types); ++i) { - src_types[i] = nir_alu_type_get_base_type(nir_type) | - nir_alu_type_get_type_size(src_types[i]); - } + /* HACK: workaround buggy TGSI shaders (u_blitter) */ + for (unsigned i = 0; i < ARRAY_SIZE(src_types); ++i) { + src_types[i] = nir_alu_type_get_base_type(nir_type) | + nir_alu_type_get_type_size(src_types[i]); + } - nir_variable *c_src = - nir_variable_create(b.shader, nir_var_shader_in, - glsl_vector_type(nir_get_glsl_base_type_for_nir_type(src_types[0]), 4), - "gl_Color"); - c_src->data.location = VARYING_SLOT_COL0; - nir_variable *c_src1 = - nir_variable_create(b.shader, nir_var_shader_in, - glsl_vector_type(nir_get_glsl_base_type_for_nir_type(src_types[1]), 4), - "gl_Color1"); - c_src1->data.location = VARYING_SLOT_VAR0; - c_src1->data.driver_location = 1; - nir_variable *c_out = - nir_variable_create(b.shader, nir_var_shader_out, - glsl_vector_type(glsl_type, 4), - "gl_FragColor"); - c_out->data.location = FRAG_RESULT_DATA0; + nir_variable *c_src = nir_variable_create( + b.shader, nir_var_shader_in, + glsl_vector_type(nir_get_glsl_base_type_for_nir_type(src_types[0]), 4), + "gl_Color"); + c_src->data.location = VARYING_SLOT_COL0; + nir_variable *c_src1 = nir_variable_create( + b.shader, nir_var_shader_in, + glsl_vector_type(nir_get_glsl_base_type_for_nir_type(src_types[1]), 4), + "gl_Color1"); + c_src1->data.location = VARYING_SLOT_VAR0; + c_src1->data.driver_location = 1; + nir_variable *c_out = + nir_variable_create(b.shader, nir_var_shader_out, + glsl_vector_type(glsl_type, 4), "gl_FragColor"); + c_out->data.location = FRAG_RESULT_DATA0; - nir_ssa_def *s_src[] = {nir_load_var(&b, c_src), nir_load_var(&b, c_src1)}; + nir_ssa_def *s_src[] = {nir_load_var(&b, c_src), nir_load_var(&b, c_src1)}; - /* On Midgard, the blend shader is responsible for format conversion. - * As the OpenGL spec requires integer conversions to saturate, we must - * saturate ourselves here. On Bifrost and later, the conversion - * hardware handles this automatically. - */ - for (int i = 0; i < ARRAY_SIZE(s_src); ++i) { - nir_alu_type T = nir_alu_type_get_base_type(nir_type); - bool should_saturate = (PAN_ARCH <= 5) && (T != nir_type_float); - s_src[i] = nir_convert_with_rounding(&b, s_src[i], - src_types[i], nir_type, - nir_rounding_mode_undef, - should_saturate); - } + /* On Midgard, the blend shader is responsible for format conversion. + * As the OpenGL spec requires integer conversions to saturate, we must + * saturate ourselves here. On Bifrost and later, the conversion + * hardware handles this automatically. 
+ */ + for (int i = 0; i < ARRAY_SIZE(s_src); ++i) { + nir_alu_type T = nir_alu_type_get_base_type(nir_type); + bool should_saturate = (PAN_ARCH <= 5) && (T != nir_type_float); + s_src[i] = + nir_convert_with_rounding(&b, s_src[i], src_types[i], nir_type, + nir_rounding_mode_undef, should_saturate); + } - /* Build a trivial blend shader */ - nir_store_var(&b, c_out, s_src[0], 0xFF); + /* Build a trivial blend shader */ + nir_store_var(&b, c_out, s_src[0], 0xFF); - options.src1 = s_src[1]; + options.src1 = s_src[1]; - NIR_PASS_V(b.shader, nir_lower_blend, &options); - nir_shader_instructions_pass(b.shader, pan_inline_blend_constants, - nir_metadata_block_index | nir_metadata_dominance, - (void *) state->constants); + NIR_PASS_V(b.shader, nir_lower_blend, &options); + nir_shader_instructions_pass( + b.shader, pan_inline_blend_constants, + nir_metadata_block_index | nir_metadata_dominance, + (void *)state->constants); - return b.shader; + return b.shader; } #if PAN_ARCH >= 6 @@ -728,55 +725,55 @@ GENX(pan_blend_get_internal_desc)(const struct panfrost_device *dev, enum pipe_format fmt, unsigned rt, unsigned force_size, bool dithered) { - const struct util_format_description *desc = util_format_description(fmt); - uint64_t res; + const struct util_format_description *desc = util_format_description(fmt); + uint64_t res; - pan_pack(&res, INTERNAL_BLEND, cfg) { - cfg.mode = MALI_BLEND_MODE_OPAQUE; - cfg.fixed_function.num_comps = desc->nr_channels; - cfg.fixed_function.rt = rt; + pan_pack(&res, INTERNAL_BLEND, cfg) { + cfg.mode = MALI_BLEND_MODE_OPAQUE; + cfg.fixed_function.num_comps = desc->nr_channels; + cfg.fixed_function.rt = rt; - nir_alu_type T = pan_unpacked_type_for_format(desc); + nir_alu_type T = pan_unpacked_type_for_format(desc); - if (force_size) - T = nir_alu_type_get_base_type(T) | force_size; + if (force_size) + T = nir_alu_type_get_base_type(T) | force_size; - switch (T) { - case nir_type_float16: - cfg.fixed_function.conversion.register_format = - MALI_REGISTER_FILE_FORMAT_F16; - break; - case nir_type_float32: - cfg.fixed_function.conversion.register_format = - MALI_REGISTER_FILE_FORMAT_F32; - break; - case nir_type_int8: - case nir_type_int16: - cfg.fixed_function.conversion.register_format = - MALI_REGISTER_FILE_FORMAT_I16; - break; - case nir_type_int32: - cfg.fixed_function.conversion.register_format = - MALI_REGISTER_FILE_FORMAT_I32; - break; - case nir_type_uint8: - case nir_type_uint16: - cfg.fixed_function.conversion.register_format = - MALI_REGISTER_FILE_FORMAT_U16; - break; - case nir_type_uint32: - cfg.fixed_function.conversion.register_format = - MALI_REGISTER_FILE_FORMAT_U32; - break; - default: - unreachable("Invalid format"); - } + switch (T) { + case nir_type_float16: + cfg.fixed_function.conversion.register_format = + MALI_REGISTER_FILE_FORMAT_F16; + break; + case nir_type_float32: + cfg.fixed_function.conversion.register_format = + MALI_REGISTER_FILE_FORMAT_F32; + break; + case nir_type_int8: + case nir_type_int16: + cfg.fixed_function.conversion.register_format = + MALI_REGISTER_FILE_FORMAT_I16; + break; + case nir_type_int32: + cfg.fixed_function.conversion.register_format = + MALI_REGISTER_FILE_FORMAT_I32; + break; + case nir_type_uint8: + case nir_type_uint16: + cfg.fixed_function.conversion.register_format = + MALI_REGISTER_FILE_FORMAT_U16; + break; + case nir_type_uint32: + cfg.fixed_function.conversion.register_format = + MALI_REGISTER_FILE_FORMAT_U32; + break; + default: + unreachable("Invalid format"); + } - cfg.fixed_function.conversion.memory_format 
= - panfrost_format_to_bifrost_blend(dev, fmt, dithered); - } + cfg.fixed_function.conversion.memory_format = + panfrost_format_to_bifrost_blend(dev, fmt, dithered); + } - return res; + return res; } #endif @@ -784,92 +781,93 @@ struct pan_blend_shader_variant * GENX(pan_blend_get_shader_locked)(const struct panfrost_device *dev, const struct pan_blend_state *state, nir_alu_type src0_type, - nir_alu_type src1_type, - unsigned rt) + nir_alu_type src1_type, unsigned rt) { - struct pan_blend_shader_key key = { - .format = state->rts[rt].format, - .src0_type = src0_type, - .src1_type = src1_type, - .rt = rt, - .has_constants = pan_blend_constant_mask(state->rts[rt].equation) != 0, - .logicop_enable = state->logicop_enable, - .logicop_func = state->logicop_func, - .nr_samples = state->rts[rt].nr_samples, - .equation = state->rts[rt].equation, - }; + struct pan_blend_shader_key key = { + .format = state->rts[rt].format, + .src0_type = src0_type, + .src1_type = src1_type, + .rt = rt, + .has_constants = pan_blend_constant_mask(state->rts[rt].equation) != 0, + .logicop_enable = state->logicop_enable, + .logicop_func = state->logicop_func, + .nr_samples = state->rts[rt].nr_samples, + .equation = state->rts[rt].equation, + }; - /* Blend shaders should only be used for blending on Bifrost onwards */ - assert(dev->arch <= 5 || !pan_blend_is_opaque(state->rts[rt].equation)); - assert(state->rts[rt].equation.color_mask != 0); + /* Blend shaders should only be used for blending on Bifrost onwards */ + assert(dev->arch <= 5 || !pan_blend_is_opaque(state->rts[rt].equation)); + assert(state->rts[rt].equation.color_mask != 0); - struct hash_entry *he = _mesa_hash_table_search(dev->blend_shaders.shaders, &key); - struct pan_blend_shader *shader = he ? he->data : NULL; + struct hash_entry *he = + _mesa_hash_table_search(dev->blend_shaders.shaders, &key); + struct pan_blend_shader *shader = he ? 
he->data : NULL; - if (!shader) { - shader = rzalloc(dev->blend_shaders.shaders, struct pan_blend_shader); - shader->key = key; - list_inithead(&shader->variants); - _mesa_hash_table_insert(dev->blend_shaders.shaders, &shader->key, shader); - } + if (!shader) { + shader = rzalloc(dev->blend_shaders.shaders, struct pan_blend_shader); + shader->key = key; + list_inithead(&shader->variants); + _mesa_hash_table_insert(dev->blend_shaders.shaders, &shader->key, shader); + } - list_for_each_entry(struct pan_blend_shader_variant, iter, - &shader->variants, node) { - if (!key.has_constants || - !memcmp(iter->constants, state->constants, sizeof(iter->constants))) { - return iter; - } - } + list_for_each_entry(struct pan_blend_shader_variant, iter, &shader->variants, + node) { + if (!key.has_constants || + !memcmp(iter->constants, state->constants, sizeof(iter->constants))) { + return iter; + } + } - struct pan_blend_shader_variant *variant = NULL; + struct pan_blend_shader_variant *variant = NULL; - if (shader->nvariants < PAN_BLEND_SHADER_MAX_VARIANTS) { - variant = rzalloc(shader, struct pan_blend_shader_variant); - util_dynarray_init(&variant->binary, variant); - list_add(&variant->node, &shader->variants); - shader->nvariants++; - } else { - variant = list_last_entry(&shader->variants, struct pan_blend_shader_variant, node); - list_del(&variant->node); - list_add(&variant->node, &shader->variants); - util_dynarray_clear(&variant->binary); - } + if (shader->nvariants < PAN_BLEND_SHADER_MAX_VARIANTS) { + variant = rzalloc(shader, struct pan_blend_shader_variant); + util_dynarray_init(&variant->binary, variant); + list_add(&variant->node, &shader->variants); + shader->nvariants++; + } else { + variant = list_last_entry(&shader->variants, + struct pan_blend_shader_variant, node); + list_del(&variant->node); + list_add(&variant->node, &shader->variants); + util_dynarray_clear(&variant->binary); + } - memcpy(variant->constants, state->constants, sizeof(variant->constants)); + memcpy(variant->constants, state->constants, sizeof(variant->constants)); - nir_shader *nir = - GENX(pan_blend_create_shader)(dev, state, src0_type, src1_type, rt); + nir_shader *nir = + GENX(pan_blend_create_shader)(dev, state, src0_type, src1_type, rt); - /* Compile the NIR shader */ - struct panfrost_compile_inputs inputs = { - .gpu_id = dev->gpu_id, - .is_blend = true, - .blend.rt = shader->key.rt, - .blend.nr_samples = key.nr_samples, - .fixed_sysval_ubo = -1, - .rt_formats = { key.format }, - }; + /* Compile the NIR shader */ + struct panfrost_compile_inputs inputs = { + .gpu_id = dev->gpu_id, + .is_blend = true, + .blend.rt = shader->key.rt, + .blend.nr_samples = key.nr_samples, + .fixed_sysval_ubo = -1, + .rt_formats = {key.format}, + }; #if PAN_ARCH >= 6 - inputs.blend.bifrost_blend_desc = - GENX(pan_blend_get_internal_desc)(dev, key.format, key.rt, 0, false); + inputs.blend.bifrost_blend_desc = + GENX(pan_blend_get_internal_desc)(dev, key.format, key.rt, 0, false); #endif - struct pan_shader_info info; + struct pan_shader_info info; - GENX(pan_shader_compile)(nir, &inputs, &variant->binary, &info); + GENX(pan_shader_compile)(nir, &inputs, &variant->binary, &info); - /* Blend shaders can't have sysvals */ - assert(info.sysvals.sysval_count == 0); + /* Blend shaders can't have sysvals */ + assert(info.sysvals.sysval_count == 0); - variant->work_reg_count = info.work_reg_count; + variant->work_reg_count = info.work_reg_count; #if PAN_ARCH <= 5 - variant->first_tag = info.midgard.first_tag; + variant->first_tag = 
info.midgard.first_tag; #endif - ralloc_free(nir); + ralloc_free(nir); - return variant; + return variant; } #endif /* ifndef PAN_ARCH */ diff --git a/src/panfrost/lib/pan_blend.h b/src/panfrost/lib/pan_blend.h index 2dec9525c4a..8b826d41b42 100644 --- a/src/panfrost/lib/pan_blend.h +++ b/src/panfrost/lib/pan_blend.h @@ -27,10 +27,10 @@ #include "genxml/gen_macros.h" -#include "util/u_dynarray.h" -#include "util/format/u_format.h" -#include "compiler/shader_enums.h" #include "compiler/nir/nir.h" +#include "compiler/shader_enums.h" +#include "util/format/u_format.h" +#include "util/u_dynarray.h" #include "panfrost/util/pan_ir.h" @@ -38,84 +38,78 @@ struct MALI_BLEND_EQUATION; struct panfrost_device; struct pan_blend_equation { - unsigned blend_enable : 1; - enum blend_func rgb_func : 3; - unsigned rgb_invert_src_factor : 1; - enum blend_factor rgb_src_factor : 4; - unsigned rgb_invert_dst_factor : 1; - enum blend_factor rgb_dst_factor : 4; - enum blend_func alpha_func : 3; - unsigned alpha_invert_src_factor : 1; - enum blend_factor alpha_src_factor : 4; - unsigned alpha_invert_dst_factor : 1; - enum blend_factor alpha_dst_factor : 4; - unsigned color_mask : 4; + unsigned blend_enable : 1; + enum blend_func rgb_func : 3; + unsigned rgb_invert_src_factor : 1; + enum blend_factor rgb_src_factor : 4; + unsigned rgb_invert_dst_factor : 1; + enum blend_factor rgb_dst_factor : 4; + enum blend_func alpha_func : 3; + unsigned alpha_invert_src_factor : 1; + enum blend_factor alpha_src_factor : 4; + unsigned alpha_invert_dst_factor : 1; + enum blend_factor alpha_dst_factor : 4; + unsigned color_mask : 4; }; struct pan_blend_rt_state { - /* RT format */ - enum pipe_format format; + /* RT format */ + enum pipe_format format; - /* Number of samples */ - unsigned nr_samples; + /* Number of samples */ + unsigned nr_samples; - struct pan_blend_equation equation; + struct pan_blend_equation equation; }; struct pan_blend_state { - bool logicop_enable; - enum pipe_logicop logicop_func; - float constants[4]; - unsigned rt_count; - struct pan_blend_rt_state rts[8]; + bool logicop_enable; + enum pipe_logicop logicop_func; + float constants[4]; + unsigned rt_count; + struct pan_blend_rt_state rts[8]; }; struct pan_blend_shader_key { - enum pipe_format format; - nir_alu_type src0_type, src1_type; - uint32_t rt : 3; - uint32_t has_constants : 1; - uint32_t logicop_enable : 1; - uint32_t logicop_func:4; - uint32_t nr_samples : 5; - uint32_t padding : 18; - struct pan_blend_equation equation; + enum pipe_format format; + nir_alu_type src0_type, src1_type; + uint32_t rt : 3; + uint32_t has_constants : 1; + uint32_t logicop_enable : 1; + uint32_t logicop_func : 4; + uint32_t nr_samples : 5; + uint32_t padding : 18; + struct pan_blend_equation equation; }; struct pan_blend_shader_variant { - struct list_head node; - float constants[4]; - struct util_dynarray binary; - unsigned first_tag; - unsigned work_reg_count; + struct list_head node; + float constants[4]; + struct util_dynarray binary; + unsigned first_tag; + unsigned work_reg_count; }; #define PAN_BLEND_SHADER_MAX_VARIANTS 32 struct pan_blend_shader { - struct pan_blend_shader_key key; - unsigned nvariants; - struct list_head variants; + struct pan_blend_shader_key key; + unsigned nvariants; + struct list_head variants; }; -bool -pan_blend_reads_dest(const struct pan_blend_equation eq); +bool pan_blend_reads_dest(const struct pan_blend_equation eq); -bool -pan_blend_can_fixed_function(const struct pan_blend_equation equation, - bool supports_2src); +bool 
pan_blend_can_fixed_function(const struct pan_blend_equation equation, + bool supports_2src); -bool -pan_blend_is_opaque(const struct pan_blend_equation eq); +bool pan_blend_is_opaque(const struct pan_blend_equation eq); -bool -pan_blend_alpha_zero_nop(const struct pan_blend_equation eq); +bool pan_blend_alpha_zero_nop(const struct pan_blend_equation eq); -bool -pan_blend_alpha_one_store(const struct pan_blend_equation eq); +bool pan_blend_alpha_one_store(const struct pan_blend_equation eq); -unsigned -pan_blend_constant_mask(const struct pan_blend_equation eq); +unsigned pan_blend_constant_mask(const struct pan_blend_equation eq); /* Fixed-function blending only supports a single constant, so if multiple bits * are set in constant_mask, the constants must match. Therefore we may pick @@ -124,7 +118,7 @@ pan_blend_constant_mask(const struct pan_blend_equation eq); static inline float pan_blend_get_constant(unsigned mask, const float *constants) { - return mask ? constants[ffs(mask) - 1] : 0.0; + return mask ? constants[ffs(mask) - 1] : 0.0; } /* v6 doesn't support blend constants in FF blend equations whatsoever, and v7 @@ -134,7 +128,7 @@ pan_blend_get_constant(unsigned mask, const float *constants) static inline bool pan_blend_supports_constant(unsigned arch, unsigned rt) { - return !((arch == 6) || (arch == 7 && rt > 0)); + return !((arch == 6) || (arch == 7 && rt > 0)); } /* The SOURCE_2 value is new in Bifrost */ @@ -142,50 +136,39 @@ pan_blend_supports_constant(unsigned arch, unsigned rt) static inline bool pan_blend_supports_2src(unsigned arch) { - return (arch >= 6); + return (arch >= 6); } -bool -pan_blend_is_homogenous_constant(unsigned mask, const float *constants); +bool pan_blend_is_homogenous_constant(unsigned mask, const float *constants); -void -pan_blend_to_fixed_function_equation(const struct pan_blend_equation eq, - struct MALI_BLEND_EQUATION *equation); +void pan_blend_to_fixed_function_equation(const struct pan_blend_equation eq, + struct MALI_BLEND_EQUATION *equation); -uint32_t -pan_pack_blend(const struct pan_blend_equation equation); +uint32_t pan_pack_blend(const struct pan_blend_equation equation); -void -pan_blend_shaders_init(struct panfrost_device *dev); +void pan_blend_shaders_init(struct panfrost_device *dev); -void -pan_blend_shaders_cleanup(struct panfrost_device *dev); +void pan_blend_shaders_cleanup(struct panfrost_device *dev); #ifdef PAN_ARCH -nir_shader * -GENX(pan_blend_create_shader)(const struct panfrost_device *dev, - const struct pan_blend_state *state, - nir_alu_type src0_type, - nir_alu_type src1_type, - unsigned rt); +nir_shader *GENX(pan_blend_create_shader)(const struct panfrost_device *dev, + const struct pan_blend_state *state, + nir_alu_type src0_type, + nir_alu_type src1_type, unsigned rt); #if PAN_ARCH >= 6 -uint64_t -GENX(pan_blend_get_internal_desc)(const struct panfrost_device *dev, - enum pipe_format fmt, unsigned rt, - unsigned force_size, bool dithered); +uint64_t GENX(pan_blend_get_internal_desc)(const struct panfrost_device *dev, + enum pipe_format fmt, unsigned rt, + unsigned force_size, bool dithered); #endif /* Take blend_shaders.lock before calling this function and release it when * you're done with the shader variant object. 
*/ -struct pan_blend_shader_variant * -GENX(pan_blend_get_shader_locked)(const struct panfrost_device *dev, - const struct pan_blend_state *state, - nir_alu_type src0_type, - nir_alu_type src1_type, - unsigned rt); +struct pan_blend_shader_variant *GENX(pan_blend_get_shader_locked)( + const struct panfrost_device *dev, const struct pan_blend_state *state, + nir_alu_type src0_type, nir_alu_type src1_type, unsigned rt); #endif #endif diff --git a/src/panfrost/lib/pan_blitter.c b/src/panfrost/lib/pan_blitter.c index e2e2342b5e4..2705bf6acc1 100644 --- a/src/panfrost/lib/pan_blitter.c +++ b/src/panfrost/lib/pan_blitter.c @@ -25,18 +25,18 @@ * Boris Brezillon */ +#include "pan_blitter.h" #include #include +#include "compiler/nir/nir_builder.h" +#include "util/u_math.h" #include "pan_blend.h" -#include "pan_blitter.h" #include "pan_cs.h" #include "pan_encoder.h" #include "pan_pool.h" -#include "pan_shader.h" #include "pan_scoreboard.h" +#include "pan_shader.h" #include "pan_texture.h" -#include "compiler/nir/nir_builder.h" -#include "util/u_math.h" #if PAN_ARCH >= 6 /* On Midgard, the native blit infrastructure (via MFBD preloads) is broken or @@ -50,153 +50,151 @@ static enum mali_register_file_format blit_type_to_reg_fmt(nir_alu_type in) { - switch (in) { - case nir_type_float32: - return MALI_REGISTER_FILE_FORMAT_F32; - case nir_type_int32: - return MALI_REGISTER_FILE_FORMAT_I32; - case nir_type_uint32: - return MALI_REGISTER_FILE_FORMAT_U32; - default: - unreachable("Invalid blit type"); - } + switch (in) { + case nir_type_float32: + return MALI_REGISTER_FILE_FORMAT_F32; + case nir_type_int32: + return MALI_REGISTER_FILE_FORMAT_I32; + case nir_type_uint32: + return MALI_REGISTER_FILE_FORMAT_U32; + default: + unreachable("Invalid blit type"); + } } #endif struct pan_blit_surface { - gl_frag_result loc : 4; - nir_alu_type type : 8; - enum mali_texture_dimension dim : 2; - bool array : 1; - unsigned src_samples: 5; - unsigned dst_samples: 5; + gl_frag_result loc : 4; + nir_alu_type type : 8; + enum mali_texture_dimension dim : 2; + bool array : 1; + unsigned src_samples : 5; + unsigned dst_samples : 5; }; struct pan_blit_shader_key { - struct pan_blit_surface surfaces[8]; + struct pan_blit_surface surfaces[8]; }; struct pan_blit_shader_data { - struct pan_blit_shader_key key; - struct pan_shader_info info; - mali_ptr address; - unsigned blend_ret_offsets[8]; - nir_alu_type blend_types[8]; + struct pan_blit_shader_key key; + struct pan_shader_info info; + mali_ptr address; + unsigned blend_ret_offsets[8]; + nir_alu_type blend_types[8]; }; struct pan_blit_blend_shader_key { - enum pipe_format format; - nir_alu_type type; - unsigned rt : 3; - unsigned nr_samples : 5; - unsigned pad : 24; + enum pipe_format format; + nir_alu_type type; + unsigned rt : 3; + unsigned nr_samples : 5; + unsigned pad : 24; }; struct pan_blit_blend_shader_data { - struct pan_blit_blend_shader_key key; - mali_ptr address; + struct pan_blit_blend_shader_key key; + mali_ptr address; }; struct pan_blit_rsd_key { - struct { - enum pipe_format format; - nir_alu_type type : 8; - unsigned src_samples : 5; - unsigned dst_samples : 5; - enum mali_texture_dimension dim : 2; - bool array : 1; - } rts[8], z, s; + struct { + enum pipe_format format; + nir_alu_type type : 8; + unsigned src_samples : 5; + unsigned dst_samples : 5; + enum mali_texture_dimension dim : 2; + bool array : 1; + } rts[8], z, s; }; struct pan_blit_rsd_data { - struct pan_blit_rsd_key key; - mali_ptr address; + struct pan_blit_rsd_key key; + mali_ptr address; }; 
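For reference, the locking contract stated above for GENX(pan_blend_get_shader_locked) ("take blend_shaders.lock before calling ... release it when you're done with the shader variant object") is the pattern the call sites in this patch follow. A minimal sketch of that caller-side sequence, not part of the patch itself, with hypothetical locals (dev, blend_state, type, rt, shader_ptr) standing in for the real arguments, and assuming the pan_blend.h / pan_pool.h declarations already in scope:

   pthread_mutex_lock(&dev->blend_shaders.lock);

   struct pan_blend_shader_variant *b = GENX(pan_blend_get_shader_locked)(
      dev, &blend_state, type, nir_type_float32 /* src1 unused */, rt);

   /* Use the variant while the lock is still held: copy its binary out to a
    * GPU-visible pool, since the variant object is only guaranteed to stay
    * valid until the lock is released. */
   struct panfrost_ptr bin =
      pan_pool_alloc_aligned(dev->blitter.shaders.pool, b->binary.size, 64);
   memcpy(bin.cpu, b->binary.data, b->binary.size);
   mali_ptr shader_ptr = bin.gpu | b->first_tag;

   pthread_mutex_unlock(&dev->blend_shaders.lock);

pan_blitter_get_blend_shaders() further down in this file wraps exactly this sequence and additionally caches the resulting address in dev->blitter.shaders.blend, keyed by struct pan_blit_blend_shader_key.
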
#if PAN_ARCH >= 5 static void -pan_blitter_emit_blend(const struct panfrost_device *dev, - unsigned rt, +pan_blitter_emit_blend(const struct panfrost_device *dev, unsigned rt, const struct pan_image_view *iview, const struct pan_blit_shader_data *blit_shader, - mali_ptr blend_shader, - void *out) + mali_ptr blend_shader, void *out) { - assert(blend_shader == 0 || PAN_ARCH <= 5); + assert(blend_shader == 0 || PAN_ARCH <= 5); - pan_pack(out, BLEND, cfg) { - if (!iview) { - cfg.enable = false; + pan_pack(out, BLEND, cfg) { + if (!iview) { + cfg.enable = false; #if PAN_ARCH >= 6 - cfg.internal.mode = MALI_BLEND_MODE_OFF; + cfg.internal.mode = MALI_BLEND_MODE_OFF; #endif - continue; - } + continue; + } - cfg.round_to_fb_precision = true; - cfg.srgb = util_format_is_srgb(iview->format); + cfg.round_to_fb_precision = true; + cfg.srgb = util_format_is_srgb(iview->format); #if PAN_ARCH >= 6 - cfg.internal.mode = MALI_BLEND_MODE_OPAQUE; + cfg.internal.mode = MALI_BLEND_MODE_OPAQUE; #endif - if (!blend_shader) { - cfg.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC; - cfg.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC; - cfg.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO; - cfg.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC; - cfg.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC; - cfg.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO; - cfg.equation.color_mask = 0xf; + if (!blend_shader) { + cfg.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC; + cfg.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC; + cfg.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO; + cfg.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC; + cfg.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC; + cfg.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO; + cfg.equation.color_mask = 0xf; #if PAN_ARCH >= 6 - nir_alu_type type = blit_shader->key.surfaces[rt].type; + nir_alu_type type = blit_shader->key.surfaces[rt].type; - cfg.internal.fixed_function.num_comps = 4; - cfg.internal.fixed_function.conversion.memory_format = - panfrost_format_to_bifrost_blend(dev, iview->format, false); - cfg.internal.fixed_function.conversion.register_format = - blit_type_to_reg_fmt(type); + cfg.internal.fixed_function.num_comps = 4; + cfg.internal.fixed_function.conversion.memory_format = + panfrost_format_to_bifrost_blend(dev, iview->format, false); + cfg.internal.fixed_function.conversion.register_format = + blit_type_to_reg_fmt(type); - cfg.internal.fixed_function.rt = rt; + cfg.internal.fixed_function.rt = rt; #endif - } else { + } else { #if PAN_ARCH <= 5 - cfg.blend_shader = true; - cfg.shader_pc = blend_shader; + cfg.blend_shader = true; + cfg.shader_pc = blend_shader; #endif - } - } + } + } } #endif struct pan_blitter_views { - unsigned rt_count; - const struct pan_image_view *src_rts[8]; - const struct pan_image_view *dst_rts[8]; - const struct pan_image_view *src_z; - const struct pan_image_view *dst_z; - const struct pan_image_view *src_s; - const struct pan_image_view *dst_s; + unsigned rt_count; + const struct pan_image_view *src_rts[8]; + const struct pan_image_view *dst_rts[8]; + const struct pan_image_view *src_z; + const struct pan_image_view *dst_z; + const struct pan_image_view *src_s; + const struct pan_image_view *dst_s; }; static bool pan_blitter_is_ms(struct pan_blitter_views *views) { - for (unsigned i = 0; i < views->rt_count; i++) { - if (views->dst_rts[i]) { - if (views->dst_rts[i]->image->layout.nr_samples > 1) - return true; - } - } + for (unsigned i = 0; i < views->rt_count; i++) { + if (views->dst_rts[i]) { + if (views->dst_rts[i]->image->layout.nr_samples > 1) + return true; + } + } - 
if (views->dst_z && views->dst_z->image->layout.nr_samples > 1) - return true; + if (views->dst_z && views->dst_z->image->layout.nr_samples > 1) + return true; - if (views->dst_s && views->dst_s->image->layout.nr_samples > 1) - return true; + if (views->dst_s && views->dst_s->image->layout.nr_samples > 1) + return true; - return false; + return false; } #if PAN_ARCH >= 5 @@ -204,17 +202,15 @@ static void pan_blitter_emit_blends(const struct panfrost_device *dev, const struct pan_blit_shader_data *blit_shader, struct pan_blitter_views *views, - mali_ptr *blend_shaders, - void *out) + mali_ptr *blend_shaders, void *out) { - for (unsigned i = 0; i < MAX2(views->rt_count, 1); ++i) { - void *dest = out + pan_size(BLEND) * i; - const struct pan_image_view *rt_view = views->dst_rts[i]; - mali_ptr blend_shader = blend_shaders ? blend_shaders[i] : 0; + for (unsigned i = 0; i < MAX2(views->rt_count, 1); ++i) { + void *dest = out + pan_size(BLEND) * i; + const struct pan_image_view *rt_view = views->dst_rts[i]; + mali_ptr blend_shader = blend_shaders ? blend_shaders[i] : 0; - pan_blitter_emit_blend(dev, i, rt_view, blit_shader, - blend_shader, dest); - } + pan_blitter_emit_blend(dev, i, rt_view, blit_shader, blend_shader, dest); + } } #endif @@ -222,169 +218,163 @@ pan_blitter_emit_blends(const struct panfrost_device *dev, static void pan_blitter_emit_rsd(const struct panfrost_device *dev, const struct pan_blit_shader_data *blit_shader, - struct pan_blitter_views *views, - mali_ptr *blend_shaders, + struct pan_blitter_views *views, mali_ptr *blend_shaders, void *out) { - UNUSED bool zs = (views->dst_z || views->dst_s); - bool ms = pan_blitter_is_ms(views); + UNUSED bool zs = (views->dst_z || views->dst_s); + bool ms = pan_blitter_is_ms(views); - pan_pack(out, RENDERER_STATE, cfg) { - assert(blit_shader->address); - pan_shader_prepare_rsd(&blit_shader->info, blit_shader->address, &cfg); + pan_pack(out, RENDERER_STATE, cfg) { + assert(blit_shader->address); + pan_shader_prepare_rsd(&blit_shader->info, blit_shader->address, &cfg); - cfg.multisample_misc.sample_mask = 0xFFFF; - cfg.multisample_misc.multisample_enable = ms; - cfg.multisample_misc.evaluate_per_sample = ms; - cfg.multisample_misc.depth_write_mask = views->dst_z != NULL; - cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS; + cfg.multisample_misc.sample_mask = 0xFFFF; + cfg.multisample_misc.multisample_enable = ms; + cfg.multisample_misc.evaluate_per_sample = ms; + cfg.multisample_misc.depth_write_mask = views->dst_z != NULL; + cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS; - cfg.stencil_mask_misc.stencil_enable = views->dst_s != NULL; - cfg.stencil_mask_misc.stencil_mask_front = 0xFF; - cfg.stencil_mask_misc.stencil_mask_back = 0xFF; - cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS; - cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE; - cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE; - cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE; - cfg.stencil_front.mask = 0xFF; - cfg.stencil_back = cfg.stencil_front; + cfg.stencil_mask_misc.stencil_enable = views->dst_s != NULL; + cfg.stencil_mask_misc.stencil_mask_front = 0xFF; + cfg.stencil_mask_misc.stencil_mask_back = 0xFF; + cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS; + cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE; + cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE; + cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE; + cfg.stencil_front.mask = 0xFF; + cfg.stencil_back = cfg.stencil_front; #if PAN_ARCH >= 6 - if (zs) { - 
/* Writing Z/S requires late updates */ - cfg.properties.zs_update_operation = - MALI_PIXEL_KILL_FORCE_LATE; - cfg.properties.pixel_kill_operation = - MALI_PIXEL_KILL_FORCE_LATE; - } else { - /* Skipping ATEST requires forcing Z/S */ - cfg.properties.zs_update_operation = - MALI_PIXEL_KILL_STRONG_EARLY; - cfg.properties.pixel_kill_operation = - MALI_PIXEL_KILL_FORCE_EARLY; - } + if (zs) { + /* Writing Z/S requires late updates */ + cfg.properties.zs_update_operation = MALI_PIXEL_KILL_FORCE_LATE; + cfg.properties.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_LATE; + } else { + /* Skipping ATEST requires forcing Z/S */ + cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY; + cfg.properties.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY; + } - /* However, while shaders writing Z/S can normally be killed, on v6 - * for frame shaders it can cause GPU timeouts, so only allow colour - * blit shaders to be killed. */ - cfg.properties.allow_forward_pixel_to_kill = !zs; + /* However, while shaders writing Z/S can normally be killed, on v6 + * for frame shaders it can cause GPU timeouts, so only allow colour + * blit shaders to be killed. */ + cfg.properties.allow_forward_pixel_to_kill = !zs; - if (PAN_ARCH == 6) - cfg.properties.allow_forward_pixel_to_be_killed = !zs; + if (PAN_ARCH == 6) + cfg.properties.allow_forward_pixel_to_be_killed = !zs; #else - mali_ptr blend_shader = blend_shaders ? - panfrost_last_nonnull(blend_shaders, MAX2(views->rt_count, 1)) : 0; + mali_ptr blend_shader = + blend_shaders + ? panfrost_last_nonnull(blend_shaders, MAX2(views->rt_count, 1)) + : 0; - cfg.properties.work_register_count = 4; - cfg.properties.force_early_z = !zs; - cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS; + cfg.properties.work_register_count = 4; + cfg.properties.force_early_z = !zs; + cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS; - /* Set even on v5 for erratum workaround */ + /* Set even on v5 for erratum workaround */ #if PAN_ARCH == 5 - cfg.legacy_blend_shader = blend_shader; + cfg.legacy_blend_shader = blend_shader; #else - cfg.blend_shader = blend_shader; - cfg.stencil_mask_misc.write_enable = true; - cfg.stencil_mask_misc.dither_disable = true; - cfg.multisample_misc.blend_shader = !!blend_shader; - cfg.blend_shader = blend_shader; - if (!cfg.multisample_misc.blend_shader) { - cfg.blend_equation.rgb.a = MALI_BLEND_OPERAND_A_SRC; - cfg.blend_equation.rgb.b = MALI_BLEND_OPERAND_B_SRC; - cfg.blend_equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO; - cfg.blend_equation.alpha.a = MALI_BLEND_OPERAND_A_SRC; - cfg.blend_equation.alpha.b = MALI_BLEND_OPERAND_B_SRC; - cfg.blend_equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO; - cfg.blend_constant = 0; + cfg.blend_shader = blend_shader; + cfg.stencil_mask_misc.write_enable = true; + cfg.stencil_mask_misc.dither_disable = true; + cfg.multisample_misc.blend_shader = !!blend_shader; + cfg.blend_shader = blend_shader; + if (!cfg.multisample_misc.blend_shader) { + cfg.blend_equation.rgb.a = MALI_BLEND_OPERAND_A_SRC; + cfg.blend_equation.rgb.b = MALI_BLEND_OPERAND_B_SRC; + cfg.blend_equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO; + cfg.blend_equation.alpha.a = MALI_BLEND_OPERAND_A_SRC; + cfg.blend_equation.alpha.b = MALI_BLEND_OPERAND_B_SRC; + cfg.blend_equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO; + cfg.blend_constant = 0; - if (views->dst_rts[0] != NULL) { - cfg.stencil_mask_misc.srgb = - util_format_is_srgb(views->dst_rts[0]->format); - cfg.blend_equation.color_mask = 0xf; - } - } + if (views->dst_rts[0] != 
NULL) { + cfg.stencil_mask_misc.srgb = + util_format_is_srgb(views->dst_rts[0]->format); + cfg.blend_equation.color_mask = 0xf; + } + } #endif #endif - } + } #if PAN_ARCH >= 5 - pan_blitter_emit_blends(dev, blit_shader, views, blend_shaders, - out + pan_size(RENDERER_STATE)); + pan_blitter_emit_blends(dev, blit_shader, views, blend_shaders, + out + pan_size(RENDERER_STATE)); #endif } #endif static void -pan_blitter_get_blend_shaders(struct panfrost_device *dev, - unsigned rt_count, +pan_blitter_get_blend_shaders(struct panfrost_device *dev, unsigned rt_count, const struct pan_image_view **rts, const struct pan_blit_shader_data *blit_shader, mali_ptr *blend_shaders) { #if PAN_ARCH <= 5 - if (!rt_count) - return; + if (!rt_count) + return; - struct pan_blend_state blend_state = { - .rt_count = rt_count, - }; + struct pan_blend_state blend_state = { + .rt_count = rt_count, + }; - for (unsigned i = 0; i < rt_count; i++) { - if (!rts[i] || panfrost_blendable_formats_v7[rts[i]->format].internal) - continue; + for (unsigned i = 0; i < rt_count; i++) { + if (!rts[i] || panfrost_blendable_formats_v7[rts[i]->format].internal) + continue; - struct pan_blit_blend_shader_key key = { - .format = rts[i]->format, - .rt = i, - .nr_samples = rts[i]->image->layout.nr_samples, - .type = blit_shader->blend_types[i], - }; + struct pan_blit_blend_shader_key key = { + .format = rts[i]->format, + .rt = i, + .nr_samples = rts[i]->image->layout.nr_samples, + .type = blit_shader->blend_types[i], + }; - pthread_mutex_lock(&dev->blitter.shaders.lock); - struct hash_entry *he = - _mesa_hash_table_search(dev->blitter.shaders.blend, &key); - struct pan_blit_blend_shader_data *blend_shader = he ? he->data : NULL; - if (blend_shader) { - blend_shaders[i] = blend_shader->address; - pthread_mutex_unlock(&dev->blitter.shaders.lock); - continue; - } + pthread_mutex_lock(&dev->blitter.shaders.lock); + struct hash_entry *he = + _mesa_hash_table_search(dev->blitter.shaders.blend, &key); + struct pan_blit_blend_shader_data *blend_shader = he ? 
he->data : NULL; + if (blend_shader) { + blend_shaders[i] = blend_shader->address; + pthread_mutex_unlock(&dev->blitter.shaders.lock); + continue; + } - blend_shader = rzalloc(dev->blitter.shaders.blend, - struct pan_blit_blend_shader_data); - blend_shader->key = key; + blend_shader = + rzalloc(dev->blitter.shaders.blend, struct pan_blit_blend_shader_data); + blend_shader->key = key; - blend_state.rts[i] = (struct pan_blend_rt_state) { - .format = rts[i]->format, - .nr_samples = rts[i]->image->layout.nr_samples, - .equation = { - .blend_enable = false, - .color_mask = 0xf, - }, - }; + blend_state.rts[i] = (struct pan_blend_rt_state){ + .format = rts[i]->format, + .nr_samples = rts[i]->image->layout.nr_samples, + .equation = + { + .blend_enable = false, + .color_mask = 0xf, + }, + }; - pthread_mutex_lock(&dev->blend_shaders.lock); - struct pan_blend_shader_variant *b = - GENX(pan_blend_get_shader_locked)(dev, &blend_state, - blit_shader->blend_types[i], - nir_type_float32, /* unused */ - i); + pthread_mutex_lock(&dev->blend_shaders.lock); + struct pan_blend_shader_variant *b = GENX(pan_blend_get_shader_locked)( + dev, &blend_state, blit_shader->blend_types[i], + nir_type_float32, /* unused */ + i); - assert(b->work_reg_count <= 4); - struct panfrost_ptr bin = - pan_pool_alloc_aligned(dev->blitter.shaders.pool, - b->binary.size, - 64); - memcpy(bin.cpu, b->binary.data, b->binary.size); + assert(b->work_reg_count <= 4); + struct panfrost_ptr bin = + pan_pool_alloc_aligned(dev->blitter.shaders.pool, b->binary.size, 64); + memcpy(bin.cpu, b->binary.data, b->binary.size); - blend_shader->address = bin.gpu | b->first_tag; - pthread_mutex_unlock(&dev->blend_shaders.lock); - _mesa_hash_table_insert(dev->blitter.shaders.blend, - &blend_shader->key, blend_shader); - pthread_mutex_unlock(&dev->blitter.shaders.lock); - blend_shaders[i] = blend_shader->address; - } + blend_shader->address = bin.gpu | b->first_tag; + pthread_mutex_unlock(&dev->blend_shaders.lock); + _mesa_hash_table_insert(dev->blitter.shaders.blend, &blend_shader->key, + blend_shader); + pthread_mutex_unlock(&dev->blitter.shaders.lock); + blend_shaders[i] = blend_shader->address; + } #endif } @@ -392,287 +382,300 @@ static const struct pan_blit_shader_data * pan_blitter_get_blit_shader(struct panfrost_device *dev, const struct pan_blit_shader_key *key) { - pthread_mutex_lock(&dev->blitter.shaders.lock); - struct hash_entry *he = _mesa_hash_table_search(dev->blitter.shaders.blit, key); - struct pan_blit_shader_data *shader = he ? he->data : NULL; + pthread_mutex_lock(&dev->blitter.shaders.lock); + struct hash_entry *he = + _mesa_hash_table_search(dev->blitter.shaders.blit, key); + struct pan_blit_shader_data *shader = he ? 
he->data : NULL; - if (shader) - goto out; + if (shader) + goto out; - unsigned coord_comps = 0; - unsigned sig_offset = 0; - char sig[256]; - bool first = true; - for (unsigned i = 0; i < ARRAY_SIZE(key->surfaces); i++) { - const char *type_str, *dim_str; - if (key->surfaces[i].type == nir_type_invalid) - continue; + unsigned coord_comps = 0; + unsigned sig_offset = 0; + char sig[256]; + bool first = true; + for (unsigned i = 0; i < ARRAY_SIZE(key->surfaces); i++) { + const char *type_str, *dim_str; + if (key->surfaces[i].type == nir_type_invalid) + continue; - switch (key->surfaces[i].type) { - case nir_type_float32: type_str = "float"; break; - case nir_type_uint32: type_str = "uint"; break; - case nir_type_int32: type_str = "int"; break; - default: unreachable("Invalid type\n"); - } + switch (key->surfaces[i].type) { + case nir_type_float32: + type_str = "float"; + break; + case nir_type_uint32: + type_str = "uint"; + break; + case nir_type_int32: + type_str = "int"; + break; + default: + unreachable("Invalid type\n"); + } - switch (key->surfaces[i].dim) { - case MALI_TEXTURE_DIMENSION_CUBE: dim_str = "cube"; break; - case MALI_TEXTURE_DIMENSION_1D: dim_str = "1D"; break; - case MALI_TEXTURE_DIMENSION_2D: dim_str = "2D"; break; - case MALI_TEXTURE_DIMENSION_3D: dim_str = "3D"; break; - default: unreachable("Invalid dim\n"); - } + switch (key->surfaces[i].dim) { + case MALI_TEXTURE_DIMENSION_CUBE: + dim_str = "cube"; + break; + case MALI_TEXTURE_DIMENSION_1D: + dim_str = "1D"; + break; + case MALI_TEXTURE_DIMENSION_2D: + dim_str = "2D"; + break; + case MALI_TEXTURE_DIMENSION_3D: + dim_str = "3D"; + break; + default: + unreachable("Invalid dim\n"); + } - coord_comps = MAX2(coord_comps, - (key->surfaces[i].dim ? : 3) + - (key->surfaces[i].array ? 1 : 0)); - first = false; + coord_comps = MAX2(coord_comps, (key->surfaces[i].dim ?: 3) + + (key->surfaces[i].array ? 1 : 0)); + first = false; - if (sig_offset >= sizeof(sig)) - continue; + if (sig_offset >= sizeof(sig)) + continue; - sig_offset += snprintf(sig + sig_offset, sizeof(sig) - sig_offset, - "%s[%s;%s;%s%s;src_samples=%d,dst_samples=%d]", - first ? "" : ",", - gl_frag_result_name(key->surfaces[i].loc), - type_str, dim_str, - key->surfaces[i].array ? "[]" : "", - key->surfaces[i].src_samples, - key->surfaces[i].dst_samples); - } + sig_offset += + snprintf(sig + sig_offset, sizeof(sig) - sig_offset, + "%s[%s;%s;%s%s;src_samples=%d,dst_samples=%d]", + first ? "" : ",", gl_frag_result_name(key->surfaces[i].loc), + type_str, dim_str, key->surfaces[i].array ? 
"[]" : "", + key->surfaces[i].src_samples, key->surfaces[i].dst_samples); + } - nir_builder b = - nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, - GENX(pan_shader_get_compiler_options)(), - "pan_blit(%s)", sig); - nir_variable *coord_var = - nir_variable_create(b.shader, nir_var_shader_in, - glsl_vector_type(GLSL_TYPE_FLOAT, coord_comps), - "coord"); - coord_var->data.location = VARYING_SLOT_VAR0; + nir_builder b = nir_builder_init_simple_shader( + MESA_SHADER_FRAGMENT, GENX(pan_shader_get_compiler_options)(), + "pan_blit(%s)", sig); + nir_variable *coord_var = nir_variable_create( + b.shader, nir_var_shader_in, + glsl_vector_type(GLSL_TYPE_FLOAT, coord_comps), "coord"); + coord_var->data.location = VARYING_SLOT_VAR0; - nir_ssa_def *coord = nir_load_var(&b, coord_var); + nir_ssa_def *coord = nir_load_var(&b, coord_var); - unsigned active_count = 0; - for (unsigned i = 0; i < ARRAY_SIZE(key->surfaces); i++) { - if (key->surfaces[i].type == nir_type_invalid) - continue; + unsigned active_count = 0; + for (unsigned i = 0; i < ARRAY_SIZE(key->surfaces); i++) { + if (key->surfaces[i].type == nir_type_invalid) + continue; - /* Resolve operations only work for N -> 1 samples. */ - assert(key->surfaces[i].dst_samples == 1 || - key->surfaces[i].src_samples == key->surfaces[i].dst_samples); + /* Resolve operations only work for N -> 1 samples. */ + assert(key->surfaces[i].dst_samples == 1 || + key->surfaces[i].src_samples == key->surfaces[i].dst_samples); - static const char *out_names[] = { - "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7", - }; + static const char *out_names[] = { + "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7", + }; - unsigned ncomps = key->surfaces[i].loc >= FRAG_RESULT_DATA0 ? 4 : 1; - enum glsl_base_type type = nir_get_glsl_base_type_for_nir_type(key->surfaces[i].type); - nir_variable *out = - nir_variable_create(b.shader, nir_var_shader_out, - glsl_vector_type(type, ncomps), - out_names[active_count]); - out->data.location = key->surfaces[i].loc; - out->data.driver_location = active_count; + unsigned ncomps = key->surfaces[i].loc >= FRAG_RESULT_DATA0 ? 4 : 1; + enum glsl_base_type type = + nir_get_glsl_base_type_for_nir_type(key->surfaces[i].type); + nir_variable *out = nir_variable_create(b.shader, nir_var_shader_out, + glsl_vector_type(type, ncomps), + out_names[active_count]); + out->data.location = key->surfaces[i].loc; + out->data.driver_location = active_count; - bool resolve = key->surfaces[i].src_samples > key->surfaces[i].dst_samples; - bool ms = key->surfaces[i].src_samples > 1; - enum glsl_sampler_dim sampler_dim; + bool resolve = + key->surfaces[i].src_samples > key->surfaces[i].dst_samples; + bool ms = key->surfaces[i].src_samples > 1; + enum glsl_sampler_dim sampler_dim; - switch (key->surfaces[i].dim) { - case MALI_TEXTURE_DIMENSION_1D: - sampler_dim = GLSL_SAMPLER_DIM_1D; - break; - case MALI_TEXTURE_DIMENSION_2D: - sampler_dim = ms ? - GLSL_SAMPLER_DIM_MS : - GLSL_SAMPLER_DIM_2D; - break; - case MALI_TEXTURE_DIMENSION_3D: - sampler_dim = GLSL_SAMPLER_DIM_3D; - break; - case MALI_TEXTURE_DIMENSION_CUBE: - sampler_dim = GLSL_SAMPLER_DIM_CUBE; - break; - } + switch (key->surfaces[i].dim) { + case MALI_TEXTURE_DIMENSION_1D: + sampler_dim = GLSL_SAMPLER_DIM_1D; + break; + case MALI_TEXTURE_DIMENSION_2D: + sampler_dim = ms ? 
GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D; + break; + case MALI_TEXTURE_DIMENSION_3D: + sampler_dim = GLSL_SAMPLER_DIM_3D; + break; + case MALI_TEXTURE_DIMENSION_CUBE: + sampler_dim = GLSL_SAMPLER_DIM_CUBE; + break; + } - nir_ssa_def *res = NULL; + nir_ssa_def *res = NULL; - if (resolve) { - /* When resolving a float type, we need to calculate - * the average of all samples. For integer resolve, GL - * and Vulkan say that one sample should be chosen - * without telling which. Let's just pick the first one - * in that case. - */ - nir_alu_type base_type = - nir_alu_type_get_base_type(key->surfaces[i].type); - unsigned nsamples = base_type == nir_type_float ? - key->surfaces[i].src_samples : 1; + if (resolve) { + /* When resolving a float type, we need to calculate + * the average of all samples. For integer resolve, GL + * and Vulkan say that one sample should be chosen + * without telling which. Let's just pick the first one + * in that case. + */ + nir_alu_type base_type = + nir_alu_type_get_base_type(key->surfaces[i].type); + unsigned nsamples = + base_type == nir_type_float ? key->surfaces[i].src_samples : 1; - for (unsigned s = 0; s < nsamples; s++) { - nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3); + for (unsigned s = 0; s < nsamples; s++) { + nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3); - tex->op = nir_texop_txf_ms; - tex->dest_type = key->surfaces[i].type; - tex->texture_index = active_count; - tex->is_array = key->surfaces[i].array; - tex->sampler_dim = sampler_dim; + tex->op = nir_texop_txf_ms; + tex->dest_type = key->surfaces[i].type; + tex->texture_index = active_count; + tex->is_array = key->surfaces[i].array; + tex->sampler_dim = sampler_dim; - tex->src[0].src_type = nir_tex_src_coord; - tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord)); - tex->coord_components = coord_comps; + tex->src[0].src_type = nir_tex_src_coord; + tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord)); + tex->coord_components = coord_comps; - tex->src[1].src_type = nir_tex_src_ms_index; - tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, s)); + tex->src[1].src_type = nir_tex_src_ms_index; + tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, s)); - tex->src[2].src_type = nir_tex_src_lod; - tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0)); - nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL); - nir_builder_instr_insert(&b, &tex->instr); + tex->src[2].src_type = nir_tex_src_lod; + tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0)); + nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL); + nir_builder_instr_insert(&b, &tex->instr); - res = res ? nir_fadd(&b, res, &tex->dest.ssa) : &tex->dest.ssa; - } + res = res ? nir_fadd(&b, res, &tex->dest.ssa) : &tex->dest.ssa; + } - if (base_type == nir_type_float) { - unsigned type_sz = - nir_alu_type_get_type_size(key->surfaces[i].type); - res = nir_fmul(&b, res, - nir_imm_floatN_t(&b, 1.0f / nsamples, type_sz)); - } - } else { - nir_tex_instr *tex = - nir_tex_instr_create(b.shader, ms ? 3 : 1); + if (base_type == nir_type_float) { + unsigned type_sz = + nir_alu_type_get_type_size(key->surfaces[i].type); + res = nir_fmul(&b, res, + nir_imm_floatN_t(&b, 1.0f / nsamples, type_sz)); + } + } else { + nir_tex_instr *tex = nir_tex_instr_create(b.shader, ms ? 
3 : 1); - tex->dest_type = key->surfaces[i].type; - tex->texture_index = active_count; - tex->is_array = key->surfaces[i].array; - tex->sampler_dim = sampler_dim; + tex->dest_type = key->surfaces[i].type; + tex->texture_index = active_count; + tex->is_array = key->surfaces[i].array; + tex->sampler_dim = sampler_dim; - if (ms) { - tex->op = nir_texop_txf_ms; + if (ms) { + tex->op = nir_texop_txf_ms; - tex->src[0].src_type = nir_tex_src_coord; - tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord)); - tex->coord_components = coord_comps; + tex->src[0].src_type = nir_tex_src_coord; + tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord)); + tex->coord_components = coord_comps; - tex->src[1].src_type = nir_tex_src_ms_index; - tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(&b)); + tex->src[1].src_type = nir_tex_src_ms_index; + tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(&b)); - tex->src[2].src_type = nir_tex_src_lod; - tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0)); - } else { - tex->op = nir_texop_txl; + tex->src[2].src_type = nir_tex_src_lod; + tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0)); + } else { + tex->op = nir_texop_txl; - tex->src[0].src_type = nir_tex_src_coord; - tex->src[0].src = nir_src_for_ssa(coord); - tex->coord_components = coord_comps; - } + tex->src[0].src_type = nir_tex_src_coord; + tex->src[0].src = nir_src_for_ssa(coord); + tex->coord_components = coord_comps; + } - nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL); - nir_builder_instr_insert(&b, &tex->instr); - res = &tex->dest.ssa; - } + nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL); + nir_builder_instr_insert(&b, &tex->instr); + res = &tex->dest.ssa; + } - assert(res); + assert(res); - if (key->surfaces[i].loc >= FRAG_RESULT_DATA0) { - nir_store_var(&b, out, res, 0xFF); - } else { - unsigned c = key->surfaces[i].loc == FRAG_RESULT_STENCIL ? 1 : 0; - nir_store_var(&b, out, nir_channel(&b, res, c), 0xFF); - } - active_count++; - } + if (key->surfaces[i].loc >= FRAG_RESULT_DATA0) { + nir_store_var(&b, out, res, 0xFF); + } else { + unsigned c = key->surfaces[i].loc == FRAG_RESULT_STENCIL ? 
1 : 0; + nir_store_var(&b, out, nir_channel(&b, res, c), 0xFF); + } + active_count++; + } - struct panfrost_compile_inputs inputs = { - .gpu_id = dev->gpu_id, - .is_blit = true, - .no_idvs = true, - .fixed_sysval_ubo = -1, - }; - struct util_dynarray binary; + struct panfrost_compile_inputs inputs = { + .gpu_id = dev->gpu_id, + .is_blit = true, + .no_idvs = true, + .fixed_sysval_ubo = -1, + }; + struct util_dynarray binary; - util_dynarray_init(&binary, NULL); + util_dynarray_init(&binary, NULL); - shader = rzalloc(dev->blitter.shaders.blit, - struct pan_blit_shader_data); + shader = rzalloc(dev->blitter.shaders.blit, struct pan_blit_shader_data); - nir_shader_gather_info(b.shader, nir_shader_get_entrypoint(b.shader)); + nir_shader_gather_info(b.shader, nir_shader_get_entrypoint(b.shader)); - for (unsigned i = 0; i < active_count; ++i) - BITSET_SET(b.shader->info.textures_used, i); + for (unsigned i = 0; i < active_count; ++i) + BITSET_SET(b.shader->info.textures_used, i); - GENX(pan_shader_compile)(b.shader, &inputs, &binary, &shader->info); + GENX(pan_shader_compile)(b.shader, &inputs, &binary, &shader->info); - /* Blit shaders shouldn't have sysvals */ - assert(shader->info.sysvals.sysval_count == 0); + /* Blit shaders shouldn't have sysvals */ + assert(shader->info.sysvals.sysval_count == 0); - shader->key = *key; - shader->address = - pan_pool_upload_aligned(dev->blitter.shaders.pool, - binary.data, binary.size, - PAN_ARCH >= 6 ? 128 : 64); + shader->key = *key; + shader->address = + pan_pool_upload_aligned(dev->blitter.shaders.pool, binary.data, + binary.size, PAN_ARCH >= 6 ? 128 : 64); - util_dynarray_fini(&binary); - ralloc_free(b.shader); + util_dynarray_fini(&binary); + ralloc_free(b.shader); #if PAN_ARCH >= 6 - for (unsigned i = 0; i < ARRAY_SIZE(shader->blend_ret_offsets); i++) { - shader->blend_ret_offsets[i] = shader->info.bifrost.blend[i].return_offset; - shader->blend_types[i] = shader->info.bifrost.blend[i].type; - } + for (unsigned i = 0; i < ARRAY_SIZE(shader->blend_ret_offsets); i++) { + shader->blend_ret_offsets[i] = + shader->info.bifrost.blend[i].return_offset; + shader->blend_types[i] = shader->info.bifrost.blend[i].type; + } #endif - _mesa_hash_table_insert(dev->blitter.shaders.blit, &shader->key, shader); + _mesa_hash_table_insert(dev->blitter.shaders.blit, &shader->key, shader); out: - pthread_mutex_unlock(&dev->blitter.shaders.lock); - return shader; + pthread_mutex_unlock(&dev->blitter.shaders.lock); + return shader; } static struct pan_blit_shader_key pan_blitter_get_key(struct pan_blitter_views *views) { - struct pan_blit_shader_key key = { 0 }; + struct pan_blit_shader_key key = {0}; - if (views->src_z) { - assert(views->dst_z); - key.surfaces[0].loc = FRAG_RESULT_DEPTH; - key.surfaces[0].type = nir_type_float32; - key.surfaces[0].src_samples = views->src_z->image->layout.nr_samples; - key.surfaces[0].dst_samples = views->dst_z->image->layout.nr_samples; - key.surfaces[0].dim = views->src_z->dim; - key.surfaces[0].array = views->src_z->first_layer != views->src_z->last_layer; - } + if (views->src_z) { + assert(views->dst_z); + key.surfaces[0].loc = FRAG_RESULT_DEPTH; + key.surfaces[0].type = nir_type_float32; + key.surfaces[0].src_samples = views->src_z->image->layout.nr_samples; + key.surfaces[0].dst_samples = views->dst_z->image->layout.nr_samples; + key.surfaces[0].dim = views->src_z->dim; + key.surfaces[0].array = + views->src_z->first_layer != views->src_z->last_layer; + } - if (views->src_s) { - assert(views->dst_s); - key.surfaces[1].loc = 
FRAG_RESULT_STENCIL; - key.surfaces[1].type = nir_type_uint32; - key.surfaces[1].src_samples = views->src_s->image->layout.nr_samples; - key.surfaces[1].dst_samples = views->dst_s->image->layout.nr_samples; - key.surfaces[1].dim = views->src_s->dim; - key.surfaces[1].array = views->src_s->first_layer != views->src_s->last_layer; - } + if (views->src_s) { + assert(views->dst_s); + key.surfaces[1].loc = FRAG_RESULT_STENCIL; + key.surfaces[1].type = nir_type_uint32; + key.surfaces[1].src_samples = views->src_s->image->layout.nr_samples; + key.surfaces[1].dst_samples = views->dst_s->image->layout.nr_samples; + key.surfaces[1].dim = views->src_s->dim; + key.surfaces[1].array = + views->src_s->first_layer != views->src_s->last_layer; + } - for (unsigned i = 0; i < views->rt_count; i++) { - if (!views->src_rts[i]) - continue; + for (unsigned i = 0; i < views->rt_count; i++) { + if (!views->src_rts[i]) + continue; - assert(views->dst_rts[i]); - key.surfaces[i].loc = FRAG_RESULT_DATA0 + i; - key.surfaces[i].type = - util_format_is_pure_uint(views->src_rts[i]->format) ? nir_type_uint32 : - util_format_is_pure_sint(views->src_rts[i]->format) ? nir_type_int32 : - nir_type_float32; - key.surfaces[i].src_samples = views->src_rts[i]->image->layout.nr_samples; - key.surfaces[i].dst_samples = views->dst_rts[i]->image->layout.nr_samples; - key.surfaces[i].dim = views->src_rts[i]->dim; - key.surfaces[i].array = views->src_rts[i]->first_layer != views->src_rts[i]->last_layer; - } + assert(views->dst_rts[i]); + key.surfaces[i].loc = FRAG_RESULT_DATA0 + i; + key.surfaces[i].type = + util_format_is_pure_uint(views->src_rts[i]->format) ? nir_type_uint32 + : util_format_is_pure_sint(views->src_rts[i]->format) + ? nir_type_int32 + : nir_type_float32; + key.surfaces[i].src_samples = views->src_rts[i]->image->layout.nr_samples; + key.surfaces[i].dst_samples = views->dst_rts[i]->image->layout.nr_samples; + key.surfaces[i].dim = views->src_rts[i]->dim; + key.surfaces[i].array = + views->src_rts[i]->first_layer != views->src_rts[i]->last_layer; + } - return key; + return key; } #if PAN_ARCH <= 7 @@ -680,77 +683,75 @@ static mali_ptr pan_blitter_get_rsd(struct panfrost_device *dev, struct pan_blitter_views *views) { - struct pan_blit_rsd_key rsd_key = { 0 }; + struct pan_blit_rsd_key rsd_key = {0}; - assert(!views->rt_count || (!views->src_z && !views->src_s)); + assert(!views->rt_count || (!views->src_z && !views->src_s)); - struct pan_blit_shader_key blit_key = pan_blitter_get_key(views); + struct pan_blit_shader_key blit_key = pan_blitter_get_key(views); - if (views->src_z) { - assert(views->dst_z); - rsd_key.z.format = views->dst_z->format; - rsd_key.z.type = blit_key.surfaces[0].type; - rsd_key.z.src_samples = blit_key.surfaces[0].src_samples; - rsd_key.z.dst_samples = blit_key.surfaces[0].dst_samples; - rsd_key.z.dim = blit_key.surfaces[0].dim; - rsd_key.z.array = blit_key.surfaces[0].array; - } + if (views->src_z) { + assert(views->dst_z); + rsd_key.z.format = views->dst_z->format; + rsd_key.z.type = blit_key.surfaces[0].type; + rsd_key.z.src_samples = blit_key.surfaces[0].src_samples; + rsd_key.z.dst_samples = blit_key.surfaces[0].dst_samples; + rsd_key.z.dim = blit_key.surfaces[0].dim; + rsd_key.z.array = blit_key.surfaces[0].array; + } - if (views->src_s) { - assert(views->dst_s); - rsd_key.s.format = views->dst_s->format; - rsd_key.s.type = blit_key.surfaces[1].type; - rsd_key.s.src_samples = blit_key.surfaces[1].src_samples; - rsd_key.s.dst_samples = blit_key.surfaces[1].dst_samples; - rsd_key.s.dim = 
blit_key.surfaces[1].dim; - rsd_key.s.array = blit_key.surfaces[1].array; - } + if (views->src_s) { + assert(views->dst_s); + rsd_key.s.format = views->dst_s->format; + rsd_key.s.type = blit_key.surfaces[1].type; + rsd_key.s.src_samples = blit_key.surfaces[1].src_samples; + rsd_key.s.dst_samples = blit_key.surfaces[1].dst_samples; + rsd_key.s.dim = blit_key.surfaces[1].dim; + rsd_key.s.array = blit_key.surfaces[1].array; + } - for (unsigned i = 0; i < views->rt_count; i++) { - if (!views->src_rts[i]) - continue; + for (unsigned i = 0; i < views->rt_count; i++) { + if (!views->src_rts[i]) + continue; - assert(views->dst_rts[i]); - rsd_key.rts[i].format = views->dst_rts[i]->format; - rsd_key.rts[i].type = blit_key.surfaces[i].type; - rsd_key.rts[i].src_samples = blit_key.surfaces[i].src_samples; - rsd_key.rts[i].dst_samples = blit_key.surfaces[i].dst_samples; - rsd_key.rts[i].dim = blit_key.surfaces[i].dim; - rsd_key.rts[i].array = blit_key.surfaces[i].array; - } + assert(views->dst_rts[i]); + rsd_key.rts[i].format = views->dst_rts[i]->format; + rsd_key.rts[i].type = blit_key.surfaces[i].type; + rsd_key.rts[i].src_samples = blit_key.surfaces[i].src_samples; + rsd_key.rts[i].dst_samples = blit_key.surfaces[i].dst_samples; + rsd_key.rts[i].dim = blit_key.surfaces[i].dim; + rsd_key.rts[i].array = blit_key.surfaces[i].array; + } - pthread_mutex_lock(&dev->blitter.rsds.lock); - struct hash_entry *he = - _mesa_hash_table_search(dev->blitter.rsds.rsds, &rsd_key); - struct pan_blit_rsd_data *rsd = he ? he->data : NULL; - if (rsd) - goto out; + pthread_mutex_lock(&dev->blitter.rsds.lock); + struct hash_entry *he = + _mesa_hash_table_search(dev->blitter.rsds.rsds, &rsd_key); + struct pan_blit_rsd_data *rsd = he ? he->data : NULL; + if (rsd) + goto out; - rsd = rzalloc(dev->blitter.rsds.rsds, struct pan_blit_rsd_data); - rsd->key = rsd_key; + rsd = rzalloc(dev->blitter.rsds.rsds, struct pan_blit_rsd_data); + rsd->key = rsd_key; - unsigned bd_count = PAN_ARCH >= 5 ? MAX2(views->rt_count, 1) : 0; - struct panfrost_ptr rsd_ptr = - pan_pool_alloc_desc_aggregate(dev->blitter.rsds.pool, - PAN_DESC(RENDERER_STATE), - PAN_DESC_ARRAY(bd_count, BLEND)); + unsigned bd_count = PAN_ARCH >= 5 ? 
MAX2(views->rt_count, 1) : 0; + struct panfrost_ptr rsd_ptr = pan_pool_alloc_desc_aggregate( + dev->blitter.rsds.pool, PAN_DESC(RENDERER_STATE), + PAN_DESC_ARRAY(bd_count, BLEND)); - mali_ptr blend_shaders[8] = { 0 }; + mali_ptr blend_shaders[8] = {0}; - const struct pan_blit_shader_data *blit_shader = - pan_blitter_get_blit_shader(dev, &blit_key); + const struct pan_blit_shader_data *blit_shader = + pan_blitter_get_blit_shader(dev, &blit_key); - pan_blitter_get_blend_shaders(dev, views->rt_count, views->dst_rts, - blit_shader, blend_shaders); + pan_blitter_get_blend_shaders(dev, views->rt_count, views->dst_rts, + blit_shader, blend_shaders); - pan_blitter_emit_rsd(dev, blit_shader, views, blend_shaders, - rsd_ptr.cpu); - rsd->address = rsd_ptr.gpu; - _mesa_hash_table_insert(dev->blitter.rsds.rsds, &rsd->key, rsd); + pan_blitter_emit_rsd(dev, blit_shader, views, blend_shaders, rsd_ptr.cpu); + rsd->address = rsd_ptr.gpu; + _mesa_hash_table_insert(dev->blitter.rsds.rsds, &rsd->key, rsd); out: - pthread_mutex_unlock(&dev->blitter.rsds.lock); - return rsd->address; + pthread_mutex_unlock(&dev->blitter.rsds.lock); + return rsd->address; } static mali_ptr @@ -758,246 +759,253 @@ pan_blit_get_rsd(struct panfrost_device *dev, const struct pan_image_view *src_views, const struct pan_image_view *dst_view) { - const struct util_format_description *desc = - util_format_description(src_views[0].format); + const struct util_format_description *desc = + util_format_description(src_views[0].format); - struct pan_blitter_views views = { }; + struct pan_blitter_views views = {}; - if (util_format_has_depth(desc)) { - views.src_z = &src_views[0]; - views.dst_z = dst_view; - } + if (util_format_has_depth(desc)) { + views.src_z = &src_views[0]; + views.dst_z = dst_view; + } - if (src_views[1].format) { - views.src_s = &src_views[1]; - views.dst_s = dst_view; - } else if (util_format_has_stencil(desc)) { - views.src_s = &src_views[0]; - views.dst_s = dst_view; - } + if (src_views[1].format) { + views.src_s = &src_views[1]; + views.dst_s = dst_view; + } else if (util_format_has_stencil(desc)) { + views.src_s = &src_views[0]; + views.dst_s = dst_view; + } - if (!views.src_z && !views.src_s) { - views.rt_count = 1; - views.src_rts[0] = src_views; - views.dst_rts[0] = dst_view; - } + if (!views.src_z && !views.src_s) { + views.rt_count = 1; + views.src_rts[0] = src_views; + views.dst_rts[0] = dst_view; + } - return pan_blitter_get_rsd(dev, &views); + return pan_blitter_get_rsd(dev, &views); } #endif static struct pan_blitter_views -pan_preload_get_views(const struct pan_fb_info *fb, bool zs, struct pan_image_view *patched_s) +pan_preload_get_views(const struct pan_fb_info *fb, bool zs, + struct pan_image_view *patched_s) { - struct pan_blitter_views views = { 0 }; + struct pan_blitter_views views = {0}; - if (zs) { - if (fb->zs.preload.z) - views.src_z = views.dst_z = fb->zs.view.zs; + if (zs) { + if (fb->zs.preload.z) + views.src_z = views.dst_z = fb->zs.view.zs; - if (fb->zs.preload.s) { - const struct pan_image_view *view = fb->zs.view.s ? 
: fb->zs.view.zs; - enum pipe_format fmt = util_format_get_depth_only(view->format); + if (fb->zs.preload.s) { + const struct pan_image_view *view = fb->zs.view.s ?: fb->zs.view.zs; + enum pipe_format fmt = util_format_get_depth_only(view->format); - switch (view->format) { - case PIPE_FORMAT_Z24_UNORM_S8_UINT: fmt = PIPE_FORMAT_X24S8_UINT; break; - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: fmt = PIPE_FORMAT_X32_S8X24_UINT; break; - default: fmt = view->format; break; - } + switch (view->format) { + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + fmt = PIPE_FORMAT_X24S8_UINT; + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + fmt = PIPE_FORMAT_X32_S8X24_UINT; + break; + default: + fmt = view->format; + break; + } - if (fmt != view->format) { - *patched_s = *view; - patched_s->format = fmt; - views.src_s = views.dst_s = patched_s; - } else { - views.src_s = views.dst_s = view; - } - } - } else { - for (unsigned i = 0; i < fb->rt_count; i++) { - if (fb->rts[i].preload) { - views.src_rts[i] = fb->rts[i].view; - views.dst_rts[i] = fb->rts[i].view; - } - } + if (fmt != view->format) { + *patched_s = *view; + patched_s->format = fmt; + views.src_s = views.dst_s = patched_s; + } else { + views.src_s = views.dst_s = view; + } + } + } else { + for (unsigned i = 0; i < fb->rt_count; i++) { + if (fb->rts[i].preload) { + views.src_rts[i] = fb->rts[i].view; + views.dst_rts[i] = fb->rts[i].view; + } + } - views.rt_count = fb->rt_count; - } + views.rt_count = fb->rt_count; + } - return views; + return views; } static bool pan_preload_needed(const struct pan_fb_info *fb, bool zs) { - if (zs) { - if (fb->zs.preload.z || fb->zs.preload.s) - return true; - } else { - for (unsigned i = 0; i < fb->rt_count; i++) { - if (fb->rts[i].preload) - return true; - } - } + if (zs) { + if (fb->zs.preload.z || fb->zs.preload.s) + return true; + } else { + for (unsigned i = 0; i < fb->rt_count; i++) { + if (fb->rts[i].preload) + return true; + } + } - return false; + return false; } static mali_ptr pan_blitter_emit_varying(struct pan_pool *pool) { - struct panfrost_ptr varying = pan_pool_alloc_desc(pool, ATTRIBUTE); + struct panfrost_ptr varying = pan_pool_alloc_desc(pool, ATTRIBUTE); - pan_pack(varying.cpu, ATTRIBUTE, cfg) { - cfg.buffer_index = 0; - cfg.offset_enable = PAN_ARCH <= 5; - cfg.format = pool->dev->formats[PIPE_FORMAT_R32G32B32_FLOAT].hw; + pan_pack(varying.cpu, ATTRIBUTE, cfg) { + cfg.buffer_index = 0; + cfg.offset_enable = PAN_ARCH <= 5; + cfg.format = pool->dev->formats[PIPE_FORMAT_R32G32B32_FLOAT].hw; #if PAN_ARCH >= 9 - cfg.attribute_type = MALI_ATTRIBUTE_TYPE_1D; - cfg.table = PAN_TABLE_ATTRIBUTE_BUFFER; - cfg.frequency = MALI_ATTRIBUTE_FREQUENCY_VERTEX; - cfg.stride = 4 * sizeof(float); + cfg.attribute_type = MALI_ATTRIBUTE_TYPE_1D; + cfg.table = PAN_TABLE_ATTRIBUTE_BUFFER; + cfg.frequency = MALI_ATTRIBUTE_FREQUENCY_VERTEX; + cfg.stride = 4 * sizeof(float); #endif - } + } - return varying.gpu; + return varying.gpu; } static mali_ptr pan_blitter_emit_varying_buffer(struct pan_pool *pool, mali_ptr coordinates) { #if PAN_ARCH >= 9 - struct panfrost_ptr varying_buffer = pan_pool_alloc_desc(pool, BUFFER); + struct panfrost_ptr varying_buffer = pan_pool_alloc_desc(pool, BUFFER); - pan_pack(varying_buffer.cpu, BUFFER, cfg) { - cfg.address = coordinates; - cfg.size = 4 * sizeof(float) * 4; - } + pan_pack(varying_buffer.cpu, BUFFER, cfg) { + cfg.address = coordinates; + cfg.size = 4 * sizeof(float) * 4; + } #else - /* Bifrost needs an empty desc to mark end of prefetching */ - bool padding_buffer = PAN_ARCH >= 6; + /* 
Bifrost needs an empty desc to mark end of prefetching */ + bool padding_buffer = PAN_ARCH >= 6; - struct panfrost_ptr varying_buffer = - pan_pool_alloc_desc_array(pool, (padding_buffer ? 2 : 1), - ATTRIBUTE_BUFFER); + struct panfrost_ptr varying_buffer = pan_pool_alloc_desc_array( + pool, (padding_buffer ? 2 : 1), ATTRIBUTE_BUFFER); - pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) { - cfg.pointer = coordinates; - cfg.stride = 4 * sizeof(float); - cfg.size = cfg.stride * 4; - } + pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) { + cfg.pointer = coordinates; + cfg.stride = 4 * sizeof(float); + cfg.size = cfg.stride * 4; + } - if (padding_buffer) { - pan_pack(varying_buffer.cpu + pan_size(ATTRIBUTE_BUFFER), - ATTRIBUTE_BUFFER, cfg); - } + if (padding_buffer) { + pan_pack(varying_buffer.cpu + pan_size(ATTRIBUTE_BUFFER), + ATTRIBUTE_BUFFER, cfg) + ; + } #endif - return varying_buffer.gpu; + return varying_buffer.gpu; } static mali_ptr -pan_blitter_emit_sampler(struct pan_pool *pool, - bool nearest_filter) +pan_blitter_emit_sampler(struct pan_pool *pool, bool nearest_filter) { - struct panfrost_ptr sampler = - pan_pool_alloc_desc(pool, SAMPLER); + struct panfrost_ptr sampler = pan_pool_alloc_desc(pool, SAMPLER); - pan_pack(sampler.cpu, SAMPLER, cfg) { - cfg.seamless_cube_map = false; - cfg.normalized_coordinates = false; - cfg.minify_nearest = nearest_filter; - cfg.magnify_nearest = nearest_filter; - } + pan_pack(sampler.cpu, SAMPLER, cfg) { + cfg.seamless_cube_map = false; + cfg.normalized_coordinates = false; + cfg.minify_nearest = nearest_filter; + cfg.magnify_nearest = nearest_filter; + } - return sampler.gpu; + return sampler.gpu; } static mali_ptr -pan_blitter_emit_textures(struct pan_pool *pool, - unsigned tex_count, +pan_blitter_emit_textures(struct pan_pool *pool, unsigned tex_count, const struct pan_image_view **views) { #if PAN_ARCH >= 6 - struct panfrost_ptr textures = - pan_pool_alloc_desc_array(pool, tex_count, TEXTURE); + struct panfrost_ptr textures = + pan_pool_alloc_desc_array(pool, tex_count, TEXTURE); - for (unsigned i = 0; i < tex_count; i++) { - void *texture = textures.cpu + (pan_size(TEXTURE) * i); - size_t payload_size = - GENX(panfrost_estimate_texture_payload_size)(views[i]); - struct panfrost_ptr surfaces = - pan_pool_alloc_aligned(pool, payload_size, 64); + for (unsigned i = 0; i < tex_count; i++) { + void *texture = textures.cpu + (pan_size(TEXTURE) * i); + size_t payload_size = + GENX(panfrost_estimate_texture_payload_size)(views[i]); + struct panfrost_ptr surfaces = + pan_pool_alloc_aligned(pool, payload_size, 64); - GENX(panfrost_new_texture)(pool->dev, views[i], texture, &surfaces); - } + GENX(panfrost_new_texture)(pool->dev, views[i], texture, &surfaces); + } - return textures.gpu; + return textures.gpu; #else - mali_ptr textures[8] = { 0 }; + mali_ptr textures[8] = {0}; - for (unsigned i = 0; i < tex_count; i++) { - size_t sz = pan_size(TEXTURE) + - GENX(panfrost_estimate_texture_payload_size)(views[i]); - struct panfrost_ptr texture = - pan_pool_alloc_aligned(pool, sz, pan_alignment(TEXTURE)); - struct panfrost_ptr surfaces = { - .cpu = texture.cpu + pan_size(TEXTURE), - .gpu = texture.gpu + pan_size(TEXTURE), - }; + for (unsigned i = 0; i < tex_count; i++) { + size_t sz = pan_size(TEXTURE) + + GENX(panfrost_estimate_texture_payload_size)(views[i]); + struct panfrost_ptr texture = + pan_pool_alloc_aligned(pool, sz, pan_alignment(TEXTURE)); + struct panfrost_ptr surfaces = { + .cpu = texture.cpu + pan_size(TEXTURE), + .gpu = texture.gpu + 
pan_size(TEXTURE), + }; - GENX(panfrost_new_texture)(pool->dev, views[i], texture.cpu, &surfaces); - textures[i] = texture.gpu; - } + GENX(panfrost_new_texture)(pool->dev, views[i], texture.cpu, &surfaces); + textures[i] = texture.gpu; + } - return pan_pool_upload_aligned(pool, textures, - tex_count * sizeof(mali_ptr), - sizeof(mali_ptr)); + return pan_pool_upload_aligned(pool, textures, tex_count * sizeof(mali_ptr), + sizeof(mali_ptr)); #endif } static mali_ptr -pan_preload_emit_textures(struct pan_pool *pool, - const struct pan_fb_info *fb, bool zs, - unsigned *tex_count_out) +pan_preload_emit_textures(struct pan_pool *pool, const struct pan_fb_info *fb, + bool zs, unsigned *tex_count_out) { - const struct pan_image_view *views[8]; - struct pan_image_view patched_s_view; - unsigned tex_count = 0; + const struct pan_image_view *views[8]; + struct pan_image_view patched_s_view; + unsigned tex_count = 0; - if (zs) { - if (fb->zs.preload.z) - views[tex_count++] = fb->zs.view.zs; + if (zs) { + if (fb->zs.preload.z) + views[tex_count++] = fb->zs.view.zs; - if (fb->zs.preload.s) { - const struct pan_image_view *view = fb->zs.view.s ? : fb->zs.view.zs; - enum pipe_format fmt = util_format_get_depth_only(view->format); + if (fb->zs.preload.s) { + const struct pan_image_view *view = fb->zs.view.s ?: fb->zs.view.zs; + enum pipe_format fmt = util_format_get_depth_only(view->format); - switch (view->format) { - case PIPE_FORMAT_Z24_UNORM_S8_UINT: fmt = PIPE_FORMAT_X24S8_UINT; break; - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: fmt = PIPE_FORMAT_X32_S8X24_UINT; break; - default: fmt = view->format; break; - } + switch (view->format) { + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + fmt = PIPE_FORMAT_X24S8_UINT; + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + fmt = PIPE_FORMAT_X32_S8X24_UINT; + break; + default: + fmt = view->format; + break; + } - if (fmt != view->format) { - patched_s_view = *view; - patched_s_view.format = fmt; - view = &patched_s_view; - } - views[tex_count++] = view; - } - } else { - for (unsigned i = 0; i < fb->rt_count; i++) { - if (fb->rts[i].preload) - views[tex_count++] = fb->rts[i].view; - } + if (fmt != view->format) { + patched_s_view = *view; + patched_s_view.format = fmt; + view = &patched_s_view; + } + views[tex_count++] = view; + } + } else { + for (unsigned i = 0; i < fb->rt_count; i++) { + if (fb->rts[i].preload) + views[tex_count++] = fb->rts[i].view; + } + } - } + *tex_count_out = tex_count; - *tex_count_out = tex_count; - - return pan_blitter_emit_textures(pool, tex_count, views); + return pan_blitter_emit_textures(pool, tex_count, views); } #if PAN_ARCH >= 8 @@ -1005,214 +1013,212 @@ pan_preload_emit_textures(struct pan_pool *pool, static mali_ptr pan_blitter_emit_zs(struct pan_pool *pool, bool z, bool s) { - struct panfrost_ptr zsd = pan_pool_alloc_desc(pool, DEPTH_STENCIL); + struct panfrost_ptr zsd = pan_pool_alloc_desc(pool, DEPTH_STENCIL); - pan_pack(zsd.cpu, DEPTH_STENCIL, cfg) { - cfg.depth_function = MALI_FUNC_ALWAYS; - cfg.depth_write_enable = z; + pan_pack(zsd.cpu, DEPTH_STENCIL, cfg) { + cfg.depth_function = MALI_FUNC_ALWAYS; + cfg.depth_write_enable = z; - if (z) - cfg.depth_source = MALI_DEPTH_SOURCE_SHADER; + if (z) + cfg.depth_source = MALI_DEPTH_SOURCE_SHADER; - cfg.stencil_test_enable = s; - cfg.stencil_from_shader = s; + cfg.stencil_test_enable = s; + cfg.stencil_from_shader = s; - cfg.front_compare_function = MALI_FUNC_ALWAYS; - cfg.front_stencil_fail = MALI_STENCIL_OP_REPLACE; - cfg.front_depth_fail = MALI_STENCIL_OP_REPLACE; - cfg.front_depth_pass = 
MALI_STENCIL_OP_REPLACE; - cfg.front_write_mask = 0xFF; - cfg.front_value_mask = 0xFF; + cfg.front_compare_function = MALI_FUNC_ALWAYS; + cfg.front_stencil_fail = MALI_STENCIL_OP_REPLACE; + cfg.front_depth_fail = MALI_STENCIL_OP_REPLACE; + cfg.front_depth_pass = MALI_STENCIL_OP_REPLACE; + cfg.front_write_mask = 0xFF; + cfg.front_value_mask = 0xFF; - cfg.back_compare_function = MALI_FUNC_ALWAYS; - cfg.back_stencil_fail = MALI_STENCIL_OP_REPLACE; - cfg.back_depth_fail = MALI_STENCIL_OP_REPLACE; - cfg.back_depth_pass = MALI_STENCIL_OP_REPLACE; - cfg.back_write_mask = 0xFF; - cfg.back_value_mask = 0xFF; + cfg.back_compare_function = MALI_FUNC_ALWAYS; + cfg.back_stencil_fail = MALI_STENCIL_OP_REPLACE; + cfg.back_depth_fail = MALI_STENCIL_OP_REPLACE; + cfg.back_depth_pass = MALI_STENCIL_OP_REPLACE; + cfg.back_write_mask = 0xFF; + cfg.back_value_mask = 0xFF; - cfg.depth_cull_enable = false; - } + cfg.depth_cull_enable = false; + } - return zsd.gpu; + return zsd.gpu; } #else static mali_ptr -pan_blitter_emit_viewport(struct pan_pool *pool, - uint16_t minx, uint16_t miny, +pan_blitter_emit_viewport(struct pan_pool *pool, uint16_t minx, uint16_t miny, uint16_t maxx, uint16_t maxy) { - struct panfrost_ptr vp = pan_pool_alloc_desc(pool, VIEWPORT); + struct panfrost_ptr vp = pan_pool_alloc_desc(pool, VIEWPORT); - pan_pack(vp.cpu, VIEWPORT, cfg) { - cfg.scissor_minimum_x = minx; - cfg.scissor_minimum_y = miny; - cfg.scissor_maximum_x = maxx; - cfg.scissor_maximum_y = maxy; - } + pan_pack(vp.cpu, VIEWPORT, cfg) { + cfg.scissor_minimum_x = minx; + cfg.scissor_minimum_y = miny; + cfg.scissor_maximum_x = maxx; + cfg.scissor_maximum_y = maxy; + } - return vp.gpu; + return vp.gpu; } #endif static void -pan_preload_emit_dcd(struct pan_pool *pool, - struct pan_fb_info *fb, bool zs, - mali_ptr coordinates, - mali_ptr tsd, void *out, bool always_write) +pan_preload_emit_dcd(struct pan_pool *pool, struct pan_fb_info *fb, bool zs, + mali_ptr coordinates, mali_ptr tsd, void *out, + bool always_write) { - unsigned tex_count = 0; - mali_ptr textures = pan_preload_emit_textures(pool, fb, zs, &tex_count); - mali_ptr samplers = pan_blitter_emit_sampler(pool, true); - mali_ptr varyings = pan_blitter_emit_varying(pool); - mali_ptr varying_buffers = pan_blitter_emit_varying_buffer(pool, coordinates); + unsigned tex_count = 0; + mali_ptr textures = pan_preload_emit_textures(pool, fb, zs, &tex_count); + mali_ptr samplers = pan_blitter_emit_sampler(pool, true); + mali_ptr varyings = pan_blitter_emit_varying(pool); + mali_ptr varying_buffers = + pan_blitter_emit_varying_buffer(pool, coordinates); - /* Tiles updated by blit shaders are still considered clean (separate - * for colour and Z/S), allowing us to suppress unnecessary writeback - */ - UNUSED bool clean_fragment_write = !always_write; + /* Tiles updated by blit shaders are still considered clean (separate + * for colour and Z/S), allowing us to suppress unnecessary writeback + */ + UNUSED bool clean_fragment_write = !always_write; - /* Image view used when patching stencil formats for combined - * depth/stencil preloads. - */ - struct pan_image_view patched_s; + /* Image view used when patching stencil formats for combined + * depth/stencil preloads. 
+ */ + struct pan_image_view patched_s; - struct pan_blitter_views views = pan_preload_get_views(fb, zs, &patched_s); + struct pan_blitter_views views = pan_preload_get_views(fb, zs, &patched_s); #if PAN_ARCH <= 7 - pan_pack(out, DRAW, cfg) { - uint16_t minx = 0, miny = 0, maxx, maxy; + pan_pack(out, DRAW, cfg) { + uint16_t minx = 0, miny = 0, maxx, maxy; - if (PAN_ARCH == 4) { - maxx = fb->width - 1; - maxy = fb->height - 1; - } else { - /* Align on 32x32 tiles */ - minx = fb->extent.minx & ~31; - miny = fb->extent.miny & ~31; - maxx = MIN2(ALIGN_POT(fb->extent.maxx + 1, 32), fb->width) - 1; - maxy = MIN2(ALIGN_POT(fb->extent.maxy + 1, 32), fb->height) - 1; - } + if (PAN_ARCH == 4) { + maxx = fb->width - 1; + maxy = fb->height - 1; + } else { + /* Align on 32x32 tiles */ + minx = fb->extent.minx & ~31; + miny = fb->extent.miny & ~31; + maxx = MIN2(ALIGN_POT(fb->extent.maxx + 1, 32), fb->width) - 1; + maxy = MIN2(ALIGN_POT(fb->extent.maxy + 1, 32), fb->height) - 1; + } - cfg.thread_storage = tsd; - cfg.state = pan_blitter_get_rsd(pool->dev, &views); + cfg.thread_storage = tsd; + cfg.state = pan_blitter_get_rsd(pool->dev, &views); - cfg.position = coordinates; - cfg.viewport = - pan_blitter_emit_viewport(pool, minx, miny, maxx, maxy); + cfg.position = coordinates; + cfg.viewport = pan_blitter_emit_viewport(pool, minx, miny, maxx, maxy); - cfg.varyings = varyings; - cfg.varying_buffers = varying_buffers; - cfg.textures = textures; - cfg.samplers = samplers; + cfg.varyings = varyings; + cfg.varying_buffers = varying_buffers; + cfg.textures = textures; + cfg.samplers = samplers; #if PAN_ARCH >= 6 - cfg.clean_fragment_write = clean_fragment_write; + cfg.clean_fragment_write = clean_fragment_write; #endif - } + } #else - struct panfrost_ptr T; - unsigned nr_tables = 12; + struct panfrost_ptr T; + unsigned nr_tables = 12; - /* Although individual resources need only 16 byte alignment, the - * resource table as a whole must be 64-byte aligned. - */ - T = pan_pool_alloc_aligned(pool, nr_tables * pan_size(RESOURCE), 64); - memset(T.cpu, 0, nr_tables * pan_size(RESOURCE)); + /* Although individual resources need only 16 byte alignment, the + * resource table as a whole must be 64-byte aligned. 
+ */ + T = pan_pool_alloc_aligned(pool, nr_tables * pan_size(RESOURCE), 64); + memset(T.cpu, 0, nr_tables * pan_size(RESOURCE)); - panfrost_make_resource_table(T, PAN_TABLE_TEXTURE, textures, tex_count); - panfrost_make_resource_table(T, PAN_TABLE_SAMPLER, samplers, 1); - panfrost_make_resource_table(T, PAN_TABLE_ATTRIBUTE, varyings, 1); - panfrost_make_resource_table(T, PAN_TABLE_ATTRIBUTE_BUFFER, varying_buffers, 1); + panfrost_make_resource_table(T, PAN_TABLE_TEXTURE, textures, tex_count); + panfrost_make_resource_table(T, PAN_TABLE_SAMPLER, samplers, 1); + panfrost_make_resource_table(T, PAN_TABLE_ATTRIBUTE, varyings, 1); + panfrost_make_resource_table(T, PAN_TABLE_ATTRIBUTE_BUFFER, varying_buffers, + 1); - struct pan_blit_shader_key key = pan_blitter_get_key(&views); - const struct pan_blit_shader_data *blit_shader = - pan_blitter_get_blit_shader(pool->dev, &key); + struct pan_blit_shader_key key = pan_blitter_get_key(&views); + const struct pan_blit_shader_data *blit_shader = + pan_blitter_get_blit_shader(pool->dev, &key); - bool z = fb->zs.preload.z; - bool s = fb->zs.preload.s; - bool ms = pan_blitter_is_ms(&views); + bool z = fb->zs.preload.z; + bool s = fb->zs.preload.s; + bool ms = pan_blitter_is_ms(&views); - struct panfrost_ptr spd = pan_pool_alloc_desc(pool, SHADER_PROGRAM); - pan_pack(spd.cpu, SHADER_PROGRAM, cfg) { - cfg.stage = MALI_SHADER_STAGE_FRAGMENT; - cfg.primary_shader = true; - cfg.register_allocation = MALI_SHADER_REGISTER_ALLOCATION_32_PER_THREAD; - cfg.binary = blit_shader->address; - cfg.preload.r48_r63 = blit_shader->info.preload >> 48; - } + struct panfrost_ptr spd = pan_pool_alloc_desc(pool, SHADER_PROGRAM); + pan_pack(spd.cpu, SHADER_PROGRAM, cfg) { + cfg.stage = MALI_SHADER_STAGE_FRAGMENT; + cfg.primary_shader = true; + cfg.register_allocation = MALI_SHADER_REGISTER_ALLOCATION_32_PER_THREAD; + cfg.binary = blit_shader->address; + cfg.preload.r48_r63 = blit_shader->info.preload >> 48; + } - unsigned bd_count = views.rt_count; - struct panfrost_ptr blend = pan_pool_alloc_desc_array(pool, bd_count, BLEND); + unsigned bd_count = views.rt_count; + struct panfrost_ptr blend = pan_pool_alloc_desc_array(pool, bd_count, BLEND); - if (!zs) { - pan_blitter_emit_blends(pool->dev, blit_shader, &views, NULL, - blend.cpu); - } + if (!zs) { + pan_blitter_emit_blends(pool->dev, blit_shader, &views, NULL, blend.cpu); + } - pan_pack(out, DRAW, cfg) { - if (zs) { - /* ZS_EMIT requires late update/kill */ - cfg.zs_update_operation = MALI_PIXEL_KILL_FORCE_LATE; - cfg.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_LATE; - cfg.blend_count = 0; - } else { - /* Skipping ATEST requires forcing Z/S */ - cfg.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY; - cfg.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY; + pan_pack(out, DRAW, cfg) { + if (zs) { + /* ZS_EMIT requires late update/kill */ + cfg.zs_update_operation = MALI_PIXEL_KILL_FORCE_LATE; + cfg.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_LATE; + cfg.blend_count = 0; + } else { + /* Skipping ATEST requires forcing Z/S */ + cfg.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY; + cfg.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY; - cfg.blend = blend.gpu; - cfg.blend_count = bd_count; - cfg.render_target_mask = 0x1; - } + cfg.blend = blend.gpu; + cfg.blend_count = bd_count; + cfg.render_target_mask = 0x1; + } - cfg.allow_forward_pixel_to_kill = !zs; - cfg.allow_forward_pixel_to_be_killed = true; - cfg.depth_stencil = pan_blitter_emit_zs(pool, z, s); - cfg.sample_mask = 0xFFFF; - cfg.multisample_enable = ms; - 
cfg.evaluate_per_sample = ms; - cfg.maximum_z = 1.0; - cfg.clean_fragment_write = clean_fragment_write; - cfg.shader.resources = T.gpu | nr_tables; - cfg.shader.shader = spd.gpu; - cfg.shader.thread_storage = tsd; - } + cfg.allow_forward_pixel_to_kill = !zs; + cfg.allow_forward_pixel_to_be_killed = true; + cfg.depth_stencil = pan_blitter_emit_zs(pool, z, s); + cfg.sample_mask = 0xFFFF; + cfg.multisample_enable = ms; + cfg.evaluate_per_sample = ms; + cfg.maximum_z = 1.0; + cfg.clean_fragment_write = clean_fragment_write; + cfg.shader.resources = T.gpu | nr_tables; + cfg.shader.shader = spd.gpu; + cfg.shader.thread_storage = tsd; + } #endif } #if PAN_ARCH <= 7 static void * pan_blit_emit_tiler_job(struct pan_pool *pool, - struct pan_scoreboard *scoreboard, - mali_ptr tiler, + struct pan_scoreboard *scoreboard, mali_ptr tiler, struct panfrost_ptr *job) { - *job = pan_pool_alloc_desc(pool, TILER_JOB); + *job = pan_pool_alloc_desc(pool, TILER_JOB); - pan_section_pack(job->cpu, TILER_JOB, PRIMITIVE, cfg) { - cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP; - cfg.index_count = 4; - cfg.job_task_split = 6; - } + pan_section_pack(job->cpu, TILER_JOB, PRIMITIVE, cfg) { + cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP; + cfg.index_count = 4; + cfg.job_task_split = 6; + } - pan_section_pack(job->cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) { - cfg.constant = 1.0f; - } + pan_section_pack(job->cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) { + cfg.constant = 1.0f; + } - void *invoc = pan_section_ptr(job->cpu, TILER_JOB, INVOCATION); - panfrost_pack_work_groups_compute(invoc, 1, 4, 1, 1, 1, 1, true, false); + void *invoc = pan_section_ptr(job->cpu, TILER_JOB, INVOCATION); + panfrost_pack_work_groups_compute(invoc, 1, 4, 1, 1, 1, 1, true, false); #if PAN_ARCH >= 6 - pan_section_pack(job->cpu, TILER_JOB, PADDING, cfg); - pan_section_pack(job->cpu, TILER_JOB, TILER, cfg) { - cfg.address = tiler; - } + pan_section_pack(job->cpu, TILER_JOB, PADDING, cfg) + ; + pan_section_pack(job->cpu, TILER_JOB, TILER, cfg) { + cfg.address = tiler; + } #endif - panfrost_add_job(pool, scoreboard, MALI_JOB_TYPE_TILER, - false, false, 0, 0, job, false); - return pan_section_ptr(job->cpu, TILER_JOB, DRAW); + panfrost_add_job(pool, scoreboard, MALI_JOB_TYPE_TILER, false, false, 0, 0, + job, false); + return pan_section_ptr(job->cpu, TILER_JOB, DRAW); } #endif @@ -1221,172 +1227,157 @@ static void pan_preload_fb_alloc_pre_post_dcds(struct pan_pool *desc_pool, struct pan_fb_info *fb) { - if (fb->bifrost.pre_post.dcds.gpu) - return; + if (fb->bifrost.pre_post.dcds.gpu) + return; - fb->bifrost.pre_post.dcds = - pan_pool_alloc_desc_array(desc_pool, 3, DRAW); + fb->bifrost.pre_post.dcds = pan_pool_alloc_desc_array(desc_pool, 3, DRAW); } static void pan_preload_emit_pre_frame_dcd(struct pan_pool *desc_pool, - struct pan_fb_info *fb, bool zs, - mali_ptr coords, mali_ptr tsd) + struct pan_fb_info *fb, bool zs, mali_ptr coords, + mali_ptr tsd) { - unsigned dcd_idx = zs ? 1 : 0; - pan_preload_fb_alloc_pre_post_dcds(desc_pool, fb); - assert(fb->bifrost.pre_post.dcds.cpu); - void *dcd = fb->bifrost.pre_post.dcds.cpu + - (dcd_idx * pan_size(DRAW)); + unsigned dcd_idx = zs ? 1 : 0; + pan_preload_fb_alloc_pre_post_dcds(desc_pool, fb); + assert(fb->bifrost.pre_post.dcds.cpu); + void *dcd = fb->bifrost.pre_post.dcds.cpu + (dcd_idx * pan_size(DRAW)); - /* We only use crc_rt to determine whether to force writes for updating - * the CRCs, so use a conservative tile size (16x16). 
- */ - int crc_rt = GENX(pan_select_crc_rt)(fb, 16 * 16); + /* We only use crc_rt to determine whether to force writes for updating + * the CRCs, so use a conservative tile size (16x16). + */ + int crc_rt = GENX(pan_select_crc_rt)(fb, 16 * 16); - bool always_write = false; + bool always_write = false; - /* If CRC data is currently invalid and this batch will make it valid, - * write even clean tiles to make sure CRC data is updated. */ - if (crc_rt >= 0) { - bool *valid = fb->rts[crc_rt].crc_valid; - bool full = !fb->extent.minx && !fb->extent.miny && - fb->extent.maxx == (fb->width - 1) && - fb->extent.maxy == (fb->height - 1); + /* If CRC data is currently invalid and this batch will make it valid, + * write even clean tiles to make sure CRC data is updated. */ + if (crc_rt >= 0) { + bool *valid = fb->rts[crc_rt].crc_valid; + bool full = !fb->extent.minx && !fb->extent.miny && + fb->extent.maxx == (fb->width - 1) && + fb->extent.maxy == (fb->height - 1); - if (full && !(*valid)) - always_write = true; - } + if (full && !(*valid)) + always_write = true; + } - pan_preload_emit_dcd(desc_pool, fb, zs, coords, tsd, dcd, always_write); - if (zs) { - enum pipe_format fmt = fb->zs.view.zs ? - fb->zs.view.zs->image->layout.format : - fb->zs.view.s->image->layout.format; - bool always = false; + pan_preload_emit_dcd(desc_pool, fb, zs, coords, tsd, dcd, always_write); + if (zs) { + enum pipe_format fmt = fb->zs.view.zs + ? fb->zs.view.zs->image->layout.format + : fb->zs.view.s->image->layout.format; + bool always = false; - /* If we're dealing with a combined ZS resource and only one - * component is cleared, we need to reload the whole surface - * because the zs_clean_pixel_write_enable flag is set in that - * case. - */ - if (util_format_is_depth_and_stencil(fmt) && - fb->zs.clear.z != fb->zs.clear.s) - always = true; + /* If we're dealing with a combined ZS resource and only one + * component is cleared, we need to reload the whole surface + * because the zs_clean_pixel_write_enable flag is set in that + * case. + */ + if (util_format_is_depth_and_stencil(fmt) && + fb->zs.clear.z != fb->zs.clear.s) + always = true; - /* We could use INTERSECT on Bifrost v7 too, but - * EARLY_ZS_ALWAYS has the advantage of reloading the ZS tile - * buffer one or more tiles ahead, making ZS data immediately - * available for any ZS tests taking place in other shaders. - * Thing's haven't been benchmarked to determine what's - * preferable (saving bandwidth vs having ZS preloaded - * earlier), so let's leave it like that for now. - */ - fb->bifrost.pre_post.modes[dcd_idx] = - desc_pool->dev->arch > 6 ? - MALI_PRE_POST_FRAME_SHADER_MODE_EARLY_ZS_ALWAYS : - always ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS : - MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT; - } else { - fb->bifrost.pre_post.modes[dcd_idx] = - always_write ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS : - MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT; - } + /* We could use INTERSECT on Bifrost v7 too, but + * EARLY_ZS_ALWAYS has the advantage of reloading the ZS tile + * buffer one or more tiles ahead, making ZS data immediately + * available for any ZS tests taking place in other shaders. + * Thing's haven't been benchmarked to determine what's + * preferable (saving bandwidth vs having ZS preloaded + * earlier), so let's leave it like that for now. + */ + fb->bifrost.pre_post.modes[dcd_idx] = + desc_pool->dev->arch > 6 + ? MALI_PRE_POST_FRAME_SHADER_MODE_EARLY_ZS_ALWAYS + : always ? 
MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS + : MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT; + } else { + fb->bifrost.pre_post.modes[dcd_idx] = + always_write ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS + : MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT; + } } #else static struct panfrost_ptr pan_preload_emit_tiler_job(struct pan_pool *desc_pool, struct pan_scoreboard *scoreboard, - struct pan_fb_info *fb, bool zs, - mali_ptr coords, mali_ptr tsd) + struct pan_fb_info *fb, bool zs, mali_ptr coords, + mali_ptr tsd) { - struct panfrost_ptr job = - pan_pool_alloc_desc(desc_pool, TILER_JOB); + struct panfrost_ptr job = pan_pool_alloc_desc(desc_pool, TILER_JOB); - pan_preload_emit_dcd(desc_pool, fb, zs, coords, tsd, - pan_section_ptr(job.cpu, TILER_JOB, DRAW), - false); + pan_preload_emit_dcd(desc_pool, fb, zs, coords, tsd, + pan_section_ptr(job.cpu, TILER_JOB, DRAW), false); - pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) { - cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP; - cfg.index_count = 4; - cfg.job_task_split = 6; - } + pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) { + cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP; + cfg.index_count = 4; + cfg.job_task_split = 6; + } - pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) { - cfg.constant = 1.0f; - } + pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) { + cfg.constant = 1.0f; + } - void *invoc = pan_section_ptr(job.cpu, - TILER_JOB, - INVOCATION); - panfrost_pack_work_groups_compute(invoc, 1, 4, - 1, 1, 1, 1, true, false); + void *invoc = pan_section_ptr(job.cpu, TILER_JOB, INVOCATION); + panfrost_pack_work_groups_compute(invoc, 1, 4, 1, 1, 1, 1, true, false); - panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER, - false, false, 0, 0, &job, true); - return job; + panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER, false, false, 0, + 0, &job, true); + return job; } #endif static struct panfrost_ptr -pan_preload_fb_part(struct pan_pool *pool, - struct pan_scoreboard *scoreboard, - struct pan_fb_info *fb, bool zs, - mali_ptr coords, mali_ptr tsd, mali_ptr tiler) +pan_preload_fb_part(struct pan_pool *pool, struct pan_scoreboard *scoreboard, + struct pan_fb_info *fb, bool zs, mali_ptr coords, + mali_ptr tsd, mali_ptr tiler) { - struct panfrost_ptr job = { 0 }; + struct panfrost_ptr job = {0}; #if PAN_ARCH >= 6 - pan_preload_emit_pre_frame_dcd(pool, fb, zs, coords, tsd); + pan_preload_emit_pre_frame_dcd(pool, fb, zs, coords, tsd); #else - job = pan_preload_emit_tiler_job(pool, scoreboard, fb, zs, coords, tsd); + job = pan_preload_emit_tiler_job(pool, scoreboard, fb, zs, coords, tsd); #endif - return job; + return job; } unsigned -GENX(pan_preload_fb)(struct pan_pool *pool, - struct pan_scoreboard *scoreboard, - struct pan_fb_info *fb, - mali_ptr tsd, mali_ptr tiler, +GENX(pan_preload_fb)(struct pan_pool *pool, struct pan_scoreboard *scoreboard, + struct pan_fb_info *fb, mali_ptr tsd, mali_ptr tiler, struct panfrost_ptr *jobs) { - bool preload_zs = pan_preload_needed(fb, true); - bool preload_rts = pan_preload_needed(fb, false); - mali_ptr coords; + bool preload_zs = pan_preload_needed(fb, true); + bool preload_rts = pan_preload_needed(fb, false); + mali_ptr coords; - if (!preload_zs && !preload_rts) - return 0; + if (!preload_zs && !preload_rts) + return 0; - float rect[] = { - 0.0, 0.0, 0.0, 1.0, - fb->width, 0.0, 0.0, 1.0, - 0.0, fb->height, 0.0, 1.0, - fb->width, fb->height, 0.0, 1.0, - }; + float rect[] = { + 0.0, 0.0, 0.0, 1.0, fb->width, 0.0, 0.0, 1.0, + 0.0, fb->height, 0.0, 1.0, fb->width, fb->height, 0.0, 
1.0, + }; - coords = pan_pool_upload_aligned(pool, rect, - sizeof(rect), 64); + coords = pan_pool_upload_aligned(pool, rect, sizeof(rect), 64); - unsigned njobs = 0; - if (preload_zs) { - struct panfrost_ptr job = - pan_preload_fb_part(pool, scoreboard, fb, true, - coords, tsd, tiler); - if (jobs && job.cpu) - jobs[njobs++] = job; - } + unsigned njobs = 0; + if (preload_zs) { + struct panfrost_ptr job = + pan_preload_fb_part(pool, scoreboard, fb, true, coords, tsd, tiler); + if (jobs && job.cpu) + jobs[njobs++] = job; + } - if (preload_rts) { - struct panfrost_ptr job = - pan_preload_fb_part(pool, scoreboard, fb, false, - coords, tsd, tiler); - if (jobs && job.cpu) - jobs[njobs++] = job; - } + if (preload_rts) { + struct panfrost_ptr job = + pan_preload_fb_part(pool, scoreboard, fb, false, coords, tsd, tiler); + if (jobs && job.cpu) + jobs[njobs++] = job; + } - return njobs; + return njobs; } #if PAN_ARCH <= 7 @@ -1396,276 +1387,288 @@ GENX(pan_blit_ctx_init)(struct panfrost_device *dev, struct pan_pool *blit_pool, struct pan_blit_context *ctx) { - memset(ctx, 0, sizeof(*ctx)); + memset(ctx, 0, sizeof(*ctx)); - struct pan_image_view sviews[2] = { - { - .format = info->src.planes[0].format, - .image = info->src.planes[0].image, - .dim = info->src.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_CUBE ? - MALI_TEXTURE_DIMENSION_2D : info->src.planes[0].image->layout.dim, - .first_level = info->src.level, - .last_level = info->src.level, - .first_layer = info->src.start.layer, - .last_layer = info->src.end.layer, - .swizzle = { - PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, - PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W, - }, - }, - }; + struct pan_image_view sviews[2] = { + { + .format = info->src.planes[0].format, + .image = info->src.planes[0].image, + .dim = + info->src.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_CUBE + ? MALI_TEXTURE_DIMENSION_2D + : info->src.planes[0].image->layout.dim, + .first_level = info->src.level, + .last_level = info->src.level, + .first_layer = info->src.start.layer, + .last_layer = info->src.end.layer, + .swizzle = + { + PIPE_SWIZZLE_X, + PIPE_SWIZZLE_Y, + PIPE_SWIZZLE_Z, + PIPE_SWIZZLE_W, + }, + }, + }; - struct pan_image_view dview = { - .format = info->dst.planes[0].format, - .image = info->dst.planes[0].image, - .dim = info->dst.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_1D ? - MALI_TEXTURE_DIMENSION_1D : MALI_TEXTURE_DIMENSION_2D, - .first_level = info->dst.level, - .last_level = info->dst.level, - .first_layer = info->dst.start.layer, - .last_layer = info->dst.start.layer, - .swizzle = { - PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, - PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W, - }, - }; + struct pan_image_view dview = { + .format = info->dst.planes[0].format, + .image = info->dst.planes[0].image, + .dim = info->dst.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_1D + ? 
MALI_TEXTURE_DIMENSION_1D + : MALI_TEXTURE_DIMENSION_2D, + .first_level = info->dst.level, + .last_level = info->dst.level, + .first_layer = info->dst.start.layer, + .last_layer = info->dst.start.layer, + .swizzle = + { + PIPE_SWIZZLE_X, + PIPE_SWIZZLE_Y, + PIPE_SWIZZLE_Z, + PIPE_SWIZZLE_W, + }, + }; - ctx->src.start.x = info->src.start.x; - ctx->src.start.y = info->src.start.y; - ctx->src.end.x = info->src.end.x; - ctx->src.end.y = info->src.end.y; - ctx->src.dim = sviews[0].dim; + ctx->src.start.x = info->src.start.x; + ctx->src.start.y = info->src.start.y; + ctx->src.end.x = info->src.end.x; + ctx->src.end.y = info->src.end.y; + ctx->src.dim = sviews[0].dim; - if (info->dst.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_3D) { - unsigned max_z = u_minify(info->dst.planes[0].image->layout.depth, info->dst.level) - 1; + if (info->dst.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_3D) { + unsigned max_z = + u_minify(info->dst.planes[0].image->layout.depth, info->dst.level) - 1; - ctx->z_scale = (float)(info->src.end.z - info->src.start.z) / - (info->dst.end.z - info->dst.start.z); - assert(info->dst.start.z != info->dst.end.z); - if (info->dst.start.z > info->dst.end.z) { - ctx->dst.cur_layer = info->dst.start.z - 1; - ctx->dst.last_layer = info->dst.end.z; - } else { - ctx->dst.cur_layer = info->dst.start.z; - ctx->dst.last_layer = info->dst.end.z - 1; - } - ctx->dst.cur_layer = MIN2(MAX2(ctx->dst.cur_layer, 0), max_z); - ctx->dst.last_layer = MIN2(MAX2(ctx->dst.last_layer, 0), max_z); - ctx->dst.layer_offset = ctx->dst.cur_layer; - } else { - unsigned max_layer = info->dst.planes[0].image->layout.array_size - 1; - ctx->dst.layer_offset = info->dst.start.layer; - ctx->dst.cur_layer = info->dst.start.layer; - ctx->dst.last_layer = MIN2(info->dst.end.layer, max_layer); - ctx->z_scale = 1; - } + ctx->z_scale = (float)(info->src.end.z - info->src.start.z) / + (info->dst.end.z - info->dst.start.z); + assert(info->dst.start.z != info->dst.end.z); + if (info->dst.start.z > info->dst.end.z) { + ctx->dst.cur_layer = info->dst.start.z - 1; + ctx->dst.last_layer = info->dst.end.z; + } else { + ctx->dst.cur_layer = info->dst.start.z; + ctx->dst.last_layer = info->dst.end.z - 1; + } + ctx->dst.cur_layer = MIN2(MAX2(ctx->dst.cur_layer, 0), max_z); + ctx->dst.last_layer = MIN2(MAX2(ctx->dst.last_layer, 0), max_z); + ctx->dst.layer_offset = ctx->dst.cur_layer; + } else { + unsigned max_layer = info->dst.planes[0].image->layout.array_size - 1; + ctx->dst.layer_offset = info->dst.start.layer; + ctx->dst.cur_layer = info->dst.start.layer; + ctx->dst.last_layer = MIN2(info->dst.end.layer, max_layer); + ctx->z_scale = 1; + } - if (sviews[0].dim == MALI_TEXTURE_DIMENSION_3D) { - if (info->src.start.z < info->src.end.z) - ctx->src.z_offset = info->src.start.z + fabs(ctx->z_scale * 0.5f); - else - ctx->src.z_offset = info->src.start.z - fabs(ctx->z_scale * 0.5f); - } else { - ctx->src.layer_offset = info->src.start.layer; - } + if (sviews[0].dim == MALI_TEXTURE_DIMENSION_3D) { + if (info->src.start.z < info->src.end.z) + ctx->src.z_offset = info->src.start.z + fabs(ctx->z_scale * 0.5f); + else + ctx->src.z_offset = info->src.start.z - fabs(ctx->z_scale * 0.5f); + } else { + ctx->src.layer_offset = info->src.start.layer; + } - /* Split depth and stencil */ - if (util_format_is_depth_and_stencil(sviews[0].format)) { - sviews[1] = sviews[0]; - sviews[0].format = util_format_get_depth_only(sviews[0].format); - sviews[1].format = util_format_stencil_only(sviews[1].format); - } else if 
(info->src.planes[1].format) { - sviews[1] = sviews[0]; - sviews[1].format = info->src.planes[1].format; - sviews[1].image = info->src.planes[1].image; - } + /* Split depth and stencil */ + if (util_format_is_depth_and_stencil(sviews[0].format)) { + sviews[1] = sviews[0]; + sviews[0].format = util_format_get_depth_only(sviews[0].format); + sviews[1].format = util_format_stencil_only(sviews[1].format); + } else if (info->src.planes[1].format) { + sviews[1] = sviews[0]; + sviews[1].format = info->src.planes[1].format; + sviews[1].image = info->src.planes[1].image; + } - ctx->rsd = pan_blit_get_rsd(dev, sviews, &dview); + ctx->rsd = pan_blit_get_rsd(dev, sviews, &dview); - ASSERTED unsigned nlayers = info->src.end.layer - info->src.start.layer + 1; + ASSERTED unsigned nlayers = info->src.end.layer - info->src.start.layer + 1; - assert(nlayers == (info->dst.end.layer - info->dst.start.layer + 1)); + assert(nlayers == (info->dst.end.layer - info->dst.start.layer + 1)); - unsigned dst_w = u_minify(info->dst.planes[0].image->layout.width, info->dst.level); - unsigned dst_h = u_minify(info->dst.planes[0].image->layout.height, info->dst.level); - unsigned maxx = MIN2(MAX2(info->dst.start.x, info->dst.end.x), dst_w - 1); - unsigned maxy = MIN2(MAX2(info->dst.start.y, info->dst.end.y), dst_h - 1); - unsigned minx = MAX2(MIN3(info->dst.start.x, info->dst.end.x, maxx), 0); - unsigned miny = MAX2(MIN3(info->dst.start.y, info->dst.end.y, maxy), 0); + unsigned dst_w = + u_minify(info->dst.planes[0].image->layout.width, info->dst.level); + unsigned dst_h = + u_minify(info->dst.planes[0].image->layout.height, info->dst.level); + unsigned maxx = MIN2(MAX2(info->dst.start.x, info->dst.end.x), dst_w - 1); + unsigned maxy = MIN2(MAX2(info->dst.start.y, info->dst.end.y), dst_h - 1); + unsigned minx = MAX2(MIN3(info->dst.start.x, info->dst.end.x, maxx), 0); + unsigned miny = MAX2(MIN3(info->dst.start.y, info->dst.end.y, maxy), 0); - if (info->scissor.enable) { - minx = MAX2(minx, info->scissor.minx); - miny = MAX2(miny, info->scissor.miny); - maxx = MIN2(maxx, info->scissor.maxx); - maxy = MIN2(maxy, info->scissor.maxy); - } + if (info->scissor.enable) { + minx = MAX2(minx, info->scissor.minx); + miny = MAX2(miny, info->scissor.miny); + maxx = MIN2(maxx, info->scissor.maxx); + maxy = MIN2(maxy, info->scissor.maxy); + } - const struct pan_image_view *sview_ptrs[] = { &sviews[0], &sviews[1] }; - unsigned nviews = sviews[1].format ? 2 : 1; + const struct pan_image_view *sview_ptrs[] = {&sviews[0], &sviews[1]}; + unsigned nviews = sviews[1].format ? 
2 : 1; - ctx->textures = pan_blitter_emit_textures(blit_pool, nviews, sview_ptrs); - ctx->samplers = pan_blitter_emit_sampler(blit_pool, info->nearest); + ctx->textures = pan_blitter_emit_textures(blit_pool, nviews, sview_ptrs); + ctx->samplers = pan_blitter_emit_sampler(blit_pool, info->nearest); - ctx->vpd = pan_blitter_emit_viewport(blit_pool, - minx, miny, maxx, maxy); + ctx->vpd = pan_blitter_emit_viewport(blit_pool, minx, miny, maxx, maxy); - float dst_rect[] = { - info->dst.start.x, info->dst.start.y, 0.0, 1.0, - info->dst.end.x, info->dst.start.y, 0.0, 1.0, - info->dst.start.x, info->dst.end.y, 0.0, 1.0, - info->dst.end.x, info->dst.end.y, 0.0, 1.0, - }; + float dst_rect[] = { + info->dst.start.x, info->dst.start.y, 0.0, 1.0, + info->dst.end.x, info->dst.start.y, 0.0, 1.0, + info->dst.start.x, info->dst.end.y, 0.0, 1.0, + info->dst.end.x, info->dst.end.y, 0.0, 1.0, + }; - ctx->position = - pan_pool_upload_aligned(blit_pool, dst_rect, - sizeof(dst_rect), 64); + ctx->position = + pan_pool_upload_aligned(blit_pool, dst_rect, sizeof(dst_rect), 64); } struct panfrost_ptr -GENX(pan_blit)(struct pan_blit_context *ctx, - struct pan_pool *pool, - struct pan_scoreboard *scoreboard, - mali_ptr tsd, mali_ptr tiler) +GENX(pan_blit)(struct pan_blit_context *ctx, struct pan_pool *pool, + struct pan_scoreboard *scoreboard, mali_ptr tsd, mali_ptr tiler) { - if (ctx->dst.cur_layer < 0 || - (ctx->dst.last_layer >= ctx->dst.layer_offset && - ctx->dst.cur_layer > ctx->dst.last_layer) || - (ctx->dst.last_layer < ctx->dst.layer_offset && - ctx->dst.cur_layer < ctx->dst.last_layer)) - return (struct panfrost_ptr){ 0 }; + if (ctx->dst.cur_layer < 0 || + (ctx->dst.last_layer >= ctx->dst.layer_offset && + ctx->dst.cur_layer > ctx->dst.last_layer) || + (ctx->dst.last_layer < ctx->dst.layer_offset && + ctx->dst.cur_layer < ctx->dst.last_layer)) + return (struct panfrost_ptr){0}; - int32_t layer = ctx->dst.cur_layer - ctx->dst.layer_offset; - float src_z; - if (ctx->src.dim == MALI_TEXTURE_DIMENSION_3D) - src_z = (ctx->z_scale * layer) + ctx->src.z_offset; - else - src_z = ctx->src.layer_offset + layer; + int32_t layer = ctx->dst.cur_layer - ctx->dst.layer_offset; + float src_z; + if (ctx->src.dim == MALI_TEXTURE_DIMENSION_3D) + src_z = (ctx->z_scale * layer) + ctx->src.z_offset; + else + src_z = ctx->src.layer_offset + layer; - float src_rect[] = { - ctx->src.start.x, ctx->src.start.y, src_z, 1.0, - ctx->src.end.x, ctx->src.start.y, src_z, 1.0, - ctx->src.start.x, ctx->src.end.y, src_z, 1.0, - ctx->src.end.x, ctx->src.end.y, src_z, 1.0, - }; + float src_rect[] = { + ctx->src.start.x, ctx->src.start.y, src_z, 1.0, + ctx->src.end.x, ctx->src.start.y, src_z, 1.0, + ctx->src.start.x, ctx->src.end.y, src_z, 1.0, + ctx->src.end.x, ctx->src.end.y, src_z, 1.0, + }; - mali_ptr src_coords = - pan_pool_upload_aligned(pool, src_rect, - sizeof(src_rect), 64); + mali_ptr src_coords = + pan_pool_upload_aligned(pool, src_rect, sizeof(src_rect), 64); - struct panfrost_ptr job = { 0 }; - void *dcd = pan_blit_emit_tiler_job(pool, scoreboard, tiler, &job); + struct panfrost_ptr job = {0}; + void *dcd = pan_blit_emit_tiler_job(pool, scoreboard, tiler, &job); - pan_pack(dcd, DRAW, cfg) { - cfg.thread_storage = tsd; - cfg.state = ctx->rsd; + pan_pack(dcd, DRAW, cfg) { + cfg.thread_storage = tsd; + cfg.state = ctx->rsd; - cfg.position = ctx->position; - cfg.varyings = pan_blitter_emit_varying(pool); - cfg.varying_buffers = pan_blitter_emit_varying_buffer(pool, src_coords); - cfg.viewport = ctx->vpd; - cfg.textures = ctx->textures; - 
cfg.samplers = ctx->samplers; - } + cfg.position = ctx->position; + cfg.varyings = pan_blitter_emit_varying(pool); + cfg.varying_buffers = pan_blitter_emit_varying_buffer(pool, src_coords); + cfg.viewport = ctx->vpd; + cfg.textures = ctx->textures; + cfg.samplers = ctx->samplers; + } - return job; + return job; } #endif -static uint32_t pan_blit_shader_key_hash(const void *key) +static uint32_t +pan_blit_shader_key_hash(const void *key) { - return _mesa_hash_data(key, sizeof(struct pan_blit_shader_key)); + return _mesa_hash_data(key, sizeof(struct pan_blit_shader_key)); } -static bool pan_blit_shader_key_equal(const void *a, const void *b) +static bool +pan_blit_shader_key_equal(const void *a, const void *b) { - return !memcmp(a, b, sizeof(struct pan_blit_shader_key)); + return !memcmp(a, b, sizeof(struct pan_blit_shader_key)); } -static uint32_t pan_blit_blend_shader_key_hash(const void *key) +static uint32_t +pan_blit_blend_shader_key_hash(const void *key) { - return _mesa_hash_data(key, sizeof(struct pan_blit_blend_shader_key)); + return _mesa_hash_data(key, sizeof(struct pan_blit_blend_shader_key)); } -static bool pan_blit_blend_shader_key_equal(const void *a, const void *b) +static bool +pan_blit_blend_shader_key_equal(const void *a, const void *b) { - return !memcmp(a, b, sizeof(struct pan_blit_blend_shader_key)); + return !memcmp(a, b, sizeof(struct pan_blit_blend_shader_key)); } -static uint32_t pan_blit_rsd_key_hash(const void *key) +static uint32_t +pan_blit_rsd_key_hash(const void *key) { - return _mesa_hash_data(key, sizeof(struct pan_blit_rsd_key)); + return _mesa_hash_data(key, sizeof(struct pan_blit_rsd_key)); } -static bool pan_blit_rsd_key_equal(const void *a, const void *b) +static bool +pan_blit_rsd_key_equal(const void *a, const void *b) { - return !memcmp(a, b, sizeof(struct pan_blit_rsd_key)); + return !memcmp(a, b, sizeof(struct pan_blit_rsd_key)); } static void pan_blitter_prefill_blit_shader_cache(struct panfrost_device *dev) { - static const struct pan_blit_shader_key prefill[] = { - { - .surfaces[0] = { - .loc = FRAG_RESULT_DEPTH, - .type = nir_type_float32, - .dim = MALI_TEXTURE_DIMENSION_2D, - .src_samples = 1, - .dst_samples = 1, - }, - }, - { - .surfaces[1] = { - .loc = FRAG_RESULT_STENCIL, - .type = nir_type_uint32, - .dim = MALI_TEXTURE_DIMENSION_2D, - .src_samples = 1, - .dst_samples = 1, - }, - }, - { - .surfaces[0] = { - .loc = FRAG_RESULT_DATA0, - .type = nir_type_float32, - .dim = MALI_TEXTURE_DIMENSION_2D, - .src_samples = 1, - .dst_samples = 1, - }, - }, - }; + static const struct pan_blit_shader_key prefill[] = { + { + .surfaces[0] = + { + .loc = FRAG_RESULT_DEPTH, + .type = nir_type_float32, + .dim = MALI_TEXTURE_DIMENSION_2D, + .src_samples = 1, + .dst_samples = 1, + }, + }, + { + .surfaces[1] = + { + .loc = FRAG_RESULT_STENCIL, + .type = nir_type_uint32, + .dim = MALI_TEXTURE_DIMENSION_2D, + .src_samples = 1, + .dst_samples = 1, + }, + }, + { + .surfaces[0] = + { + .loc = FRAG_RESULT_DATA0, + .type = nir_type_float32, + .dim = MALI_TEXTURE_DIMENSION_2D, + .src_samples = 1, + .dst_samples = 1, + }, + }, + }; - for (unsigned i = 0; i < ARRAY_SIZE(prefill); i++) - pan_blitter_get_blit_shader(dev, &prefill[i]); + for (unsigned i = 0; i < ARRAY_SIZE(prefill); i++) + pan_blitter_get_blit_shader(dev, &prefill[i]); } void -GENX(pan_blitter_init)(struct panfrost_device *dev, - struct pan_pool *bin_pool, +GENX(pan_blitter_init)(struct panfrost_device *dev, struct pan_pool *bin_pool, struct pan_pool *desc_pool) { - dev->blitter.shaders.blit = - 
_mesa_hash_table_create(NULL, pan_blit_shader_key_hash, - pan_blit_shader_key_equal); - dev->blitter.shaders.blend = - _mesa_hash_table_create(NULL, pan_blit_blend_shader_key_hash, - pan_blit_blend_shader_key_equal); - dev->blitter.shaders.pool = bin_pool; - pthread_mutex_init(&dev->blitter.shaders.lock, NULL); - pan_blitter_prefill_blit_shader_cache(dev); + dev->blitter.shaders.blit = _mesa_hash_table_create( + NULL, pan_blit_shader_key_hash, pan_blit_shader_key_equal); + dev->blitter.shaders.blend = _mesa_hash_table_create( + NULL, pan_blit_blend_shader_key_hash, pan_blit_blend_shader_key_equal); + dev->blitter.shaders.pool = bin_pool; + pthread_mutex_init(&dev->blitter.shaders.lock, NULL); + pan_blitter_prefill_blit_shader_cache(dev); - dev->blitter.rsds.pool = desc_pool; - dev->blitter.rsds.rsds = - _mesa_hash_table_create(NULL, pan_blit_rsd_key_hash, - pan_blit_rsd_key_equal); - pthread_mutex_init(&dev->blitter.rsds.lock, NULL); + dev->blitter.rsds.pool = desc_pool; + dev->blitter.rsds.rsds = _mesa_hash_table_create(NULL, pan_blit_rsd_key_hash, + pan_blit_rsd_key_equal); + pthread_mutex_init(&dev->blitter.rsds.lock, NULL); } void GENX(pan_blitter_cleanup)(struct panfrost_device *dev) { - _mesa_hash_table_destroy(dev->blitter.shaders.blit, NULL); - _mesa_hash_table_destroy(dev->blitter.shaders.blend, NULL); - pthread_mutex_destroy(&dev->blitter.shaders.lock); - _mesa_hash_table_destroy(dev->blitter.rsds.rsds, NULL); - pthread_mutex_destroy(&dev->blitter.rsds.lock); + _mesa_hash_table_destroy(dev->blitter.shaders.blit, NULL); + _mesa_hash_table_destroy(dev->blitter.shaders.blend, NULL); + pthread_mutex_destroy(&dev->blitter.shaders.lock); + _mesa_hash_table_destroy(dev->blitter.rsds.rsds, NULL); + pthread_mutex_destroy(&dev->blitter.rsds.lock); } diff --git a/src/panfrost/lib/pan_blitter.h b/src/panfrost/lib/pan_blitter.h index cb71161f5c5..6381a90f574 100644 --- a/src/panfrost/lib/pan_blitter.h +++ b/src/panfrost/lib/pan_blitter.h @@ -27,12 +27,12 @@ #include "genxml/gen_macros.h" -#include "panfrost-job.h" +#include "util/format/u_format.h" #include "pan_cs.h" #include "pan_pool.h" #include "pan_texture.h" #include "pan_util.h" -#include "util/format/u_format.h" +#include "panfrost-job.h" struct pan_fb_info; struct pan_scoreboard; @@ -40,90 +40,84 @@ struct pan_pool; struct panfrost_device; struct pan_blit_info { - struct { - struct { - const struct pan_image *image; - enum pipe_format format; - } planes[2]; - unsigned level; - struct { - int32_t x, y, z; - unsigned layer; - } start, end; - } src, dst; - struct { - bool enable; - uint16_t minx, miny, maxx, maxy; - } scissor; - bool nearest; + struct { + struct { + const struct pan_image *image; + enum pipe_format format; + } planes[2]; + unsigned level; + struct { + int32_t x, y, z; + unsigned layer; + } start, end; + } src, dst; + struct { + bool enable; + uint16_t minx, miny, maxx, maxy; + } scissor; + bool nearest; }; struct pan_blit_context { - mali_ptr rsd, vpd; - mali_ptr textures; - mali_ptr samplers; - mali_ptr position; - struct { - enum mali_texture_dimension dim; - struct { - float x, y; - } start, end; - union { - unsigned layer_offset; - float z_offset; - }; - } src; - struct { - int32_t layer_offset; - int32_t cur_layer; - int32_t last_layer; - } dst; - float z_scale; + mali_ptr rsd, vpd; + mali_ptr textures; + mali_ptr samplers; + mali_ptr position; + struct { + enum mali_texture_dimension dim; + struct { + float x, y; + } start, end; + union { + unsigned layer_offset; + float z_offset; + }; + } src; + struct { + 
int32_t layer_offset; + int32_t cur_layer; + int32_t last_layer; + } dst; + float z_scale; }; -void -GENX(pan_blitter_init)(struct panfrost_device *dev, - struct pan_pool *bin_pool, - struct pan_pool *desc_pool); +void GENX(pan_blitter_init)(struct panfrost_device *dev, + struct pan_pool *bin_pool, + struct pan_pool *desc_pool); -void -GENX(pan_blitter_cleanup)(struct panfrost_device *dev); +void GENX(pan_blitter_cleanup)(struct panfrost_device *dev); -unsigned -GENX(pan_preload_fb)(struct pan_pool *desc_pool, - struct pan_scoreboard *scoreboard, - struct pan_fb_info *fb, - mali_ptr tsd, mali_ptr tiler, - struct panfrost_ptr *jobs); +unsigned GENX(pan_preload_fb)(struct pan_pool *desc_pool, + struct pan_scoreboard *scoreboard, + struct pan_fb_info *fb, mali_ptr tsd, + mali_ptr tiler, struct panfrost_ptr *jobs); -void -GENX(pan_blit_ctx_init)(struct panfrost_device *dev, - const struct pan_blit_info *info, - struct pan_pool *blit_pool, - struct pan_blit_context *ctx); +void GENX(pan_blit_ctx_init)(struct panfrost_device *dev, + const struct pan_blit_info *info, + struct pan_pool *blit_pool, + struct pan_blit_context *ctx); static inline bool pan_blit_next_surface(struct pan_blit_context *ctx) { - if (ctx->dst.last_layer < ctx->dst.layer_offset) { - if (ctx->dst.cur_layer <= ctx->dst.last_layer) - return false; + if (ctx->dst.last_layer < ctx->dst.layer_offset) { + if (ctx->dst.cur_layer <= ctx->dst.last_layer) + return false; - ctx->dst.cur_layer--; - } else { - if (ctx->dst.cur_layer >= ctx->dst.last_layer) - return false; + ctx->dst.cur_layer--; + } else { + if (ctx->dst.cur_layer >= ctx->dst.last_layer) + return false; - ctx->dst.cur_layer++; - } + ctx->dst.cur_layer++; + } - return true; + return true; } -struct panfrost_ptr -GENX(pan_blit)(struct pan_blit_context *ctx, - struct pan_pool *pool, - struct pan_scoreboard *scoreboard, - mali_ptr tsd, mali_ptr tiler); +struct panfrost_ptr GENX(pan_blit)(struct pan_blit_context *ctx, + struct pan_pool *pool, + struct pan_scoreboard *scoreboard, + mali_ptr tsd, mali_ptr tiler); #endif diff --git a/src/panfrost/lib/pan_bo.c b/src/panfrost/lib/pan_bo.c index b606d1b0359..145a039092a 100644 --- a/src/panfrost/lib/pan_bo.c +++ b/src/panfrost/lib/pan_bo.c @@ -24,10 +24,10 @@ * Alyssa Rosenzweig */ #include -#include #include -#include #include +#include +#include #include "drm-uapi/panfrost_drm.h" #include "pan_bo.h" @@ -56,53 +56,53 @@ */ static struct panfrost_bo * -panfrost_bo_alloc(struct panfrost_device *dev, size_t size, - uint32_t flags, const char *label) +panfrost_bo_alloc(struct panfrost_device *dev, size_t size, uint32_t flags, + const char *label) { - struct drm_panfrost_create_bo create_bo = { .size = size }; - struct panfrost_bo *bo; - int ret; + struct drm_panfrost_create_bo create_bo = {.size = size}; + struct panfrost_bo *bo; + int ret; - if (dev->kernel_version->version_major > 1 || - dev->kernel_version->version_minor >= 1) { - if (flags & PAN_BO_GROWABLE) - create_bo.flags |= PANFROST_BO_HEAP; - if (!(flags & PAN_BO_EXECUTE)) - create_bo.flags |= PANFROST_BO_NOEXEC; - } + if (dev->kernel_version->version_major > 1 || + dev->kernel_version->version_minor >= 1) { + if (flags & PAN_BO_GROWABLE) + create_bo.flags |= PANFROST_BO_HEAP; + if (!(flags & PAN_BO_EXECUTE)) + create_bo.flags |= PANFROST_BO_NOEXEC; + } - ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_CREATE_BO, &create_bo); - if (ret) { - fprintf(stderr, "DRM_IOCTL_PANFROST_CREATE_BO failed: %m\n"); - return NULL; - } + ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_CREATE_BO, 
&create_bo); + if (ret) { + fprintf(stderr, "DRM_IOCTL_PANFROST_CREATE_BO failed: %m\n"); + return NULL; + } - bo = pan_lookup_bo(dev, create_bo.handle); - assert(!memcmp(bo, &((struct panfrost_bo){}), sizeof(*bo))); + bo = pan_lookup_bo(dev, create_bo.handle); + assert(!memcmp(bo, &((struct panfrost_bo){}), sizeof(*bo))); - bo->size = create_bo.size; - bo->ptr.gpu = create_bo.offset; - bo->gem_handle = create_bo.handle; - bo->flags = flags; - bo->dev = dev; - bo->label = label; - return bo; + bo->size = create_bo.size; + bo->ptr.gpu = create_bo.offset; + bo->gem_handle = create_bo.handle; + bo->flags = flags; + bo->dev = dev; + bo->label = label; + return bo; } static void panfrost_bo_free(struct panfrost_bo *bo) { - struct drm_gem_close gem_close = { .handle = bo->gem_handle }; - int ret; + struct drm_gem_close gem_close = {.handle = bo->gem_handle}; + int ret; - ret = drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_CLOSE, &gem_close); - if (ret) { - fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %m\n"); - assert(0); - } + ret = drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_CLOSE, &gem_close); + if (ret) { + fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %m\n"); + assert(0); + } - /* BO will be freed with the sparse array, but zero to indicate free */ - memset(bo, 0, sizeof(*bo)); + /* BO will be freed with the sparse array, but zero to indicate free */ + memset(bo, 0, sizeof(*bo)); } /* Returns true if the BO is ready, false otherwise. @@ -113,44 +113,44 @@ panfrost_bo_free(struct panfrost_bo *bo) bool panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns, bool wait_readers) { - struct drm_panfrost_wait_bo req = { - .handle = bo->gem_handle, - .timeout_ns = timeout_ns, - }; - int ret; + struct drm_panfrost_wait_bo req = { + .handle = bo->gem_handle, + .timeout_ns = timeout_ns, + }; + int ret; - /* If the BO has been exported or imported we can't rely on the cached - * state, we need to call the WAIT_BO ioctl. - */ - if (!(bo->flags & PAN_BO_SHARED)) { - /* If ->gpu_access is 0, the BO is idle, no need to wait. */ - if (!bo->gpu_access) - return true; + /* If the BO has been exported or imported we can't rely on the cached + * state, we need to call the WAIT_BO ioctl. + */ + if (!(bo->flags & PAN_BO_SHARED)) { + /* If ->gpu_access is 0, the BO is idle, no need to wait. */ + if (!bo->gpu_access) + return true; - /* If the caller only wants to wait for writers and no - * writes are pending, we don't have to wait. - */ - if (!wait_readers && !(bo->gpu_access & PAN_BO_ACCESS_WRITE)) - return true; - } + /* If the caller only wants to wait for writers and no + * writes are pending, we don't have to wait. + */ + if (!wait_readers && !(bo->gpu_access & PAN_BO_ACCESS_WRITE)) + return true; + } - /* The ioctl returns >= 0 value when the BO we are waiting for is ready - * -1 otherwise. - */ - ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_WAIT_BO, &req); - if (ret != -1) { - /* Set gpu_access to 0 so that the next call to bo_wait() - * doesn't have to call the WAIT_BO ioctl. - */ - bo->gpu_access = 0; - return true; - } + /* The ioctl returns >= 0 value when the BO we are waiting for is ready + * -1 otherwise. + */ + ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_WAIT_BO, &req); + if (ret != -1) { + /* Set gpu_access to 0 so that the next call to bo_wait() + * doesn't have to call the WAIT_BO ioctl. + */ + bo->gpu_access = 0; + return true; + } - /* If errno is not ETIMEDOUT or EBUSY that means the handle we passed - * is invalid, which shouldn't happen here. 
- */ - assert(errno == ETIMEDOUT || errno == EBUSY); - return false; + /* If errno is not ETIMEDOUT or EBUSY that means the handle we passed + * is invalid, which shouldn't happen here. + */ + assert(errno == ETIMEDOUT || errno == EBUSY); + return false; } /* Helper to calculate the bucket index of a BO */ @@ -158,24 +158,23 @@ panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns, bool wait_readers) static unsigned pan_bucket_index(unsigned size) { - /* Round down to POT to compute a bucket index */ + /* Round down to POT to compute a bucket index */ - unsigned bucket_index = util_logbase2(size); + unsigned bucket_index = util_logbase2(size); - /* Clamp the bucket index; all huge allocations will be - * sorted into the largest bucket */ + /* Clamp the bucket index; all huge allocations will be + * sorted into the largest bucket */ - bucket_index = CLAMP(bucket_index, MIN_BO_CACHE_BUCKET, - MAX_BO_CACHE_BUCKET); + bucket_index = CLAMP(bucket_index, MIN_BO_CACHE_BUCKET, MAX_BO_CACHE_BUCKET); - /* Reindex from 0 */ - return (bucket_index - MIN_BO_CACHE_BUCKET); + /* Reindex from 0 */ + return (bucket_index - MIN_BO_CACHE_BUCKET); } static struct list_head * pan_bucket(struct panfrost_device *dev, unsigned size) { - return &dev->bo_cache.buckets[pan_bucket_index(size)]; + return &dev->bo_cache.buckets[pan_bucket_index(size)]; } /* Tries to fetch a BO of sufficient size with the appropriate flags from the @@ -184,74 +183,71 @@ pan_bucket(struct panfrost_device *dev, unsigned size) * BO. */ static struct panfrost_bo * -panfrost_bo_cache_fetch(struct panfrost_device *dev, - size_t size, uint32_t flags, const char *label, - bool dontwait) +panfrost_bo_cache_fetch(struct panfrost_device *dev, size_t size, + uint32_t flags, const char *label, bool dontwait) { - pthread_mutex_lock(&dev->bo_cache.lock); - struct list_head *bucket = pan_bucket(dev, size); - struct panfrost_bo *bo = NULL; + pthread_mutex_lock(&dev->bo_cache.lock); + struct list_head *bucket = pan_bucket(dev, size); + struct panfrost_bo *bo = NULL; - /* Iterate the bucket looking for something suitable */ - list_for_each_entry_safe(struct panfrost_bo, entry, bucket, - bucket_link) { - if (entry->size < size || entry->flags != flags) - continue; + /* Iterate the bucket looking for something suitable */ + list_for_each_entry_safe(struct panfrost_bo, entry, bucket, bucket_link) { + if (entry->size < size || entry->flags != flags) + continue; - /* If the oldest BO in the cache is busy, likely so is - * everything newer, so bail. */ - if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX, - PAN_BO_ACCESS_RW)) - break; + /* If the oldest BO in the cache is busy, likely so is + * everything newer, so bail. */ + if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX, PAN_BO_ACCESS_RW)) + break; - struct drm_panfrost_madvise madv = { - .handle = entry->gem_handle, - .madv = PANFROST_MADV_WILLNEED, - }; - int ret; + struct drm_panfrost_madvise madv = { + .handle = entry->gem_handle, + .madv = PANFROST_MADV_WILLNEED, + }; + int ret; - /* This one works, splice it out of the cache */ - list_del(&entry->bucket_link); - list_del(&entry->lru_link); + /* This one works, splice it out of the cache */ + list_del(&entry->bucket_link); + list_del(&entry->lru_link); - ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv); - if (!ret && !madv.retained) { - panfrost_bo_free(entry); - continue; - } - /* Let's go! 
*/ - bo = entry; - bo->label = label; - break; - } - pthread_mutex_unlock(&dev->bo_cache.lock); + ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv); + if (!ret && !madv.retained) { + panfrost_bo_free(entry); + continue; + } + /* Let's go! */ + bo = entry; + bo->label = label; + break; + } + pthread_mutex_unlock(&dev->bo_cache.lock); - return bo; + return bo; } static void panfrost_bo_cache_evict_stale_bos(struct panfrost_device *dev) { - struct timespec time; + struct timespec time; - clock_gettime(CLOCK_MONOTONIC, &time); - list_for_each_entry_safe(struct panfrost_bo, entry, - &dev->bo_cache.lru, lru_link) { - /* We want all entries that have been used more than 1 sec - * ago to be dropped, others can be kept. - * Note the <= 2 check and not <= 1. It's here to account for - * the fact that we're only testing ->tv_sec, not ->tv_nsec. - * That means we might keep entries that are between 1 and 2 - * seconds old, but we don't really care, as long as unused BOs - * are dropped at some point. - */ - if (time.tv_sec - entry->last_used <= 2) - break; + clock_gettime(CLOCK_MONOTONIC, &time); + list_for_each_entry_safe(struct panfrost_bo, entry, &dev->bo_cache.lru, + lru_link) { + /* We want all entries that have been used more than 1 sec + * ago to be dropped, others can be kept. + * Note the <= 2 check and not <= 1. It's here to account for + * the fact that we're only testing ->tv_sec, not ->tv_nsec. + * That means we might keep entries that are between 1 and 2 + * seconds old, but we don't really care, as long as unused BOs + * are dropped at some point. + */ + if (time.tv_sec - entry->last_used <= 2) + break; - list_del(&entry->bucket_link); - list_del(&entry->lru_link); - panfrost_bo_free(entry); - } + list_del(&entry->bucket_link); + list_del(&entry->lru_link); + panfrost_bo_free(entry); + } } /* Tries to add a BO to the cache. Returns if it was @@ -260,43 +256,43 @@ panfrost_bo_cache_evict_stale_bos(struct panfrost_device *dev) static bool panfrost_bo_cache_put(struct panfrost_bo *bo) { - struct panfrost_device *dev = bo->dev; + struct panfrost_device *dev = bo->dev; - if (bo->flags & PAN_BO_SHARED || dev->debug & PAN_DBG_NO_CACHE) - return false; + if (bo->flags & PAN_BO_SHARED || dev->debug & PAN_DBG_NO_CACHE) + return false; - /* Must be first */ - pthread_mutex_lock(&dev->bo_cache.lock); + /* Must be first */ + pthread_mutex_lock(&dev->bo_cache.lock); - struct list_head *bucket = pan_bucket(dev, MAX2(bo->size, 4096)); - struct drm_panfrost_madvise madv; - struct timespec time; + struct list_head *bucket = pan_bucket(dev, MAX2(bo->size, 4096)); + struct drm_panfrost_madvise madv; + struct timespec time; - madv.handle = bo->gem_handle; - madv.madv = PANFROST_MADV_DONTNEED; - madv.retained = 0; + madv.handle = bo->gem_handle; + madv.madv = PANFROST_MADV_DONTNEED; + madv.retained = 0; - drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv); + drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv); - /* Add us to the bucket */ - list_addtail(&bo->bucket_link, bucket); + /* Add us to the bucket */ + list_addtail(&bo->bucket_link, bucket); - /* Add us to the LRU list and update the last_used field. */ - list_addtail(&bo->lru_link, &dev->bo_cache.lru); - clock_gettime(CLOCK_MONOTONIC, &time); - bo->last_used = time.tv_sec; + /* Add us to the LRU list and update the last_used field. */ + list_addtail(&bo->lru_link, &dev->bo_cache.lru); + clock_gettime(CLOCK_MONOTONIC, &time); + bo->last_used = time.tv_sec; - /* Let's do some cleanup in the BO cache while we hold the - * lock. 
- */ - panfrost_bo_cache_evict_stale_bos(dev); + /* Let's do some cleanup in the BO cache while we hold the + * lock. + */ + panfrost_bo_cache_evict_stale_bos(dev); - /* Update the label to help debug BO cache memory usage issues */ - bo->label = "Unused (BO cache)"; + /* Update the label to help debug BO cache memory usage issues */ + bo->label = "Unused (BO cache)"; - /* Must be last */ - pthread_mutex_unlock(&dev->bo_cache.lock); - return true; + /* Must be last */ + pthread_mutex_unlock(&dev->bo_cache.lock); + return true; } /* Evicts all BOs from the cache. Called during context @@ -306,228 +302,226 @@ panfrost_bo_cache_put(struct panfrost_bo *bo) * OS) */ void -panfrost_bo_cache_evict_all( - struct panfrost_device *dev) +panfrost_bo_cache_evict_all(struct panfrost_device *dev) { - pthread_mutex_lock(&dev->bo_cache.lock); - for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i) { - struct list_head *bucket = &dev->bo_cache.buckets[i]; + pthread_mutex_lock(&dev->bo_cache.lock); + for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i) { + struct list_head *bucket = &dev->bo_cache.buckets[i]; - list_for_each_entry_safe(struct panfrost_bo, entry, bucket, - bucket_link) { - list_del(&entry->bucket_link); - list_del(&entry->lru_link); - panfrost_bo_free(entry); - } - } - pthread_mutex_unlock(&dev->bo_cache.lock); + list_for_each_entry_safe(struct panfrost_bo, entry, bucket, bucket_link) { + list_del(&entry->bucket_link); + list_del(&entry->lru_link); + panfrost_bo_free(entry); + } + } + pthread_mutex_unlock(&dev->bo_cache.lock); } void panfrost_bo_mmap(struct panfrost_bo *bo) { - struct drm_panfrost_mmap_bo mmap_bo = { .handle = bo->gem_handle }; - int ret; + struct drm_panfrost_mmap_bo mmap_bo = {.handle = bo->gem_handle}; + int ret; - if (bo->ptr.cpu) - return; + if (bo->ptr.cpu) + return; - ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo); - if (ret) { - fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n"); - assert(0); - } + ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo); + if (ret) { + fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n"); + assert(0); + } - bo->ptr.cpu = os_mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, - bo->dev->fd, mmap_bo.offset); - if (bo->ptr.cpu == MAP_FAILED) { - bo->ptr.cpu = NULL; - fprintf(stderr, - "mmap failed: result=%p size=0x%llx fd=%i offset=0x%llx %m\n", - bo->ptr.cpu, (long long)bo->size, bo->dev->fd, - (long long)mmap_bo.offset); - } + bo->ptr.cpu = os_mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, + bo->dev->fd, mmap_bo.offset); + if (bo->ptr.cpu == MAP_FAILED) { + bo->ptr.cpu = NULL; + fprintf(stderr, + "mmap failed: result=%p size=0x%llx fd=%i offset=0x%llx %m\n", + bo->ptr.cpu, (long long)bo->size, bo->dev->fd, + (long long)mmap_bo.offset); + } } static void panfrost_bo_munmap(struct panfrost_bo *bo) { - if (!bo->ptr.cpu) - return; + if (!bo->ptr.cpu) + return; - if (os_munmap((void *) (uintptr_t)bo->ptr.cpu, bo->size)) { - perror("munmap"); - abort(); - } + if (os_munmap((void *)(uintptr_t)bo->ptr.cpu, bo->size)) { + perror("munmap"); + abort(); + } - bo->ptr.cpu = NULL; + bo->ptr.cpu = NULL; } struct panfrost_bo * -panfrost_bo_create(struct panfrost_device *dev, size_t size, - uint32_t flags, const char *label) +panfrost_bo_create(struct panfrost_device *dev, size_t size, uint32_t flags, + const char *label) { - struct panfrost_bo *bo; + struct panfrost_bo *bo; - /* Kernel will fail (confusingly) with EPERM otherwise */ - assert(size > 0); + /* Kernel will 
fail (confusingly) with EPERM otherwise */ + assert(size > 0); - /* To maximize BO cache usage, don't allocate tiny BOs */ - size = ALIGN_POT(size, 4096); + /* To maximize BO cache usage, don't allocate tiny BOs */ + size = ALIGN_POT(size, 4096); - /* GROWABLE BOs cannot be mmapped */ - if (flags & PAN_BO_GROWABLE) - assert(flags & PAN_BO_INVISIBLE); + /* GROWABLE BOs cannot be mmapped */ + if (flags & PAN_BO_GROWABLE) + assert(flags & PAN_BO_INVISIBLE); - /* Ideally, we get a BO that's ready in the cache, or allocate a fresh - * BO. If allocation fails, we can try waiting for something in the - * cache. But if there's no nothing suitable, we should flush the cache - * to make space for the new allocation. - */ - bo = panfrost_bo_cache_fetch(dev, size, flags, label, true); - if (!bo) - bo = panfrost_bo_alloc(dev, size, flags, label); - if (!bo) - bo = panfrost_bo_cache_fetch(dev, size, flags, label, false); - if (!bo) { - panfrost_bo_cache_evict_all(dev); - bo = panfrost_bo_alloc(dev, size, flags, label); - } + /* Ideally, we get a BO that's ready in the cache, or allocate a fresh + * BO. If allocation fails, we can try waiting for something in the + * cache. But if there's no nothing suitable, we should flush the cache + * to make space for the new allocation. + */ + bo = panfrost_bo_cache_fetch(dev, size, flags, label, true); + if (!bo) + bo = panfrost_bo_alloc(dev, size, flags, label); + if (!bo) + bo = panfrost_bo_cache_fetch(dev, size, flags, label, false); + if (!bo) { + panfrost_bo_cache_evict_all(dev); + bo = panfrost_bo_alloc(dev, size, flags, label); + } - if (!bo) { - unreachable("BO creation failed. We don't handle that yet."); - return NULL; - } + if (!bo) { + unreachable("BO creation failed. We don't handle that yet."); + return NULL; + } - /* Only mmap now if we know we need to. For CPU-invisible buffers, we - * never map since we don't care about their contents; they're purely - * for GPU-internal use. But we do trace them anyway. */ + /* Only mmap now if we know we need to. For CPU-invisible buffers, we + * never map since we don't care about their contents; they're purely + * for GPU-internal use. But we do trace them anyway. 
*/ - if (!(flags & (PAN_BO_INVISIBLE | PAN_BO_DELAY_MMAP))) - panfrost_bo_mmap(bo); + if (!(flags & (PAN_BO_INVISIBLE | PAN_BO_DELAY_MMAP))) + panfrost_bo_mmap(bo); - p_atomic_set(&bo->refcnt, 1); + p_atomic_set(&bo->refcnt, 1); - if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) { - if (flags & PAN_BO_INVISIBLE) - pandecode_inject_mmap(bo->ptr.gpu, NULL, bo->size, NULL); - else if (!(flags & PAN_BO_DELAY_MMAP)) - pandecode_inject_mmap(bo->ptr.gpu, bo->ptr.cpu, bo->size, NULL); - } + if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) { + if (flags & PAN_BO_INVISIBLE) + pandecode_inject_mmap(bo->ptr.gpu, NULL, bo->size, NULL); + else if (!(flags & PAN_BO_DELAY_MMAP)) + pandecode_inject_mmap(bo->ptr.gpu, bo->ptr.cpu, bo->size, NULL); + } - return bo; + return bo; } void panfrost_bo_reference(struct panfrost_bo *bo) { - if (bo) { - ASSERTED int count = p_atomic_inc_return(&bo->refcnt); - assert(count != 1); - } + if (bo) { + ASSERTED int count = p_atomic_inc_return(&bo->refcnt); + assert(count != 1); + } } void panfrost_bo_unreference(struct panfrost_bo *bo) { - if (!bo) - return; + if (!bo) + return; - /* Don't return to cache if there are still references */ - if (p_atomic_dec_return(&bo->refcnt)) - return; + /* Don't return to cache if there are still references */ + if (p_atomic_dec_return(&bo->refcnt)) + return; - struct panfrost_device *dev = bo->dev; + struct panfrost_device *dev = bo->dev; - pthread_mutex_lock(&dev->bo_map_lock); + pthread_mutex_lock(&dev->bo_map_lock); - /* Someone might have imported this BO while we were waiting for the - * lock, let's make sure it's still not referenced before freeing it. - */ - if (p_atomic_read(&bo->refcnt) == 0) { - /* When the reference count goes to zero, we need to cleanup */ - panfrost_bo_munmap(bo); + /* Someone might have imported this BO while we were waiting for the + * lock, let's make sure it's still not referenced before freeing it. + */ + if (p_atomic_read(&bo->refcnt) == 0) { + /* When the reference count goes to zero, we need to cleanup */ + panfrost_bo_munmap(bo); - if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) - pandecode_inject_free(bo->ptr.gpu, bo->size); + if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) + pandecode_inject_free(bo->ptr.gpu, bo->size); - /* Rather than freeing the BO now, we'll cache the BO for later - * allocations if we're allowed to. - */ - if (!panfrost_bo_cache_put(bo)) - panfrost_bo_free(bo); - - } - pthread_mutex_unlock(&dev->bo_map_lock); + /* Rather than freeing the BO now, we'll cache the BO for later + * allocations if we're allowed to. 
+ */ + if (!panfrost_bo_cache_put(bo)) + panfrost_bo_free(bo); + } + pthread_mutex_unlock(&dev->bo_map_lock); } struct panfrost_bo * panfrost_bo_import(struct panfrost_device *dev, int fd) { - struct panfrost_bo *bo; - struct drm_panfrost_get_bo_offset get_bo_offset = {0,}; - ASSERTED int ret; - unsigned gem_handle; + struct panfrost_bo *bo; + struct drm_panfrost_get_bo_offset get_bo_offset = { + 0, + }; + ASSERTED int ret; + unsigned gem_handle; - ret = drmPrimeFDToHandle(dev->fd, fd, &gem_handle); - assert(!ret); + ret = drmPrimeFDToHandle(dev->fd, fd, &gem_handle); + assert(!ret); - pthread_mutex_lock(&dev->bo_map_lock); - bo = pan_lookup_bo(dev, gem_handle); + pthread_mutex_lock(&dev->bo_map_lock); + bo = pan_lookup_bo(dev, gem_handle); - if (!bo->dev) { - get_bo_offset.handle = gem_handle; - ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_GET_BO_OFFSET, &get_bo_offset); - assert(!ret); + if (!bo->dev) { + get_bo_offset.handle = gem_handle; + ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_GET_BO_OFFSET, &get_bo_offset); + assert(!ret); - bo->dev = dev; - bo->ptr.gpu = (mali_ptr) get_bo_offset.offset; - bo->size = lseek(fd, 0, SEEK_END); - /* Sometimes this can fail and return -1. size of -1 is not - * a nice thing for mmap to try mmap. Be more robust also - * for zero sized maps and fail nicely too - */ - if ((bo->size == 0) || (bo->size == (size_t)-1)) { - pthread_mutex_unlock(&dev->bo_map_lock); - return NULL; - } - bo->flags = PAN_BO_SHARED; - bo->gem_handle = gem_handle; - p_atomic_set(&bo->refcnt, 1); - } else { - /* bo->refcnt == 0 can happen if the BO - * was being released but panfrost_bo_import() acquired the - * lock before panfrost_bo_unreference(). In that case, refcnt - * is 0 and we can't use panfrost_bo_reference() directly, we - * have to re-initialize the refcnt(). - * Note that panfrost_bo_unreference() checks - * refcnt value just after acquiring the lock to - * make sure the object is not freed if panfrost_bo_import() - * acquired it in the meantime. - */ - if (p_atomic_read(&bo->refcnt) == 0) - p_atomic_set(&bo->refcnt, 1); - else - panfrost_bo_reference(bo); - } - pthread_mutex_unlock(&dev->bo_map_lock); + bo->dev = dev; + bo->ptr.gpu = (mali_ptr)get_bo_offset.offset; + bo->size = lseek(fd, 0, SEEK_END); + /* Sometimes this can fail and return -1. size of -1 is not + * a nice thing for mmap to try mmap. Be more robust also + * for zero sized maps and fail nicely too + */ + if ((bo->size == 0) || (bo->size == (size_t)-1)) { + pthread_mutex_unlock(&dev->bo_map_lock); + return NULL; + } + bo->flags = PAN_BO_SHARED; + bo->gem_handle = gem_handle; + p_atomic_set(&bo->refcnt, 1); + } else { + /* bo->refcnt == 0 can happen if the BO + * was being released but panfrost_bo_import() acquired the + * lock before panfrost_bo_unreference(). In that case, refcnt + * is 0 and we can't use panfrost_bo_reference() directly, we + * have to re-initialize the refcnt(). + * Note that panfrost_bo_unreference() checks + * refcnt value just after acquiring the lock to + * make sure the object is not freed if panfrost_bo_import() + * acquired it in the meantime. 
+ */ + if (p_atomic_read(&bo->refcnt) == 0) + p_atomic_set(&bo->refcnt, 1); + else + panfrost_bo_reference(bo); + } + pthread_mutex_unlock(&dev->bo_map_lock); - return bo; + return bo; } int panfrost_bo_export(struct panfrost_bo *bo) { - struct drm_prime_handle args = { - .handle = bo->gem_handle, - .flags = DRM_CLOEXEC, - }; + struct drm_prime_handle args = { + .handle = bo->gem_handle, + .flags = DRM_CLOEXEC, + }; - int ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args); - if (ret == -1) - return -1; + int ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args); + if (ret == -1) + return -1; - bo->flags |= PAN_BO_SHARED; - return args.fd; + bo->flags |= PAN_BO_SHARED; + return args.fd; } - diff --git a/src/panfrost/lib/pan_bo.h b/src/panfrost/lib/pan_bo.h index 7d19fba9dfc..4742fec5bd1 100644 --- a/src/panfrost/lib/pan_bo.h +++ b/src/panfrost/lib/pan_bo.h @@ -26,113 +26,106 @@ #ifndef __PAN_BO_H__ #define __PAN_BO_H__ +#include #include "util/list.h" #include "panfrost-job.h" -#include /* Flags for allocated memory */ /* This memory region is executable */ -#define PAN_BO_EXECUTE (1 << 0) +#define PAN_BO_EXECUTE (1 << 0) /* This memory region should be lazily allocated and grow-on-page-fault. Must * be used in conjunction with INVISIBLE */ -#define PAN_BO_GROWABLE (1 << 1) +#define PAN_BO_GROWABLE (1 << 1) /* This memory region should not be mapped to the CPU */ -#define PAN_BO_INVISIBLE (1 << 2) +#define PAN_BO_INVISIBLE (1 << 2) /* This region may not be used immediately and will not mmap on allocate * (semantically distinct from INVISIBLE, which cannot never be mmaped) */ -#define PAN_BO_DELAY_MMAP (1 << 3) +#define PAN_BO_DELAY_MMAP (1 << 3) /* BO is shared across processes (imported or exported) and therefore cannot be * cached locally */ -#define PAN_BO_SHARED (1 << 4) +#define PAN_BO_SHARED (1 << 4) /* GPU access flags */ /* BO is either shared (can be accessed by more than one GPU batch) or private * (reserved by a specific GPU job). */ -#define PAN_BO_ACCESS_PRIVATE (0 << 0) -#define PAN_BO_ACCESS_SHARED (1 << 0) +#define PAN_BO_ACCESS_PRIVATE (0 << 0) +#define PAN_BO_ACCESS_SHARED (1 << 0) /* BO is being read/written by the GPU */ -#define PAN_BO_ACCESS_READ (1 << 1) -#define PAN_BO_ACCESS_WRITE (1 << 2) -#define PAN_BO_ACCESS_RW (PAN_BO_ACCESS_READ | PAN_BO_ACCESS_WRITE) +#define PAN_BO_ACCESS_READ (1 << 1) +#define PAN_BO_ACCESS_WRITE (1 << 2) +#define PAN_BO_ACCESS_RW (PAN_BO_ACCESS_READ | PAN_BO_ACCESS_WRITE) /* BO is accessed by the vertex/tiler job. */ -#define PAN_BO_ACCESS_VERTEX_TILER (1 << 3) +#define PAN_BO_ACCESS_VERTEX_TILER (1 << 3) /* BO is accessed by the fragment job. */ -#define PAN_BO_ACCESS_FRAGMENT (1 << 4) +#define PAN_BO_ACCESS_FRAGMENT (1 << 4) typedef uint8_t pan_bo_access; struct panfrost_device; struct panfrost_ptr { - /* CPU address */ - void *cpu; + /* CPU address */ + void *cpu; - /* GPU address */ - mali_ptr gpu; + /* GPU address */ + mali_ptr gpu; }; struct panfrost_bo { - /* Must be first for casting */ - struct list_head bucket_link; + /* Must be first for casting */ + struct list_head bucket_link; - /* Used to link the BO to the BO cache LRU list. */ - struct list_head lru_link; + /* Used to link the BO to the BO cache LRU list. */ + struct list_head lru_link; - /* Store the time this BO was use last, so the BO cache logic can evict - * stale BOs. - */ - time_t last_used; + /* Store the time this BO was use last, so the BO cache logic can evict + * stale BOs. 
+ */ + time_t last_used; - /* Atomic reference count */ - int32_t refcnt; + /* Atomic reference count */ + int32_t refcnt; - struct panfrost_device *dev; + struct panfrost_device *dev; - /* Mapping for the entire object (all levels) */ - struct panfrost_ptr ptr; + /* Mapping for the entire object (all levels) */ + struct panfrost_ptr ptr; - /* Size of all entire trees */ - size_t size; + /* Size of all entire trees */ + size_t size; - int gem_handle; + int gem_handle; - uint32_t flags; + uint32_t flags; - /* Combination of PAN_BO_ACCESS_{READ,WRITE} flags encoding pending - * GPU accesses to this BO. Useful to avoid calling the WAIT_BO ioctl - * when the BO is idle. - */ - uint32_t gpu_access; + /* Combination of PAN_BO_ACCESS_{READ,WRITE} flags encoding pending + * GPU accesses to this BO. Useful to avoid calling the WAIT_BO ioctl + * when the BO is idle. + */ + uint32_t gpu_access; - /* Human readable description of the BO for debugging. */ - const char *label; + /* Human readable description of the BO for debugging. */ + const char *label; }; -bool -panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns, bool wait_readers); -void -panfrost_bo_reference(struct panfrost_bo *bo); -void -panfrost_bo_unreference(struct panfrost_bo *bo); -struct panfrost_bo * -panfrost_bo_create(struct panfrost_device *dev, size_t size, - uint32_t flags, const char *label); -void -panfrost_bo_mmap(struct panfrost_bo *bo); -struct panfrost_bo * -panfrost_bo_import(struct panfrost_device *dev, int fd); -int -panfrost_bo_export(struct panfrost_bo *bo); -void -panfrost_bo_cache_evict_all(struct panfrost_device *dev); +bool panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns, + bool wait_readers); +void panfrost_bo_reference(struct panfrost_bo *bo); +void panfrost_bo_unreference(struct panfrost_bo *bo); +struct panfrost_bo *panfrost_bo_create(struct panfrost_device *dev, size_t size, + uint32_t flags, const char *label); +void panfrost_bo_mmap(struct panfrost_bo *bo); +struct panfrost_bo *panfrost_bo_import(struct panfrost_device *dev, int fd); +int panfrost_bo_export(struct panfrost_bo *bo); +void panfrost_bo_cache_evict_all(struct panfrost_device *dev); #endif /* __PAN_BO_H__ */ diff --git a/src/panfrost/lib/pan_clear.c b/src/panfrost/lib/pan_clear.c index 4b7a302cf09..b1a8533a8ee 100644 --- a/src/panfrost/lib/pan_clear.c +++ b/src/panfrost/lib/pan_clear.c @@ -26,11 +26,11 @@ #include "genxml/gen_macros.h" #include -#include "pan_util.h" -#include "pan_format.h" #include "gallium/auxiliary/util/u_pack_color.h" -#include "util/rounding.h" #include "util/format_srgb.h" +#include "util/rounding.h" +#include "pan_format.h" +#include "pan_util.h" /* Clear colours are packed as the internal format of the tilebuffer, looked up * in the blendable formats table given the render target format. 
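For context on the packing hunks below: each blendable tile-buffer layout gives every channel m integer and n fractional bits, and float_to_fixed() scales the normalized float by (2^m - 1) << n when dithering (so the fractional bits carry real precision), or rounds against 2^m - 1 and shifts left by n otherwise. The following is a minimal standalone sketch of that conversion, assuming an illustrative 4.4 layout per channel and plain rintf() in place of the driver's round-to-even helper; it is not part of the patch itself.

#include <math.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same scheme as float_to_fixed() in the hunk below: m integer bits and
 * n fractional bits per channel. rintf() stands in for Mesa's
 * _mesa_roundevenf(); under the default rounding mode the results match
 * for these inputs. */
static uint32_t
to_fixed(float f, unsigned bits_int, unsigned bits_frac, bool dither)
{
   uint32_t m = (1u << bits_int) - 1;

   if (dither)
      return (uint32_t)rintf(f * (float)(m << bits_frac));

   return (uint32_t)rintf(f * (float)m) << bits_frac;
}

int
main(void)
{
   /* Hypothetical 4.4 layout for each of R, G, B, A, packed from bit 0
    * upward into the 32-bit tile-buffer word. */
   const float rgba[4] = {1.0f, 0.5f, 0.25f, 1.0f};
   uint32_t word = 0;

   for (unsigned c = 0; c < 4; ++c)
      word |= to_fixed(rgba[c], 4, 4, true) << (c * 8);

   /* Prints 0xf03c78f0: red 0xf0, green 0x78, blue 0x3c, alpha 0xf0. */
   printf("packed clear word: 0x%08x\n", word);
   return 0;
}

In the driver itself, pan_pack_color_32() in the first hunk below then replicates this 32-bit word four times to fill the packed clear-value slot.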
@@ -49,8 +49,8 @@ static void pan_pack_color_32(uint32_t *packed, uint32_t v) { - for (unsigned i = 0; i < 4; ++i) - packed[i] = v; + for (unsigned i = 0; i < 4; ++i) + packed[i] = v; } /* For m integer bits and n fractional bits, calculate the conversion factor, @@ -61,22 +61,22 @@ pan_pack_color_32(uint32_t *packed, uint32_t v) static inline uint32_t float_to_fixed(float f, unsigned bits_int, unsigned bits_frac, bool dither) { - uint32_t m = (1 << bits_int) - 1; + uint32_t m = (1 << bits_int) - 1; - if (dither) { - float factor = m << bits_frac; - return _mesa_roundevenf(f * factor); - } else { - uint32_t v = _mesa_roundevenf(f * (float) m); - return v << bits_frac; - } + if (dither) { + float factor = m << bits_frac; + return _mesa_roundevenf(f * factor); + } else { + uint32_t v = _mesa_roundevenf(f * (float)m); + return v << bits_frac; + } } struct mali_tib_layout { - unsigned int_r, frac_r; - unsigned int_g, frac_g; - unsigned int_b, frac_b; - unsigned int_a, frac_a; + unsigned int_r, frac_r; + unsigned int_g, frac_g; + unsigned int_b, frac_b; + unsigned int_a, frac_a; }; /* clang-format off */ @@ -93,76 +93,77 @@ static const struct mali_tib_layout tib_layouts[] = { /* Raw values are stored as-is but replicated for multisampling */ static void -pan_pack_raw(uint32_t *packed, const union pipe_color_union *color, enum pipe_format format) +pan_pack_raw(uint32_t *packed, const union pipe_color_union *color, + enum pipe_format format) { - union util_color out = { 0 }; - unsigned size = util_format_get_blocksize(format); - assert(size <= 16); + union util_color out = {0}; + unsigned size = util_format_get_blocksize(format); + assert(size <= 16); - util_pack_color(color->f, format, &out); + util_pack_color(color->f, format, &out); - if (size == 1) { - unsigned s = out.ui[0] | (out.ui[0] << 8); - pan_pack_color_32(packed, s | (s << 16)); - } else if (size == 2) - pan_pack_color_32(packed, out.ui[0] | (out.ui[0] << 16)); - else if (size <= 4) - pan_pack_color_32(packed, out.ui[0]); - else if (size <= 8) { - memcpy(packed + 0, out.ui, 8); - memcpy(packed + 2, out.ui, 8); - } else { - memcpy(packed, out.ui, 16); - } + if (size == 1) { + unsigned s = out.ui[0] | (out.ui[0] << 8); + pan_pack_color_32(packed, s | (s << 16)); + } else if (size == 2) + pan_pack_color_32(packed, out.ui[0] | (out.ui[0] << 16)); + else if (size <= 4) + pan_pack_color_32(packed, out.ui[0]); + else if (size <= 8) { + memcpy(packed + 0, out.ui, 8); + memcpy(packed + 2, out.ui, 8); + } else { + memcpy(packed, out.ui, 16); + } } void pan_pack_color(uint32_t *packed, const union pipe_color_union *color, enum pipe_format format, bool dithered) { - /* Set of blendable formats is common across versions. TODO: v9 */ - enum mali_color_buffer_internal_format internal = - panfrost_blendable_formats_v7[format].internal; + /* Set of blendable formats is common across versions. TODO: v9 */ + enum mali_color_buffer_internal_format internal = + panfrost_blendable_formats_v7[format].internal; - if (internal == MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW_VALUE) { - pan_pack_raw(packed, color, format); - return; - } + if (internal == MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW_VALUE) { + pan_pack_raw(packed, color, format); + return; + } - /* Saturate to [0, 1] by definition of UNORM. Prevents overflow. */ - float r = SATURATE(color->f[0]); - float g = SATURATE(color->f[1]); - float b = SATURATE(color->f[2]); - float a = SATURATE(color->f[3]); + /* Saturate to [0, 1] by definition of UNORM. Prevents overflow. 
*/ + float r = SATURATE(color->f[0]); + float g = SATURATE(color->f[1]); + float b = SATURATE(color->f[2]); + float a = SATURATE(color->f[3]); - /* Fill in alpha = 1.0 by default */ - if (!util_format_has_alpha(format)) - a = 1.0; + /* Fill in alpha = 1.0 by default */ + if (!util_format_has_alpha(format)) + a = 1.0; - /* Convert colourspace while we still have floats */ - if (util_format_is_srgb(format)) { - r = util_format_linear_to_srgb_float(r); - g = util_format_linear_to_srgb_float(g); - b = util_format_linear_to_srgb_float(b); - } + /* Convert colourspace while we still have floats */ + if (util_format_is_srgb(format)) { + r = util_format_linear_to_srgb_float(r); + g = util_format_linear_to_srgb_float(g); + b = util_format_linear_to_srgb_float(b); + } - /* Look up the layout of the tilebuffer */ - assert(internal < ARRAY_SIZE(tib_layouts)); - struct mali_tib_layout l = tib_layouts[internal]; + /* Look up the layout of the tilebuffer */ + assert(internal < ARRAY_SIZE(tib_layouts)); + struct mali_tib_layout l = tib_layouts[internal]; - unsigned count_r = l.int_r + l.frac_r; - unsigned count_g = l.int_g + l.frac_g + count_r; - unsigned count_b = l.int_b + l.frac_b + count_g; - ASSERTED unsigned count_a = l.int_a + l.frac_a + count_b; + unsigned count_r = l.int_r + l.frac_r; + unsigned count_g = l.int_g + l.frac_g + count_r; + unsigned count_b = l.int_b + l.frac_b + count_g; + ASSERTED unsigned count_a = l.int_a + l.frac_a + count_b; - /* Must fill the word */ - assert(count_a == 32); + /* Must fill the word */ + assert(count_a == 32); - /* Convert the transformed float colour to the given layout */ - uint32_t ur = float_to_fixed(r, l.int_r, l.frac_r, dithered) << 0; - uint32_t ug = float_to_fixed(g, l.int_g, l.frac_g, dithered) << count_r; - uint32_t ub = float_to_fixed(b, l.int_b, l.frac_b, dithered) << count_g; - uint32_t ua = float_to_fixed(a, l.int_a, l.frac_a, dithered) << count_b; + /* Convert the transformed float colour to the given layout */ + uint32_t ur = float_to_fixed(r, l.int_r, l.frac_r, dithered) << 0; + uint32_t ug = float_to_fixed(g, l.int_g, l.frac_g, dithered) << count_r; + uint32_t ub = float_to_fixed(b, l.int_b, l.frac_b, dithered) << count_g; + uint32_t ua = float_to_fixed(a, l.int_a, l.frac_a, dithered) << count_b; - pan_pack_color_32(packed, ur | ug | ub | ua); + pan_pack_color_32(packed, ur | ug | ub | ua); } diff --git a/src/panfrost/lib/pan_cs.c b/src/panfrost/lib/pan_cs.c index 87587b2c931..45b578dc981 100644 --- a/src/panfrost/lib/pan_cs.c +++ b/src/panfrost/lib/pan_cs.c @@ -27,7 +27,6 @@ #include "util/macros.h" - #include "pan_cs.h" #include "pan_encoder.h" #include "pan_texture.h" @@ -35,270 +34,285 @@ static unsigned mod_to_block_fmt(uint64_t mod) { - switch (mod) { - case DRM_FORMAT_MOD_LINEAR: - return MALI_BLOCK_FORMAT_LINEAR; - case DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED: - return MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED; - default: + switch (mod) { + case DRM_FORMAT_MOD_LINEAR: + return MALI_BLOCK_FORMAT_LINEAR; + case DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED: + return MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED; + default: #if PAN_ARCH >= 5 - if (drm_is_afbc(mod) && !(mod & AFBC_FORMAT_MOD_TILED)) - return MALI_BLOCK_FORMAT_AFBC; + if (drm_is_afbc(mod) && !(mod & AFBC_FORMAT_MOD_TILED)) + return MALI_BLOCK_FORMAT_AFBC; #endif #if PAN_ARCH >= 7 - if (drm_is_afbc(mod) && (mod & AFBC_FORMAT_MOD_TILED)) - return MALI_BLOCK_FORMAT_AFBC_TILED; + if (drm_is_afbc(mod) && (mod & AFBC_FORMAT_MOD_TILED)) + return MALI_BLOCK_FORMAT_AFBC_TILED; #endif - 
unreachable("Unsupported modifer"); - } + unreachable("Unsupported modifer"); + } } static enum mali_msaa mali_sampling_mode(const struct pan_image_view *view) { - if (view->image->layout.nr_samples > 1) { - assert(view->nr_samples == view->image->layout.nr_samples); - assert(view->image->layout.slices[0].surface_stride != 0); - return MALI_MSAA_LAYERED; - } + if (view->image->layout.nr_samples > 1) { + assert(view->nr_samples == view->image->layout.nr_samples); + assert(view->image->layout.slices[0].surface_stride != 0); + return MALI_MSAA_LAYERED; + } - if (view->nr_samples > view->image->layout.nr_samples) { - assert(view->image->layout.nr_samples == 1); - return MALI_MSAA_AVERAGE; - } + if (view->nr_samples > view->image->layout.nr_samples) { + assert(view->image->layout.nr_samples == 1); + return MALI_MSAA_AVERAGE; + } - assert(view->nr_samples == view->image->layout.nr_samples); - assert(view->nr_samples == 1); + assert(view->nr_samples == view->image->layout.nr_samples); + assert(view->nr_samples == 1); - return MALI_MSAA_SINGLE; + return MALI_MSAA_SINGLE; } #if PAN_ARCH >= 5 static inline enum mali_sample_pattern pan_sample_pattern(unsigned samples) { - switch (samples) { - case 1: return MALI_SAMPLE_PATTERN_SINGLE_SAMPLED; - case 4: return MALI_SAMPLE_PATTERN_ROTATED_4X_GRID; - case 8: return MALI_SAMPLE_PATTERN_D3D_8X_GRID; - case 16: return MALI_SAMPLE_PATTERN_D3D_16X_GRID; - default: unreachable("Unsupported sample count"); - } + switch (samples) { + case 1: + return MALI_SAMPLE_PATTERN_SINGLE_SAMPLED; + case 4: + return MALI_SAMPLE_PATTERN_ROTATED_4X_GRID; + case 8: + return MALI_SAMPLE_PATTERN_D3D_8X_GRID; + case 16: + return MALI_SAMPLE_PATTERN_D3D_16X_GRID; + default: + unreachable("Unsupported sample count"); + } } #endif int GENX(pan_select_crc_rt)(const struct pan_fb_info *fb, unsigned tile_size) { - /* Disable CRC when the tile size is not 16x16. In the hardware, CRC - * tiles are the same size as the tiles of the framebuffer. However, - * our code only handles 16x16 tiles. Therefore under the current - * implementation, we must disable CRC when 16x16 tiles are not used. - * - * This may hurt performance. However, smaller tile sizes are rare, and - * CRCs are more expensive at smaller tile sizes, reducing the benefit. - * Restricting CRC to 16x16 should work in practice. - */ - if (tile_size != 16 * 16) { - assert(tile_size < 16 * 16); - return -1; - } + /* Disable CRC when the tile size is not 16x16. In the hardware, CRC + * tiles are the same size as the tiles of the framebuffer. However, + * our code only handles 16x16 tiles. Therefore under the current + * implementation, we must disable CRC when 16x16 tiles are not used. + * + * This may hurt performance. However, smaller tile sizes are rare, and + * CRCs are more expensive at smaller tile sizes, reducing the benefit. + * Restricting CRC to 16x16 should work in practice. 
+ */ + if (tile_size != 16 * 16) { + assert(tile_size < 16 * 16); + return -1; + } #if PAN_ARCH <= 6 - if (fb->rt_count == 1 && fb->rts[0].view && !fb->rts[0].discard && - fb->rts[0].view->image->layout.crc) - return 0; + if (fb->rt_count == 1 && fb->rts[0].view && !fb->rts[0].discard && + fb->rts[0].view->image->layout.crc) + return 0; - return -1; + return -1; #else - bool best_rt_valid = false; - int best_rt = -1; + bool best_rt_valid = false; + int best_rt = -1; - for (unsigned i = 0; i < fb->rt_count; i++) { - if (!fb->rts[i].view || fb->rts[0].discard || - !fb->rts[i].view->image->layout.crc) - continue; + for (unsigned i = 0; i < fb->rt_count; i++) { + if (!fb->rts[i].view || fb->rts[0].discard || + !fb->rts[i].view->image->layout.crc) + continue; - bool valid = *(fb->rts[i].crc_valid); - bool full = !fb->extent.minx && !fb->extent.miny && - fb->extent.maxx == (fb->width - 1) && - fb->extent.maxy == (fb->height - 1); - if (!full && !valid) - continue; + bool valid = *(fb->rts[i].crc_valid); + bool full = !fb->extent.minx && !fb->extent.miny && + fb->extent.maxx == (fb->width - 1) && + fb->extent.maxy == (fb->height - 1); + if (!full && !valid) + continue; - if (best_rt < 0 || (valid && !best_rt_valid)) { - best_rt = i; - best_rt_valid = valid; - } + if (best_rt < 0 || (valid && !best_rt_valid)) { + best_rt = i; + best_rt_valid = valid; + } - if (valid) - break; - } + if (valid) + break; + } - return best_rt; + return best_rt; #endif } static enum mali_zs_format translate_zs_format(enum pipe_format in) { - switch (in) { - case PIPE_FORMAT_Z16_UNORM: return MALI_ZS_FORMAT_D16; - case PIPE_FORMAT_Z24_UNORM_S8_UINT: return MALI_ZS_FORMAT_D24S8; - case PIPE_FORMAT_Z24X8_UNORM: return MALI_ZS_FORMAT_D24X8; - case PIPE_FORMAT_Z32_FLOAT: return MALI_ZS_FORMAT_D32; + switch (in) { + case PIPE_FORMAT_Z16_UNORM: + return MALI_ZS_FORMAT_D16; + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + return MALI_ZS_FORMAT_D24S8; + case PIPE_FORMAT_Z24X8_UNORM: + return MALI_ZS_FORMAT_D24X8; + case PIPE_FORMAT_Z32_FLOAT: + return MALI_ZS_FORMAT_D32; #if PAN_ARCH <= 7 - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: return MALI_ZS_FORMAT_D32_S8X24; + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + return MALI_ZS_FORMAT_D32_S8X24; #endif - default: unreachable("Unsupported depth/stencil format."); - } + default: + unreachable("Unsupported depth/stencil format."); + } } #if PAN_ARCH >= 5 static enum mali_s_format translate_s_format(enum pipe_format in) { - switch (in) { - case PIPE_FORMAT_S8_UINT: return MALI_S_FORMAT_S8; - case PIPE_FORMAT_Z24_UNORM_S8_UINT: - case PIPE_FORMAT_X24S8_UINT: - return MALI_S_FORMAT_X24S8; + switch (in) { + case PIPE_FORMAT_S8_UINT: + return MALI_S_FORMAT_S8; + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + case PIPE_FORMAT_X24S8_UINT: + return MALI_S_FORMAT_X24S8; #if PAN_ARCH <= 7 - case PIPE_FORMAT_S8_UINT_Z24_UNORM: - case PIPE_FORMAT_S8X24_UINT: - return MALI_S_FORMAT_S8X24; - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: - return MALI_S_FORMAT_X32_S8X24; + case PIPE_FORMAT_S8_UINT_Z24_UNORM: + case PIPE_FORMAT_S8X24_UINT: + return MALI_S_FORMAT_S8X24; + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + return MALI_S_FORMAT_X32_S8X24; #endif - default: - unreachable("Unsupported stencil format."); - } + default: + unreachable("Unsupported stencil format."); + } } static void -pan_prepare_s(const struct pan_fb_info *fb, - struct MALI_ZS_CRC_EXTENSION *ext) +pan_prepare_s(const struct pan_fb_info *fb, struct MALI_ZS_CRC_EXTENSION *ext) { - const struct pan_image_view *s = fb->zs.view.s; + const struct pan_image_view *s = 
fb->zs.view.s; - if (!s) - return; + if (!s) + return; - unsigned level = s->first_level; + unsigned level = s->first_level; - ext->s_msaa = mali_sampling_mode(s); + ext->s_msaa = mali_sampling_mode(s); - struct pan_surface surf; - pan_iview_get_surface(s, 0, 0, 0, &surf); + struct pan_surface surf; + pan_iview_get_surface(s, 0, 0, 0, &surf); - assert(s->image->layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED || - s->image->layout.modifier == DRM_FORMAT_MOD_LINEAR); - ext->s_writeback_base = surf.data; - ext->s_writeback_row_stride = s->image->layout.slices[level].row_stride; - ext->s_writeback_surface_stride = - (s->image->layout.nr_samples > 1) ? - s->image->layout.slices[level].surface_stride : 0; - ext->s_block_format = mod_to_block_fmt(s->image->layout.modifier); - ext->s_write_format = translate_s_format(s->format); + assert(s->image->layout.modifier == + DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED || + s->image->layout.modifier == DRM_FORMAT_MOD_LINEAR); + ext->s_writeback_base = surf.data; + ext->s_writeback_row_stride = s->image->layout.slices[level].row_stride; + ext->s_writeback_surface_stride = + (s->image->layout.nr_samples > 1) + ? s->image->layout.slices[level].surface_stride + : 0; + ext->s_block_format = mod_to_block_fmt(s->image->layout.modifier); + ext->s_write_format = translate_s_format(s->format); } static void -pan_prepare_zs(const struct pan_fb_info *fb, - struct MALI_ZS_CRC_EXTENSION *ext) +pan_prepare_zs(const struct pan_fb_info *fb, struct MALI_ZS_CRC_EXTENSION *ext) { - const struct pan_image_view *zs = fb->zs.view.zs; + const struct pan_image_view *zs = fb->zs.view.zs; - if (!zs) - return; + if (!zs) + return; - unsigned level = zs->first_level; + unsigned level = zs->first_level; - ext->zs_msaa = mali_sampling_mode(zs); + ext->zs_msaa = mali_sampling_mode(zs); - struct pan_surface surf; - pan_iview_get_surface(zs, 0, 0, 0, &surf); - UNUSED const struct pan_image_slice_layout *slice = &zs->image->layout.slices[level]; + struct pan_surface surf; + pan_iview_get_surface(zs, 0, 0, 0, &surf); + UNUSED const struct pan_image_slice_layout *slice = + &zs->image->layout.slices[level]; - if (drm_is_afbc(zs->image->layout.modifier)) { + if (drm_is_afbc(zs->image->layout.modifier)) { #if PAN_ARCH >= 9 - ext->zs_writeback_base = surf.afbc.header; - ext->zs_writeback_row_stride = slice->row_stride; - /* TODO: surface stride? */ - ext->zs_afbc_body_offset = surf.afbc.body - surf.afbc.header; + ext->zs_writeback_base = surf.afbc.header; + ext->zs_writeback_row_stride = slice->row_stride; + /* TODO: surface stride? */ + ext->zs_afbc_body_offset = surf.afbc.body - surf.afbc.header; - /* TODO: stencil AFBC? */ + /* TODO: stencil AFBC? 
*/ #else #if PAN_ARCH >= 6 - ext->zs_afbc_row_stride = pan_afbc_stride_blocks(zs->image->layout.modifier, slice->row_stride); + ext->zs_afbc_row_stride = + pan_afbc_stride_blocks(zs->image->layout.modifier, slice->row_stride); #else - ext->zs_block_format = MALI_BLOCK_FORMAT_AFBC; - ext->zs_afbc_body_size = 0x1000; - ext->zs_afbc_chunk_size = 9; - ext->zs_afbc_sparse = true; + ext->zs_block_format = MALI_BLOCK_FORMAT_AFBC; + ext->zs_afbc_body_size = 0x1000; + ext->zs_afbc_chunk_size = 9; + ext->zs_afbc_sparse = true; #endif - ext->zs_afbc_header = surf.afbc.header; - ext->zs_afbc_body = surf.afbc.body; + ext->zs_afbc_header = surf.afbc.header; + ext->zs_afbc_body = surf.afbc.body; #endif - } else { - assert(zs->image->layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED || - zs->image->layout.modifier == DRM_FORMAT_MOD_LINEAR); + } else { + assert(zs->image->layout.modifier == + DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED || + zs->image->layout.modifier == DRM_FORMAT_MOD_LINEAR); - /* TODO: Z32F(S8) support, which is always linear */ + /* TODO: Z32F(S8) support, which is always linear */ - ext->zs_writeback_base = surf.data; - ext->zs_writeback_row_stride = - zs->image->layout.slices[level].row_stride; - ext->zs_writeback_surface_stride = - (zs->image->layout.nr_samples > 1) ? - zs->image->layout.slices[level].surface_stride : 0; - } + ext->zs_writeback_base = surf.data; + ext->zs_writeback_row_stride = zs->image->layout.slices[level].row_stride; + ext->zs_writeback_surface_stride = + (zs->image->layout.nr_samples > 1) + ? zs->image->layout.slices[level].surface_stride + : 0; + } - ext->zs_block_format = mod_to_block_fmt(zs->image->layout.modifier); - ext->zs_write_format = translate_zs_format(zs->format); - if (ext->zs_write_format == MALI_ZS_FORMAT_D24S8) - ext->s_writeback_base = ext->zs_writeback_base; + ext->zs_block_format = mod_to_block_fmt(zs->image->layout.modifier); + ext->zs_write_format = translate_zs_format(zs->format); + if (ext->zs_write_format == MALI_ZS_FORMAT_D24S8) + ext->s_writeback_base = ext->zs_writeback_base; } static void pan_prepare_crc(const struct pan_fb_info *fb, int rt_crc, struct MALI_ZS_CRC_EXTENSION *ext) { - if (rt_crc < 0) - return; + if (rt_crc < 0) + return; - assert(rt_crc < fb->rt_count); + assert(rt_crc < fb->rt_count); - const struct pan_image_view *rt = fb->rts[rt_crc].view; - const struct pan_image_slice_layout *slice = &rt->image->layout.slices[rt->first_level]; - ext->crc_base = rt->image->data.bo->ptr.gpu + rt->image->data.offset - + slice->crc.offset; - ext->crc_row_stride = slice->crc.stride; + const struct pan_image_view *rt = fb->rts[rt_crc].view; + const struct pan_image_slice_layout *slice = + &rt->image->layout.slices[rt->first_level]; + ext->crc_base = + rt->image->data.bo->ptr.gpu + rt->image->data.offset + slice->crc.offset; + ext->crc_row_stride = slice->crc.stride; #if PAN_ARCH >= 7 - ext->crc_render_target = rt_crc; + ext->crc_render_target = rt_crc; - if (fb->rts[rt_crc].clear) { - uint32_t clear_val = fb->rts[rt_crc].clear_value[0]; - ext->crc_clear_color = clear_val | 0xc000000000000000 | - (((uint64_t)clear_val & 0xffff) << 32); - } + if (fb->rts[rt_crc].clear) { + uint32_t clear_val = fb->rts[rt_crc].clear_value[0]; + ext->crc_clear_color = clear_val | 0xc000000000000000 | + (((uint64_t)clear_val & 0xffff) << 32); + } #endif } static void -pan_emit_zs_crc_ext(const struct pan_fb_info *fb, int rt_crc, - void *zs_crc_ext) +pan_emit_zs_crc_ext(const struct pan_fb_info *fb, int rt_crc, void *zs_crc_ext) { - 
pan_pack(zs_crc_ext, ZS_CRC_EXTENSION, cfg) { - pan_prepare_crc(fb, rt_crc, &cfg); - cfg.zs_clean_pixel_write_enable = fb->zs.clear.z || fb->zs.clear.s; - pan_prepare_zs(fb, &cfg); - pan_prepare_s(fb, &cfg); - } + pan_pack(zs_crc_ext, ZS_CRC_EXTENSION, cfg) { + pan_prepare_crc(fb, rt_crc, &cfg); + cfg.zs_clean_pixel_write_enable = fb->zs.clear.z || fb->zs.clear.s; + pan_prepare_zs(fb, &cfg); + pan_prepare_s(fb, &cfg); + } } /* Measure format as it appears in the tile buffer */ @@ -306,33 +320,33 @@ pan_emit_zs_crc_ext(const struct pan_fb_info *fb, int rt_crc, static unsigned pan_bytes_per_pixel_tib(enum pipe_format format) { - if (panfrost_blendable_formats_v7[format].internal) { - /* Blendable formats are always 32-bits in the tile buffer, - * extra bits are used as padding or to dither */ - return 4; - } else { - /* Non-blendable formats are raw, rounded up to the nearest - * power-of-two size */ - unsigned bytes = util_format_get_blocksize(format); - return util_next_power_of_two(bytes); - } + if (panfrost_blendable_formats_v7[format].internal) { + /* Blendable formats are always 32-bits in the tile buffer, + * extra bits are used as padding or to dither */ + return 4; + } else { + /* Non-blendable formats are raw, rounded up to the nearest + * power-of-two size */ + unsigned bytes = util_format_get_blocksize(format); + return util_next_power_of_two(bytes); + } } static unsigned pan_cbuf_bytes_per_pixel(const struct pan_fb_info *fb) { - unsigned sum = 0; + unsigned sum = 0; - for (int cb = 0; cb < fb->rt_count; ++cb) { - const struct pan_image_view *rt = fb->rts[cb].view; + for (int cb = 0; cb < fb->rt_count; ++cb) { + const struct pan_image_view *rt = fb->rts[cb].view; - if (!rt) - continue; + if (!rt) + continue; - sum += pan_bytes_per_pixel_tib(rt->format) * rt->nr_samples; - } + sum += pan_bytes_per_pixel_tib(rt->format) * rt->nr_samples; + } - return sum; + return sum; } /* @@ -346,10 +360,10 @@ pan_cbuf_bytes_per_pixel(const struct pan_fb_info *fb) static unsigned pan_select_max_tile_size(unsigned tile_buffer_bytes, unsigned bytes_per_pixel) { - assert(util_is_power_of_two_nonzero(tile_buffer_bytes)); - assert(tile_buffer_bytes >= 1024); + assert(util_is_power_of_two_nonzero(tile_buffer_bytes)); + assert(tile_buffer_bytes >= 1024); - return tile_buffer_bytes >> util_logbase2_ceil(bytes_per_pixel); + return tile_buffer_bytes >> util_logbase2_ceil(bytes_per_pixel); } static enum mali_color_format @@ -382,61 +396,63 @@ static void pan_rt_init_format(const struct pan_image_view *rt, struct MALI_RENDER_TARGET *cfg) { - /* Explode details on the format */ + /* Explode details on the format */ - const struct util_format_description *desc = - util_format_description(rt->format); + const struct util_format_description *desc = + util_format_description(rt->format); - /* The swizzle for rendering is inverted from texturing */ + /* The swizzle for rendering is inverted from texturing */ - unsigned char swizzle[4] = { - PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W, - }; + unsigned char swizzle[4] = { + PIPE_SWIZZLE_X, + PIPE_SWIZZLE_Y, + PIPE_SWIZZLE_Z, + PIPE_SWIZZLE_W, + }; - /* Fill in accordingly, defaulting to 8-bit UNORM */ + /* Fill in accordingly, defaulting to 8-bit UNORM */ - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) - cfg->srgb = true; + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) + cfg->srgb = true; - struct pan_blendable_format fmt = panfrost_blendable_formats_v7[rt->format]; + struct pan_blendable_format fmt = 
panfrost_blendable_formats_v7[rt->format]; - if (fmt.internal) { - cfg->internal_format = fmt.internal; - cfg->writeback_format = fmt.writeback; - panfrost_invert_swizzle(desc->swizzle, swizzle); - } else { - /* Construct RAW internal/writeback, where internal is - * specified logarithmically (round to next power-of-two). - * Offset specified from RAW8, where 8 = 2^3 */ + if (fmt.internal) { + cfg->internal_format = fmt.internal; + cfg->writeback_format = fmt.writeback; + panfrost_invert_swizzle(desc->swizzle, swizzle); + } else { + /* Construct RAW internal/writeback, where internal is + * specified logarithmically (round to next power-of-two). + * Offset specified from RAW8, where 8 = 2^3 */ - unsigned bits = desc->block.bits; - unsigned offset = util_logbase2_ceil(bits) - 3; - assert(offset <= 4); + unsigned bits = desc->block.bits; + unsigned offset = util_logbase2_ceil(bits) - 3; + assert(offset <= 4); - cfg->internal_format = - MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW8 + offset; + cfg->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW8 + offset; - cfg->writeback_format = pan_mfbd_raw_format(bits); - } + cfg->writeback_format = pan_mfbd_raw_format(bits); + } - cfg->swizzle = panfrost_translate_swizzle_4(swizzle); + cfg->swizzle = panfrost_translate_swizzle_4(swizzle); } #if PAN_ARCH >= 9 enum mali_afbc_compression_mode pan_afbc_compression_mode(enum pipe_format format) { - /* There's a special case for texturing the stencil part from a combined - * depth/stencil texture, handle it separately. - */ - if (format == PIPE_FORMAT_X24S8_UINT) - return MALI_AFBC_COMPRESSION_MODE_X24S8; + /* There's a special case for texturing the stencil part from a combined + * depth/stencil texture, handle it separately. + */ + if (format == PIPE_FORMAT_X24S8_UINT) + return MALI_AFBC_COMPRESSION_MODE_X24S8; - /* Otherwise, map canonical formats to the hardware enum. This only - * needs to handle the subset of formats returned by - * panfrost_afbc_format. - */ - /* clang-format off */ + /* Otherwise, map canonical formats to the hardware enum. This only + * needs to handle the subset of formats returned by + * panfrost_afbc_format. 
+ */ + /* clang-format off */ switch (panfrost_afbc_format(PAN_ARCH, format)) { case PAN_AFBC_MODE_R8: return MALI_AFBC_COMPRESSION_MODE_R8; case PAN_AFBC_MODE_R8G8: return MALI_AFBC_COMPRESSION_MODE_R8G8; @@ -450,194 +466,186 @@ pan_afbc_compression_mode(enum pipe_format format) case PAN_AFBC_MODE_S8: return MALI_AFBC_COMPRESSION_MODE_S8; case PAN_AFBC_MODE_INVALID: unreachable("Invalid AFBC format"); } - /* clang-format on */ + /* clang-format on */ - unreachable("all AFBC formats handled"); + unreachable("all AFBC formats handled"); } #endif static void -pan_prepare_rt(const struct pan_fb_info *fb, unsigned idx, - unsigned cbuf_offset, +pan_prepare_rt(const struct pan_fb_info *fb, unsigned idx, unsigned cbuf_offset, struct MALI_RENDER_TARGET *cfg) { - cfg->clean_pixel_write_enable = fb->rts[idx].clear; - cfg->internal_buffer_offset = cbuf_offset; - if (fb->rts[idx].clear) { - cfg->clear.color_0 = fb->rts[idx].clear_value[0]; - cfg->clear.color_1 = fb->rts[idx].clear_value[1]; - cfg->clear.color_2 = fb->rts[idx].clear_value[2]; - cfg->clear.color_3 = fb->rts[idx].clear_value[3]; - } + cfg->clean_pixel_write_enable = fb->rts[idx].clear; + cfg->internal_buffer_offset = cbuf_offset; + if (fb->rts[idx].clear) { + cfg->clear.color_0 = fb->rts[idx].clear_value[0]; + cfg->clear.color_1 = fb->rts[idx].clear_value[1]; + cfg->clear.color_2 = fb->rts[idx].clear_value[2]; + cfg->clear.color_3 = fb->rts[idx].clear_value[3]; + } - const struct pan_image_view *rt = fb->rts[idx].view; - if (!rt || fb->rts[idx].discard) { - cfg->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R8G8B8A8; - cfg->internal_buffer_offset = cbuf_offset; + const struct pan_image_view *rt = fb->rts[idx].view; + if (!rt || fb->rts[idx].discard) { + cfg->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R8G8B8A8; + cfg->internal_buffer_offset = cbuf_offset; #if PAN_ARCH >= 7 - cfg->writeback_block_format = MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED; - cfg->dithering_enable = true; + cfg->writeback_block_format = MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED; + cfg->dithering_enable = true; #endif - return; - } + return; + } - cfg->write_enable = true; - cfg->dithering_enable = true; + cfg->write_enable = true; + cfg->dithering_enable = true; - unsigned level = rt->first_level; - assert(rt->last_level == rt->first_level); - assert(rt->last_layer == rt->first_layer); + unsigned level = rt->first_level; + assert(rt->last_level == rt->first_level); + assert(rt->last_layer == rt->first_layer); - int row_stride = rt->image->layout.slices[level].row_stride; + int row_stride = rt->image->layout.slices[level].row_stride; - /* Only set layer_stride for layered MSAA rendering */ + /* Only set layer_stride for layered MSAA rendering */ - unsigned layer_stride = - (rt->image->layout.nr_samples > 1) ? - rt->image->layout.slices[level].surface_stride : 0; + unsigned layer_stride = (rt->image->layout.nr_samples > 1) + ? 
rt->image->layout.slices[level].surface_stride + : 0; - cfg->writeback_msaa = mali_sampling_mode(rt); + cfg->writeback_msaa = mali_sampling_mode(rt); - pan_rt_init_format(rt, cfg); + pan_rt_init_format(rt, cfg); - cfg->writeback_block_format = mod_to_block_fmt(rt->image->layout.modifier); + cfg->writeback_block_format = mod_to_block_fmt(rt->image->layout.modifier); - struct pan_surface surf; - pan_iview_get_surface(rt, 0, 0, 0, &surf); + struct pan_surface surf; + pan_iview_get_surface(rt, 0, 0, 0, &surf); - if (drm_is_afbc(rt->image->layout.modifier)) { + if (drm_is_afbc(rt->image->layout.modifier)) { #if PAN_ARCH >= 9 - if (rt->image->layout.modifier & AFBC_FORMAT_MOD_YTR) - cfg->afbc.yuv_transform = true; + if (rt->image->layout.modifier & AFBC_FORMAT_MOD_YTR) + cfg->afbc.yuv_transform = true; - cfg->afbc.wide_block = panfrost_afbc_is_wide(rt->image->layout.modifier); - cfg->afbc.header = surf.afbc.header; - cfg->afbc.body_offset = surf.afbc.body - surf.afbc.header; - assert(surf.afbc.body >= surf.afbc.header); + cfg->afbc.wide_block = panfrost_afbc_is_wide(rt->image->layout.modifier); + cfg->afbc.header = surf.afbc.header; + cfg->afbc.body_offset = surf.afbc.body - surf.afbc.header; + assert(surf.afbc.body >= surf.afbc.header); - cfg->afbc.compression_mode = pan_afbc_compression_mode(rt->format); - cfg->afbc.row_stride = row_stride; + cfg->afbc.compression_mode = pan_afbc_compression_mode(rt->format); + cfg->afbc.row_stride = row_stride; #else - const struct pan_image_slice_layout *slice = &rt->image->layout.slices[level]; + const struct pan_image_slice_layout *slice = + &rt->image->layout.slices[level]; #if PAN_ARCH >= 6 - cfg->afbc.row_stride = pan_afbc_stride_blocks(rt->image->layout.modifier, slice->row_stride); - cfg->afbc.afbc_wide_block_enable = - panfrost_afbc_is_wide(rt->image->layout.modifier); + cfg->afbc.row_stride = + pan_afbc_stride_blocks(rt->image->layout.modifier, slice->row_stride); + cfg->afbc.afbc_wide_block_enable = + panfrost_afbc_is_wide(rt->image->layout.modifier); #else - cfg->afbc.chunk_size = 9; - cfg->afbc.sparse = true; - cfg->afbc.body_size = slice->afbc.body_size; + cfg->afbc.chunk_size = 9; + cfg->afbc.sparse = true; + cfg->afbc.body_size = slice->afbc.body_size; #endif - cfg->afbc.header = surf.afbc.header; - cfg->afbc.body = surf.afbc.body; + cfg->afbc.header = surf.afbc.header; + cfg->afbc.body = surf.afbc.body; - if (rt->image->layout.modifier & AFBC_FORMAT_MOD_YTR) - cfg->afbc.yuv_transform_enable = true; + if (rt->image->layout.modifier & AFBC_FORMAT_MOD_YTR) + cfg->afbc.yuv_transform_enable = true; #endif - } else { - assert(rt->image->layout.modifier == DRM_FORMAT_MOD_LINEAR || - rt->image->layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED); - cfg->rgb.base = surf.data; - cfg->rgb.row_stride = row_stride; - cfg->rgb.surface_stride = layer_stride; - } + } else { + assert(rt->image->layout.modifier == DRM_FORMAT_MOD_LINEAR || + rt->image->layout.modifier == + DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED); + cfg->rgb.base = surf.data; + cfg->rgb.row_stride = row_stride; + cfg->rgb.surface_stride = layer_stride; + } } #endif void -GENX(pan_emit_tls)(const struct pan_tls_info *info, - void *out) +GENX(pan_emit_tls)(const struct pan_tls_info *info, void *out) { - pan_pack(out, LOCAL_STORAGE, cfg) { - if (info->tls.size) { - unsigned shift = - panfrost_get_stack_shift(info->tls.size); + pan_pack(out, LOCAL_STORAGE, cfg) { + if (info->tls.size) { + unsigned shift = panfrost_get_stack_shift(info->tls.size); - cfg.tls_size = shift; + 
cfg.tls_size = shift; #if PAN_ARCH >= 9 - /* For now, always use packed TLS addressing. This is - * better for the cache and requires no fix up code in - * the shader. We may need to revisit this someday for - * OpenCL generic pointer support. - */ - cfg.tls_address_mode = MALI_ADDRESS_MODE_PACKED; + /* For now, always use packed TLS addressing. This is + * better for the cache and requires no fix up code in + * the shader. We may need to revisit this someday for + * OpenCL generic pointer support. + */ + cfg.tls_address_mode = MALI_ADDRESS_MODE_PACKED; - assert((info->tls.ptr & 4095) == 0); - cfg.tls_base_pointer = info->tls.ptr >> 8; + assert((info->tls.ptr & 4095) == 0); + cfg.tls_base_pointer = info->tls.ptr >> 8; #else - cfg.tls_base_pointer = info->tls.ptr; + cfg.tls_base_pointer = info->tls.ptr; #endif - } + } - if (info->wls.size) { - assert(!(info->wls.ptr & 4095)); - assert((info->wls.ptr & 0xffffffff00000000ULL) == ((info->wls.ptr + info->wls.size - 1) & 0xffffffff00000000ULL)); - cfg.wls_base_pointer = info->wls.ptr; - unsigned wls_size = pan_wls_adjust_size(info->wls.size); - cfg.wls_instances = info->wls.instances; - cfg.wls_size_scale = util_logbase2(wls_size) + 1; - } else { - cfg.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM; - } - } + if (info->wls.size) { + assert(!(info->wls.ptr & 4095)); + assert((info->wls.ptr & 0xffffffff00000000ULL) == + ((info->wls.ptr + info->wls.size - 1) & 0xffffffff00000000ULL)); + cfg.wls_base_pointer = info->wls.ptr; + unsigned wls_size = pan_wls_adjust_size(info->wls.size); + cfg.wls_instances = info->wls.instances; + cfg.wls_size_scale = util_logbase2(wls_size) + 1; + } else { + cfg.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM; + } + } } #if PAN_ARCH <= 5 static void pan_emit_midgard_tiler(const struct panfrost_device *dev, const struct pan_fb_info *fb, - const struct pan_tiler_context *tiler_ctx, - void *out) + const struct pan_tiler_context *tiler_ctx, void *out) { - bool hierarchy = !dev->model->quirks.no_hierarchical_tiling; + bool hierarchy = !dev->model->quirks.no_hierarchical_tiling; - assert(tiler_ctx->midgard.polygon_list->ptr.gpu); + assert(tiler_ctx->midgard.polygon_list->ptr.gpu); - pan_pack(out, TILER_CONTEXT, cfg) { - unsigned header_size; + pan_pack(out, TILER_CONTEXT, cfg) { + unsigned header_size; - if (tiler_ctx->midgard.disable) { - cfg.hierarchy_mask = - hierarchy ? - MALI_MIDGARD_TILER_DISABLED : - MALI_MIDGARD_TILER_USER; - header_size = MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE; - cfg.polygon_list_size = header_size + (hierarchy ? 0 : 4); - cfg.heap_start = tiler_ctx->midgard.polygon_list->ptr.gpu; - cfg.heap_end = tiler_ctx->midgard.polygon_list->ptr.gpu; - } else { - cfg.hierarchy_mask = - panfrost_choose_hierarchy_mask(fb->width, - fb->height, - 1, hierarchy); - header_size = panfrost_tiler_header_size(fb->width, - fb->height, - cfg.hierarchy_mask, - hierarchy); - cfg.polygon_list_size = - panfrost_tiler_full_size(fb->width, fb->height, - cfg.hierarchy_mask, - hierarchy); - cfg.heap_start = dev->tiler_heap->ptr.gpu; - cfg.heap_end = dev->tiler_heap->ptr.gpu + dev->tiler_heap->size; - } + if (tiler_ctx->midgard.disable) { + cfg.hierarchy_mask = + hierarchy ? MALI_MIDGARD_TILER_DISABLED : MALI_MIDGARD_TILER_USER; + header_size = MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE; + cfg.polygon_list_size = header_size + (hierarchy ? 
0 : 4); + cfg.heap_start = tiler_ctx->midgard.polygon_list->ptr.gpu; + cfg.heap_end = tiler_ctx->midgard.polygon_list->ptr.gpu; + } else { + cfg.hierarchy_mask = + panfrost_choose_hierarchy_mask(fb->width, fb->height, 1, hierarchy); + header_size = panfrost_tiler_header_size( + fb->width, fb->height, cfg.hierarchy_mask, hierarchy); + cfg.polygon_list_size = panfrost_tiler_full_size( + fb->width, fb->height, cfg.hierarchy_mask, hierarchy); + cfg.heap_start = dev->tiler_heap->ptr.gpu; + cfg.heap_end = dev->tiler_heap->ptr.gpu + dev->tiler_heap->size; + } - cfg.polygon_list = tiler_ctx->midgard.polygon_list->ptr.gpu; - cfg.polygon_list_body = cfg.polygon_list + header_size; - } + cfg.polygon_list = tiler_ctx->midgard.polygon_list->ptr.gpu; + cfg.polygon_list_body = cfg.polygon_list + header_size; + } } #endif #if PAN_ARCH >= 5 static void -pan_emit_rt(const struct pan_fb_info *fb, - unsigned idx, unsigned cbuf_offset, void *out) +pan_emit_rt(const struct pan_fb_info *fb, unsigned idx, unsigned cbuf_offset, + void *out) { - pan_pack(out, RENDER_TARGET, cfg) { - pan_prepare_rt(fb, idx, cbuf_offset, &cfg); - } + pan_pack(out, RENDER_TARGET, cfg) { + pan_prepare_rt(fb, idx, cbuf_offset, &cfg); + } } #if PAN_ARCH >= 6 @@ -650,12 +658,13 @@ pan_emit_rt(const struct pan_fb_info *fb, * ignore, this cannot affect correctness, only performance */ static enum mali_pre_post_frame_shader_mode -pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode, bool force_clean_tile) +pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode, + bool force_clean_tile) { - if (force_clean_tile && mode == MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT) - return MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS; - else - return mode; + if (force_clean_tile && mode == MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT) + return MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS; + else + return mode; } /* Regardless of clean_tile_write_enable, the hardware writes clean tiles if @@ -665,349 +674,345 @@ pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode, bool force_ static bool pan_force_clean_write_rt(const struct pan_image_view *rt, unsigned tile_size) { - if (!drm_is_afbc(rt->image->layout.modifier)) - return false; + if (!drm_is_afbc(rt->image->layout.modifier)) + return false; - unsigned superblock = panfrost_afbc_superblock_width(rt->image->layout.modifier); + unsigned superblock = + panfrost_afbc_superblock_width(rt->image->layout.modifier); - assert(superblock >= 16); - assert(tile_size <= 16*16); + assert(superblock >= 16); + assert(tile_size <= 16 * 16); - /* Tile size and superblock differ unless they are both 16x16 */ - return !(superblock == 16 && tile_size == 16*16); + /* Tile size and superblock differ unless they are both 16x16 */ + return !(superblock == 16 && tile_size == 16 * 16); } static bool pan_force_clean_write(const struct pan_fb_info *fb, unsigned tile_size) { - /* Maximum tile size */ - assert(tile_size <= 16*16); + /* Maximum tile size */ + assert(tile_size <= 16 * 16); - for (unsigned i = 0; i < fb->rt_count; ++i) { - if (fb->rts[i].view && !fb->rts[i].discard && - pan_force_clean_write_rt(fb->rts[i].view, tile_size)) - return true; - } + for (unsigned i = 0; i < fb->rt_count; ++i) { + if (fb->rts[i].view && !fb->rts[i].discard && + pan_force_clean_write_rt(fb->rts[i].view, tile_size)) + return true; + } - if (fb->zs.view.zs && !fb->zs.discard.z && - pan_force_clean_write_rt(fb->zs.view.zs, tile_size)) - return true; + if (fb->zs.view.zs && !fb->zs.discard.z && + 
pan_force_clean_write_rt(fb->zs.view.zs, tile_size)) + return true; - if (fb->zs.view.s && !fb->zs.discard.s && - pan_force_clean_write_rt(fb->zs.view.s, tile_size)) - return true; + if (fb->zs.view.s && !fb->zs.discard.s && + pan_force_clean_write_rt(fb->zs.view.s, tile_size)) + return true; - return false; + return false; } #endif unsigned GENX(pan_emit_fbd)(const struct panfrost_device *dev, - const struct pan_fb_info *fb, - const struct pan_tls_info *tls, - const struct pan_tiler_context *tiler_ctx, - void *out) + const struct pan_fb_info *fb, const struct pan_tls_info *tls, + const struct pan_tiler_context *tiler_ctx, void *out) { - unsigned tags = MALI_FBD_TAG_IS_MFBD; - void *fbd = out; - void *rtd = out + pan_size(FRAMEBUFFER); + unsigned tags = MALI_FBD_TAG_IS_MFBD; + void *fbd = out; + void *rtd = out + pan_size(FRAMEBUFFER); #if PAN_ARCH <= 5 - GENX(pan_emit_tls)(tls, - pan_section_ptr(fbd, FRAMEBUFFER, LOCAL_STORAGE)); + GENX(pan_emit_tls)(tls, pan_section_ptr(fbd, FRAMEBUFFER, LOCAL_STORAGE)); #endif - unsigned bytes_per_pixel = pan_cbuf_bytes_per_pixel(fb); - unsigned tile_size = pan_select_max_tile_size(dev->optimal_tib_size, - bytes_per_pixel); + unsigned bytes_per_pixel = pan_cbuf_bytes_per_pixel(fb); + unsigned tile_size = + pan_select_max_tile_size(dev->optimal_tib_size, bytes_per_pixel); - /* Clamp tile size to hardware limits */ - tile_size = MIN2(tile_size, 16 * 16); - assert(tile_size >= 4 * 4); + /* Clamp tile size to hardware limits */ + tile_size = MIN2(tile_size, 16 * 16); + assert(tile_size >= 4 * 4); - /* Colour buffer allocations must be 1K aligned. */ - unsigned cbuf_allocation = ALIGN_POT(bytes_per_pixel * tile_size, 1024); - assert(cbuf_allocation <= dev->optimal_tib_size && "tile too big"); + /* Colour buffer allocations must be 1K aligned. 
*/ + unsigned cbuf_allocation = ALIGN_POT(bytes_per_pixel * tile_size, 1024); + assert(cbuf_allocation <= dev->optimal_tib_size && "tile too big"); - int crc_rt = GENX(pan_select_crc_rt)(fb, tile_size); - bool has_zs_crc_ext = (fb->zs.view.zs || fb->zs.view.s || crc_rt >= 0); + int crc_rt = GENX(pan_select_crc_rt)(fb, tile_size); + bool has_zs_crc_ext = (fb->zs.view.zs || fb->zs.view.s || crc_rt >= 0); - pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) { + pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) { #if PAN_ARCH >= 6 - bool force_clean_write = pan_force_clean_write(fb, tile_size); + bool force_clean_write = pan_force_clean_write(fb, tile_size); - cfg.sample_locations = - panfrost_sample_positions(dev, pan_sample_pattern(fb->nr_samples)); - cfg.pre_frame_0 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[0], force_clean_write); - cfg.pre_frame_1 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[1], force_clean_write); - cfg.post_frame = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[2], force_clean_write); - cfg.frame_shader_dcds = fb->bifrost.pre_post.dcds.gpu; - cfg.tiler = tiler_ctx->bifrost; + cfg.sample_locations = + panfrost_sample_positions(dev, pan_sample_pattern(fb->nr_samples)); + cfg.pre_frame_0 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[0], + force_clean_write); + cfg.pre_frame_1 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[1], + force_clean_write); + cfg.post_frame = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[2], + force_clean_write); + cfg.frame_shader_dcds = fb->bifrost.pre_post.dcds.gpu; + cfg.tiler = tiler_ctx->bifrost; #endif - cfg.width = fb->width; - cfg.height = fb->height; - cfg.bound_max_x = fb->width - 1; - cfg.bound_max_y = fb->height - 1; + cfg.width = fb->width; + cfg.height = fb->height; + cfg.bound_max_x = fb->width - 1; + cfg.bound_max_y = fb->height - 1; - cfg.effective_tile_size = tile_size; - cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT; - cfg.render_target_count = MAX2(fb->rt_count, 1); + cfg.effective_tile_size = tile_size; + cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT; + cfg.render_target_count = MAX2(fb->rt_count, 1); - /* Default to 24 bit depth if there's no surface. */ - cfg.z_internal_format = - fb->zs.view.zs ? - panfrost_get_z_internal_format(fb->zs.view.zs->format) : - MALI_Z_INTERNAL_FORMAT_D24; + /* Default to 24 bit depth if there's no surface. */ + cfg.z_internal_format = + fb->zs.view.zs ? 
panfrost_get_z_internal_format(fb->zs.view.zs->format) + : MALI_Z_INTERNAL_FORMAT_D24; - cfg.z_clear = fb->zs.clear_value.depth; - cfg.s_clear = fb->zs.clear_value.stencil; - cfg.color_buffer_allocation = cbuf_allocation; - cfg.sample_count = fb->nr_samples; - cfg.sample_pattern = pan_sample_pattern(fb->nr_samples); - cfg.z_write_enable = (fb->zs.view.zs && !fb->zs.discard.z); - cfg.s_write_enable = (fb->zs.view.s && !fb->zs.discard.s); - cfg.has_zs_crc_extension = has_zs_crc_ext; + cfg.z_clear = fb->zs.clear_value.depth; + cfg.s_clear = fb->zs.clear_value.stencil; + cfg.color_buffer_allocation = cbuf_allocation; + cfg.sample_count = fb->nr_samples; + cfg.sample_pattern = pan_sample_pattern(fb->nr_samples); + cfg.z_write_enable = (fb->zs.view.zs && !fb->zs.discard.z); + cfg.s_write_enable = (fb->zs.view.s && !fb->zs.discard.s); + cfg.has_zs_crc_extension = has_zs_crc_ext; - if (crc_rt >= 0) { - bool *valid = fb->rts[crc_rt].crc_valid; - bool full = !fb->extent.minx && !fb->extent.miny && - fb->extent.maxx == (fb->width - 1) && - fb->extent.maxy == (fb->height - 1); + if (crc_rt >= 0) { + bool *valid = fb->rts[crc_rt].crc_valid; + bool full = !fb->extent.minx && !fb->extent.miny && + fb->extent.maxx == (fb->width - 1) && + fb->extent.maxy == (fb->height - 1); - cfg.crc_read_enable = *valid; + cfg.crc_read_enable = *valid; - /* If the data is currently invalid, still write CRC - * data if we are doing a full write, so that it is - * valid for next time. */ - cfg.crc_write_enable = *valid || full; + /* If the data is currently invalid, still write CRC + * data if we are doing a full write, so that it is + * valid for next time. */ + cfg.crc_write_enable = *valid || full; - *valid |= full; - } + *valid |= full; + } #if PAN_ARCH >= 9 - cfg.point_sprite_coord_origin_max_y = fb->sprite_coord_origin; - cfg.first_provoking_vertex = fb->first_provoking_vertex; + cfg.point_sprite_coord_origin_max_y = fb->sprite_coord_origin; + cfg.first_provoking_vertex = fb->first_provoking_vertex; #endif - } + } #if PAN_ARCH >= 6 - pan_section_pack(fbd, FRAMEBUFFER, PADDING, padding); + pan_section_pack(fbd, FRAMEBUFFER, PADDING, padding) + ; #else - pan_emit_midgard_tiler(dev, fb, tiler_ctx, - pan_section_ptr(fbd, FRAMEBUFFER, TILER)); + pan_emit_midgard_tiler(dev, fb, tiler_ctx, + pan_section_ptr(fbd, FRAMEBUFFER, TILER)); - /* All weights set to 0, nothing to do here */ - pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w); + /* All weights set to 0, nothing to do here */ + pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w) + ; #endif - if (has_zs_crc_ext) { - pan_emit_zs_crc_ext(fb, crc_rt, - out + pan_size(FRAMEBUFFER)); - rtd += pan_size(ZS_CRC_EXTENSION); - tags |= MALI_FBD_TAG_HAS_ZS_RT; - } + if (has_zs_crc_ext) { + pan_emit_zs_crc_ext(fb, crc_rt, out + pan_size(FRAMEBUFFER)); + rtd += pan_size(ZS_CRC_EXTENSION); + tags |= MALI_FBD_TAG_HAS_ZS_RT; + } - unsigned rt_count = MAX2(fb->rt_count, 1); - unsigned cbuf_offset = 0; - for (unsigned i = 0; i < rt_count; i++) { - pan_emit_rt(fb, i, cbuf_offset, rtd); - rtd += pan_size(RENDER_TARGET); - if (!fb->rts[i].view) - continue; + unsigned rt_count = MAX2(fb->rt_count, 1); + unsigned cbuf_offset = 0; + for (unsigned i = 0; i < rt_count; i++) { + pan_emit_rt(fb, i, cbuf_offset, rtd); + rtd += pan_size(RENDER_TARGET); + if (!fb->rts[i].view) + continue; - cbuf_offset += pan_bytes_per_pixel_tib(fb->rts[i].view->format) * - tile_size * fb->rts[i].view->image->layout.nr_samples; + cbuf_offset += pan_bytes_per_pixel_tib(fb->rts[i].view->format) * + tile_size * 
fb->rts[i].view->image->layout.nr_samples; - if (i != crc_rt) - *(fb->rts[i].crc_valid) = false; - } - tags |= MALI_POSITIVE(MAX2(fb->rt_count, 1)) << 2; + if (i != crc_rt) + *(fb->rts[i].crc_valid) = false; + } + tags |= MALI_POSITIVE(MAX2(fb->rt_count, 1)) << 2; - return tags; + return tags; } #else /* PAN_ARCH == 4 */ unsigned GENX(pan_emit_fbd)(const struct panfrost_device *dev, - const struct pan_fb_info *fb, - const struct pan_tls_info *tls, - const struct pan_tiler_context *tiler_ctx, - void *fbd) + const struct pan_fb_info *fb, const struct pan_tls_info *tls, + const struct pan_tiler_context *tiler_ctx, void *fbd) { - assert(fb->rt_count <= 1); + assert(fb->rt_count <= 1); - GENX(pan_emit_tls)(tls, - pan_section_ptr(fbd, FRAMEBUFFER, - LOCAL_STORAGE)); - pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) { - cfg.bound_max_x = fb->width - 1; - cfg.bound_max_y = fb->height - 1; - cfg.dithering_enable = true; - cfg.clean_pixel_write_enable = true; - cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT; - if (fb->rts[0].clear) { - cfg.clear_color_0 = fb->rts[0].clear_value[0]; - cfg.clear_color_1 = fb->rts[0].clear_value[1]; - cfg.clear_color_2 = fb->rts[0].clear_value[2]; - cfg.clear_color_3 = fb->rts[0].clear_value[3]; - } + GENX(pan_emit_tls)(tls, pan_section_ptr(fbd, FRAMEBUFFER, LOCAL_STORAGE)); + pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) { + cfg.bound_max_x = fb->width - 1; + cfg.bound_max_y = fb->height - 1; + cfg.dithering_enable = true; + cfg.clean_pixel_write_enable = true; + cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT; + if (fb->rts[0].clear) { + cfg.clear_color_0 = fb->rts[0].clear_value[0]; + cfg.clear_color_1 = fb->rts[0].clear_value[1]; + cfg.clear_color_2 = fb->rts[0].clear_value[2]; + cfg.clear_color_3 = fb->rts[0].clear_value[3]; + } - if (fb->zs.clear.z) - cfg.z_clear = fb->zs.clear_value.depth; + if (fb->zs.clear.z) + cfg.z_clear = fb->zs.clear_value.depth; - if (fb->zs.clear.s) - cfg.s_clear = fb->zs.clear_value.stencil; + if (fb->zs.clear.s) + cfg.s_clear = fb->zs.clear_value.stencil; - if (fb->rt_count && fb->rts[0].view) { - const struct pan_image_view *rt = fb->rts[0].view; + if (fb->rt_count && fb->rts[0].view) { + const struct pan_image_view *rt = fb->rts[0].view; - const struct util_format_description *desc = - util_format_description(rt->format); + const struct util_format_description *desc = + util_format_description(rt->format); - /* The swizzle for rendering is inverted from texturing */ - unsigned char swizzle[4]; - panfrost_invert_swizzle(desc->swizzle, swizzle); - cfg.swizzle = panfrost_translate_swizzle_4(swizzle); + /* The swizzle for rendering is inverted from texturing */ + unsigned char swizzle[4]; + panfrost_invert_swizzle(desc->swizzle, swizzle); + cfg.swizzle = panfrost_translate_swizzle_4(swizzle); - struct pan_blendable_format fmt = panfrost_blendable_formats_v7[rt->format]; - if (fmt.internal) { - cfg.internal_format = fmt.internal; - cfg.color_writeback_format = fmt.writeback; - } else { - unreachable("raw formats not finished for SFBD"); - } + struct pan_blendable_format fmt = + panfrost_blendable_formats_v7[rt->format]; + if (fmt.internal) { + cfg.internal_format = fmt.internal; + cfg.color_writeback_format = fmt.writeback; + } else { + unreachable("raw formats not finished for SFBD"); + } - unsigned level = rt->first_level; - struct pan_surface surf; + unsigned level = rt->first_level; + struct pan_surface surf; - pan_iview_get_surface(rt, 0, 0, 0, &surf); + pan_iview_get_surface(rt, 0, 0, 0, &surf); - 
cfg.color_write_enable = !fb->rts[0].discard; - cfg.color_writeback.base = surf.data; - cfg.color_writeback.row_stride = - rt->image->layout.slices[level].row_stride; + cfg.color_write_enable = !fb->rts[0].discard; + cfg.color_writeback.base = surf.data; + cfg.color_writeback.row_stride = + rt->image->layout.slices[level].row_stride; - cfg.color_block_format = mod_to_block_fmt(rt->image->layout.modifier); - assert(cfg.color_block_format == MALI_BLOCK_FORMAT_LINEAR || - cfg.color_block_format == MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED); + cfg.color_block_format = mod_to_block_fmt(rt->image->layout.modifier); + assert(cfg.color_block_format == MALI_BLOCK_FORMAT_LINEAR || + cfg.color_block_format == + MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED); - if (rt->image->layout.crc) { - const struct pan_image_slice_layout *slice = - &rt->image->layout.slices[level]; + if (rt->image->layout.crc) { + const struct pan_image_slice_layout *slice = + &rt->image->layout.slices[level]; - cfg.crc_buffer.row_stride = slice->crc.stride; - cfg.crc_buffer.base = rt->image->data.bo->ptr.gpu + - rt->image->data.offset + - slice->crc.offset; - } - } + cfg.crc_buffer.row_stride = slice->crc.stride; + cfg.crc_buffer.base = rt->image->data.bo->ptr.gpu + + rt->image->data.offset + slice->crc.offset; + } + } - if (fb->zs.view.zs) { - const struct pan_image_view *zs = fb->zs.view.zs; - unsigned level = zs->first_level; - struct pan_surface surf; + if (fb->zs.view.zs) { + const struct pan_image_view *zs = fb->zs.view.zs; + unsigned level = zs->first_level; + struct pan_surface surf; - pan_iview_get_surface(zs, 0, 0, 0, &surf); + pan_iview_get_surface(zs, 0, 0, 0, &surf); - cfg.zs_write_enable = !fb->zs.discard.z; - cfg.zs_writeback.base = surf.data; - cfg.zs_writeback.row_stride = - zs->image->layout.slices[level].row_stride; - cfg.zs_block_format = mod_to_block_fmt(zs->image->layout.modifier); - assert(cfg.zs_block_format == MALI_BLOCK_FORMAT_LINEAR || - cfg.zs_block_format == MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED); + cfg.zs_write_enable = !fb->zs.discard.z; + cfg.zs_writeback.base = surf.data; + cfg.zs_writeback.row_stride = + zs->image->layout.slices[level].row_stride; + cfg.zs_block_format = mod_to_block_fmt(zs->image->layout.modifier); + assert(cfg.zs_block_format == MALI_BLOCK_FORMAT_LINEAR || + cfg.zs_block_format == MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED); - cfg.zs_format = translate_zs_format(zs->format); - } + cfg.zs_format = translate_zs_format(zs->format); + } - cfg.sample_count = fb->nr_samples; + cfg.sample_count = fb->nr_samples; - if (fb->rt_count) - cfg.msaa = mali_sampling_mode(fb->rts[0].view); - } + if (fb->rt_count) + cfg.msaa = mali_sampling_mode(fb->rts[0].view); + } - pan_emit_midgard_tiler(dev, fb, tiler_ctx, - pan_section_ptr(fbd, FRAMEBUFFER, TILER)); + pan_emit_midgard_tiler(dev, fb, tiler_ctx, + pan_section_ptr(fbd, FRAMEBUFFER, TILER)); - /* All weights set to 0, nothing to do here */ - pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w); + /* All weights set to 0, nothing to do here */ + pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w) + ; - pan_section_pack(fbd, FRAMEBUFFER, PADDING_1, padding); - pan_section_pack(fbd, FRAMEBUFFER, PADDING_2, padding); - return 0; + pan_section_pack(fbd, FRAMEBUFFER, PADDING_1, padding) + ; + pan_section_pack(fbd, FRAMEBUFFER, PADDING_2, padding) + ; + return 0; } #endif #if PAN_ARCH >= 6 void -GENX(pan_emit_tiler_heap)(const struct panfrost_device *dev, - void *out) +GENX(pan_emit_tiler_heap)(const struct panfrost_device *dev, void *out) { - pan_pack(out, 
TILER_HEAP, heap) { - heap.size = dev->tiler_heap->size; - heap.base = dev->tiler_heap->ptr.gpu; - heap.bottom = dev->tiler_heap->ptr.gpu; - heap.top = dev->tiler_heap->ptr.gpu + dev->tiler_heap->size; - } + pan_pack(out, TILER_HEAP, heap) { + heap.size = dev->tiler_heap->size; + heap.base = dev->tiler_heap->ptr.gpu; + heap.bottom = dev->tiler_heap->ptr.gpu; + heap.top = dev->tiler_heap->ptr.gpu + dev->tiler_heap->size; + } } void -GENX(pan_emit_tiler_ctx)(const struct panfrost_device *dev, - unsigned fb_width, unsigned fb_height, - unsigned nr_samples, - bool first_provoking_vertex, - mali_ptr heap, - void *out) +GENX(pan_emit_tiler_ctx)(const struct panfrost_device *dev, unsigned fb_width, + unsigned fb_height, unsigned nr_samples, + bool first_provoking_vertex, mali_ptr heap, void *out) { - unsigned max_levels = dev->tiler_features.max_levels; - assert(max_levels >= 2); + unsigned max_levels = dev->tiler_features.max_levels; + assert(max_levels >= 2); - pan_pack(out, TILER_CONTEXT, tiler) { - /* TODO: Select hierarchy mask more effectively */ - tiler.hierarchy_mask = (max_levels >= 8) ? 0xFF : 0x28; + pan_pack(out, TILER_CONTEXT, tiler) { + /* TODO: Select hierarchy mask more effectively */ + tiler.hierarchy_mask = (max_levels >= 8) ? 0xFF : 0x28; - /* For large framebuffers, disable the smallest bin size to - * avoid pathological tiler memory usage. Required to avoid OOM - * on dEQP-GLES31.functional.fbo.no_attachments.maximums.all on - * Mali-G57. - */ - if (MAX2(fb_width, fb_height) >= 4096) - tiler.hierarchy_mask &= ~1; + /* For large framebuffers, disable the smallest bin size to + * avoid pathological tiler memory usage. Required to avoid OOM + * on dEQP-GLES31.functional.fbo.no_attachments.maximums.all on + * Mali-G57. + */ + if (MAX2(fb_width, fb_height) >= 4096) + tiler.hierarchy_mask &= ~1; - tiler.fb_width = fb_width; - tiler.fb_height = fb_height; - tiler.heap = heap; - tiler.sample_pattern = pan_sample_pattern(nr_samples); + tiler.fb_width = fb_width; + tiler.fb_height = fb_height; + tiler.heap = heap; + tiler.sample_pattern = pan_sample_pattern(nr_samples); #if PAN_ARCH >= 9 - tiler.first_provoking_vertex = first_provoking_vertex; + tiler.first_provoking_vertex = first_provoking_vertex; #endif - } + } } #endif void -GENX(pan_emit_fragment_job)(const struct pan_fb_info *fb, - mali_ptr fbd, +GENX(pan_emit_fragment_job)(const struct pan_fb_info *fb, mali_ptr fbd, void *out) { - pan_section_pack(out, FRAGMENT_JOB, HEADER, header) { - header.type = MALI_JOB_TYPE_FRAGMENT; - header.index = 1; - } + pan_section_pack(out, FRAGMENT_JOB, HEADER, header) { + header.type = MALI_JOB_TYPE_FRAGMENT; + header.index = 1; + } - pan_section_pack(out, FRAGMENT_JOB, PAYLOAD, payload) { - payload.bound_min_x = fb->extent.minx >> MALI_TILE_SHIFT; - payload.bound_min_y = fb->extent.miny >> MALI_TILE_SHIFT; - payload.bound_max_x = fb->extent.maxx >> MALI_TILE_SHIFT; - payload.bound_max_y = fb->extent.maxy >> MALI_TILE_SHIFT; - payload.framebuffer = fbd; + pan_section_pack(out, FRAGMENT_JOB, PAYLOAD, payload) { + payload.bound_min_x = fb->extent.minx >> MALI_TILE_SHIFT; + payload.bound_min_y = fb->extent.miny >> MALI_TILE_SHIFT; + payload.bound_max_x = fb->extent.maxx >> MALI_TILE_SHIFT; + payload.bound_max_y = fb->extent.maxy >> MALI_TILE_SHIFT; + payload.framebuffer = fbd; #if PAN_ARCH >= 5 - if (fb->tile_map.base) { - payload.has_tile_enable_map = true; - payload.tile_enable_map = fb->tile_map.base; - payload.tile_enable_map_row_stride = fb->tile_map.stride; - } + if (fb->tile_map.base) { + 
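A self-contained sketch of the hierarchy-mask heuristic packed in the TILER_CONTEXT above, assuming each bit of the mask enables one tiler bin-size level with bit 0 being the smallest bin (illustrative; the real limit comes from dev->tiler_features.max_levels):

static unsigned
pick_hierarchy_mask_sketch(unsigned max_levels, unsigned fb_width,
                           unsigned fb_height)
{
   /* TODO in the driver: select this more effectively */
   unsigned mask = (max_levels >= 8) ? 0xFF : 0x28;

   /* Drop the smallest bin on large framebuffers to bound tiler memory */
   unsigned max_dim = fb_width > fb_height ? fb_width : fb_height;
   if (max_dim >= 4096)
      mask &= ~1u;

   return mask;
}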
payload.has_tile_enable_map = true; + payload.tile_enable_map = fb->tile_map.base; + payload.tile_enable_map_row_stride = fb->tile_map.stride; + } #endif - } + } } diff --git a/src/panfrost/lib/pan_cs.h b/src/panfrost/lib/pan_cs.h index 8186102e5c0..c192ac52a5f 100644 --- a/src/panfrost/lib/pan_cs.h +++ b/src/panfrost/lib/pan_cs.h @@ -33,152 +33,140 @@ #include "pan_texture.h" struct pan_compute_dim { - uint32_t x, y, z; + uint32_t x, y, z; }; struct pan_fb_color_attachment { - const struct pan_image_view *view; - bool *crc_valid; - bool clear; - bool preload; - bool discard; - uint32_t clear_value[4]; + const struct pan_image_view *view; + bool *crc_valid; + bool clear; + bool preload; + bool discard; + uint32_t clear_value[4]; }; struct pan_fb_zs_attachment { - struct { - const struct pan_image_view *zs, *s; - } view; + struct { + const struct pan_image_view *zs, *s; + } view; - struct { - bool z, s; - } clear; + struct { + bool z, s; + } clear; - struct { - bool z, s; - } discard; + struct { + bool z, s; + } discard; - struct { - bool z, s; - } preload; + struct { + bool z, s; + } preload; - struct { - float depth; - uint8_t stencil; - } clear_value; + struct { + float depth; + uint8_t stencil; + } clear_value; }; struct pan_tiler_context { - union { - mali_ptr bifrost; - struct { - bool disable; - struct panfrost_bo *polygon_list; - } midgard; - }; + union { + mali_ptr bifrost; + struct { + bool disable; + struct panfrost_bo *polygon_list; + } midgard; + }; }; struct pan_tls_info { - struct { - mali_ptr ptr; - unsigned size; - } tls; + struct { + mali_ptr ptr; + unsigned size; + } tls; - struct { - unsigned instances; - mali_ptr ptr; - unsigned size; - } wls; + struct { + unsigned instances; + mali_ptr ptr; + unsigned size; + } wls; }; struct pan_fb_bifrost_info { - struct { - struct panfrost_ptr dcds; - unsigned modes[3]; - } pre_post; + struct { + struct panfrost_ptr dcds; + unsigned modes[3]; + } pre_post; }; struct pan_fb_info { - unsigned width, height; - struct { - /* Max values are inclusive */ - unsigned minx, miny, maxx, maxy; - } extent; - unsigned nr_samples; - unsigned rt_count; - struct pan_fb_color_attachment rts[8]; - struct pan_fb_zs_attachment zs; + unsigned width, height; + struct { + /* Max values are inclusive */ + unsigned minx, miny, maxx, maxy; + } extent; + unsigned nr_samples; + unsigned rt_count; + struct pan_fb_color_attachment rts[8]; + struct pan_fb_zs_attachment zs; - struct { - unsigned stride; - mali_ptr base; - } tile_map; + struct { + unsigned stride; + mali_ptr base; + } tile_map; - union { - struct pan_fb_bifrost_info bifrost; - }; + union { + struct pan_fb_bifrost_info bifrost; + }; - /* Only used on Valhall */ - bool sprite_coord_origin; - bool first_provoking_vertex; + /* Only used on Valhall */ + bool sprite_coord_origin; + bool first_provoking_vertex; }; static inline unsigned pan_wls_instances(const struct pan_compute_dim *dim) { - return util_next_power_of_two(dim->x) * - util_next_power_of_two(dim->y) * - util_next_power_of_two(dim->z); + return util_next_power_of_two(dim->x) * util_next_power_of_two(dim->y) * + util_next_power_of_two(dim->z); } static inline unsigned pan_wls_adjust_size(unsigned wls_size) { - return util_next_power_of_two(MAX2(wls_size, 128)); + return util_next_power_of_two(MAX2(wls_size, 128)); } static inline unsigned pan_wls_mem_size(const struct panfrost_device *dev, - const struct pan_compute_dim *dim, - unsigned wls_size) + const struct pan_compute_dim *dim, unsigned wls_size) { - unsigned instances = 
pan_wls_instances(dim); + unsigned instances = pan_wls_instances(dim); - return pan_wls_adjust_size(wls_size) * instances * dev->core_id_range; + return pan_wls_adjust_size(wls_size) * instances * dev->core_id_range; } #ifdef PAN_ARCH -void -GENX(pan_emit_tls)(const struct pan_tls_info *info, - void *out); +void GENX(pan_emit_tls)(const struct pan_tls_info *info, void *out); -int -GENX(pan_select_crc_rt)(const struct pan_fb_info *fb, unsigned tile_size); +int GENX(pan_select_crc_rt)(const struct pan_fb_info *fb, unsigned tile_size); -unsigned -GENX(pan_emit_fbd)(const struct panfrost_device *dev, - const struct pan_fb_info *fb, - const struct pan_tls_info *tls, - const struct pan_tiler_context *tiler_ctx, - void *out); +unsigned GENX(pan_emit_fbd)(const struct panfrost_device *dev, + const struct pan_fb_info *fb, + const struct pan_tls_info *tls, + const struct pan_tiler_context *tiler_ctx, + void *out); #if PAN_ARCH >= 6 -void -GENX(pan_emit_tiler_heap)(const struct panfrost_device *dev, - void *out); +void GENX(pan_emit_tiler_heap)(const struct panfrost_device *dev, void *out); -void -GENX(pan_emit_tiler_ctx)(const struct panfrost_device *dev, - unsigned fb_width, unsigned fb_height, - unsigned nr_samples, bool first_provoking_vertex, - mali_ptr heap, - void *out); +void GENX(pan_emit_tiler_ctx)(const struct panfrost_device *dev, + unsigned fb_width, unsigned fb_height, + unsigned nr_samples, bool first_provoking_vertex, + mali_ptr heap, void *out); #endif -void -GENX(pan_emit_fragment_job)(const struct pan_fb_info *fb, - mali_ptr fbd, - void *out); +void GENX(pan_emit_fragment_job)(const struct pan_fb_info *fb, mali_ptr fbd, + void *out); #endif /* ifdef PAN_ARCH */ #endif diff --git a/src/panfrost/lib/pan_device.h b/src/panfrost/lib/pan_device.h index 52c029bfe9f..e441f1e4bc3 100644 --- a/src/panfrost/lib/pan_device.h +++ b/src/panfrost/lib/pan_device.h @@ -32,10 +32,10 @@ #include #include "renderonly/renderonly.h" -#include "util/u_dynarray.h" #include "util/bitset.h" #include "util/list.h" #include "util/sparse_array.h" +#include "util/u_dynarray.h" #include "panfrost/util/pan_ir.h" #include "pan_pool.h" @@ -61,193 +61,185 @@ extern "C" { #define NR_BO_CACHE_BUCKETS (MAX_BO_CACHE_BUCKET - MIN_BO_CACHE_BUCKET + 1) struct pan_blitter { - struct { - struct pan_pool *pool; - struct hash_table *blit; - struct hash_table *blend; - pthread_mutex_t lock; - } shaders; - struct { - struct pan_pool *pool; - struct hash_table *rsds; - pthread_mutex_t lock; - } rsds; + struct { + struct pan_pool *pool; + struct hash_table *blit; + struct hash_table *blend; + pthread_mutex_t lock; + } shaders; + struct { + struct pan_pool *pool; + struct hash_table *rsds; + pthread_mutex_t lock; + } rsds; }; struct pan_blend_shaders { - struct hash_table *shaders; - pthread_mutex_t lock; + struct hash_table *shaders; + pthread_mutex_t lock; }; struct pan_indirect_dispatch { - struct panfrost_ubo_push push; - struct panfrost_bo *bin; - struct panfrost_bo *descs; + struct panfrost_ubo_push push; + struct panfrost_bo *bin; + struct panfrost_bo *descs; }; /** Implementation-defined tiler features */ struct panfrost_tiler_features { - /** Number of bytes per tiler bin */ - unsigned bin_size; + /** Number of bytes per tiler bin */ + unsigned bin_size; - /** Maximum number of levels that may be simultaneously enabled. - * Invariant: bitcount(hierarchy_mask) <= max_levels */ - unsigned max_levels; + /** Maximum number of levels that may be simultaneously enabled. 
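A runnable sketch of the workgroup-local-storage sizing math from pan_cs.h above: each dispatch dimension is rounded up to a power of two, the per-instance size is rounded up to a power of two of at least 128 bytes, and the total scales with the device's core ID range. For an 8x8x1 dispatch, 200 bytes of WLS and a core ID range of 10, this gives 256 * 64 * 10 = 163840 bytes.

#include <stdint.h>

static uint32_t
next_pow2_sketch(uint32_t x)
{
   uint32_t p = 1;
   while (p < x)
      p <<= 1;
   return p;
}

static uint64_t
wls_total_bytes_sketch(uint32_t dim_x, uint32_t dim_y, uint32_t dim_z,
                       uint32_t wls_size, uint32_t core_id_range)
{
   uint64_t instances = (uint64_t)next_pow2_sketch(dim_x) *
                        next_pow2_sketch(dim_y) * next_pow2_sketch(dim_z);
   uint32_t adjusted = next_pow2_sketch(wls_size < 128 ? 128 : wls_size);

   return (uint64_t)adjusted * instances * core_id_range;
}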
+ * Invariant: bitcount(hierarchy_mask) <= max_levels */ + unsigned max_levels; }; struct panfrost_model { - /* GPU ID */ - uint32_t gpu_id; + /* GPU ID */ + uint32_t gpu_id; - /* Marketing name for the GPU, used as the GL_RENDERER */ - const char *name; + /* Marketing name for the GPU, used as the GL_RENDERER */ + const char *name; - /* Set of associated performance counters */ - const char *performance_counters; + /* Set of associated performance counters */ + const char *performance_counters; - /* Minimum GPU revision required for anisotropic filtering. ~0 and 0 - * means "no revisions support anisotropy" and "all revisions support - * anistropy" respectively -- so checking for anisotropy is simply - * comparing the reivsion. - */ - uint32_t min_rev_anisotropic; + /* Minimum GPU revision required for anisotropic filtering. ~0 and 0 + * means "no revisions support anisotropy" and "all revisions support + * anistropy" respectively -- so checking for anisotropy is simply + * comparing the reivsion. + */ + uint32_t min_rev_anisotropic; - /* Default tilebuffer size in bytes for the model. */ - unsigned tilebuffer_size; + /* Default tilebuffer size in bytes for the model. */ + unsigned tilebuffer_size; - struct { - /* The GPU lacks the capability for hierarchical tiling, without - * an "Advanced Tiling Unit", instead requiring a single bin - * size for the entire framebuffer be selected by the driver - */ - bool no_hierarchical_tiling; - } quirks; + struct { + /* The GPU lacks the capability for hierarchical tiling, without + * an "Advanced Tiling Unit", instead requiring a single bin + * size for the entire framebuffer be selected by the driver + */ + bool no_hierarchical_tiling; + } quirks; }; struct panfrost_device { - /* For ralloc */ - void *memctx; + /* For ralloc */ + void *memctx; - int fd; + int fd; - /* Properties of the GPU in use */ - unsigned arch; - unsigned gpu_id; - unsigned revision; + /* Properties of the GPU in use */ + unsigned arch; + unsigned gpu_id; + unsigned revision; - /* Number of shader cores */ - unsigned core_count; + /* Number of shader cores */ + unsigned core_count; - /* Range of core IDs, equal to the maximum core ID + 1. Satisfies - * core_id_range >= core_count. - */ - unsigned core_id_range; + /* Range of core IDs, equal to the maximum core ID + 1. Satisfies + * core_id_range >= core_count. + */ + unsigned core_id_range; - /* Maximum tilebuffer size in bytes for optimal performance. */ - unsigned optimal_tib_size; + /* Maximum tilebuffer size in bytes for optimal performance. 
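The min_rev_anisotropic convention described in the struct comment above reduces to a single comparison; a hedged sketch (the field and its semantics are as documented, the helper name is made up):

#include <stdbool.h>
#include <stdint.h>

static bool
supports_anisotropic_sketch(uint32_t gpu_revision,
                            uint32_t min_rev_anisotropic)
{
   /* ~0 => no revision qualifies; 0 => every revision qualifies */
   return gpu_revision >= min_rev_anisotropic;
}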
*/ + unsigned optimal_tib_size; - unsigned thread_tls_alloc; - struct panfrost_tiler_features tiler_features; - const struct panfrost_model *model; - bool has_afbc; + unsigned thread_tls_alloc; + struct panfrost_tiler_features tiler_features; + const struct panfrost_model *model; + bool has_afbc; - /* Table of formats, indexed by a PIPE format */ - const struct panfrost_format *formats; + /* Table of formats, indexed by a PIPE format */ + const struct panfrost_format *formats; - /* Bitmask of supported compressed texture formats */ - uint32_t compressed_formats; + /* Bitmask of supported compressed texture formats */ + uint32_t compressed_formats; - /* debug flags, see pan_util.h how to interpret */ - unsigned debug; + /* debug flags, see pan_util.h how to interpret */ + unsigned debug; - drmVersionPtr kernel_version; + drmVersionPtr kernel_version; - struct renderonly *ro; + struct renderonly *ro; - pthread_mutex_t bo_map_lock; - struct util_sparse_array bo_map; + pthread_mutex_t bo_map_lock; + struct util_sparse_array bo_map; - struct { - pthread_mutex_t lock; + struct { + pthread_mutex_t lock; - /* List containing all cached BOs sorted in LRU (Least - * Recently Used) order. This allows us to quickly evict BOs - * that are more than 1 second old. - */ - struct list_head lru; + /* List containing all cached BOs sorted in LRU (Least + * Recently Used) order. This allows us to quickly evict BOs + * that are more than 1 second old. + */ + struct list_head lru; - /* The BO cache is a set of buckets with power-of-two sizes - * ranging from 2^12 (4096, the page size) to - * 2^(12 + MAX_BO_CACHE_BUCKETS). - * Each bucket is a linked list of free panfrost_bo objects. */ + /* The BO cache is a set of buckets with power-of-two sizes + * ranging from 2^12 (4096, the page size) to + * 2^(12 + MAX_BO_CACHE_BUCKETS). + * Each bucket is a linked list of free panfrost_bo objects. */ - struct list_head buckets[NR_BO_CACHE_BUCKETS]; - } bo_cache; + struct list_head buckets[NR_BO_CACHE_BUCKETS]; + } bo_cache; - struct pan_blitter blitter; - struct pan_blend_shaders blend_shaders; - struct pan_indirect_dispatch indirect_dispatch; + struct pan_blitter blitter; + struct pan_blend_shaders blend_shaders; + struct pan_indirect_dispatch indirect_dispatch; - /* Tiler heap shared across all tiler jobs, allocated against the - * device since there's only a single tiler. Since this is invisible to - * the CPU, it's okay for multiple contexts to reference it - * simultaneously; by keeping on the device struct, we eliminate a - * costly per-context allocation. */ + /* Tiler heap shared across all tiler jobs, allocated against the + * device since there's only a single tiler. Since this is invisible to + * the CPU, it's okay for multiple contexts to reference it + * simultaneously; by keeping on the device struct, we eliminate a + * costly per-context allocation. */ - struct panfrost_bo *tiler_heap; + struct panfrost_bo *tiler_heap; - /* The tiler heap is shared by all contexts, and is written by tiler - * jobs and read by fragment job. We need to ensure that a - * vertex/tiler job chain from one context is not inserted between - * the vertex/tiler and fragment job of another context, otherwise - * we end up with tiler heap corruption. - */ - pthread_mutex_t submit_lock; + /* The tiler heap is shared by all contexts, and is written by tiler + * jobs and read by fragment job. 
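A sketch of mapping an allocation size to one of the power-of-two BO cache buckets described above, assuming the smallest bucket is 2^12 bytes as the comment states (the driver's own helper and its clamping behaviour may differ):

#include <stddef.h>

static unsigned
bo_cache_bucket_index_sketch(size_t size)
{
   unsigned log2_size = 12; /* smallest bucket: 4096-byte pages */

   while (((size_t)1 << log2_size) < size)
      log2_size++;

   return log2_size - 12; /* index into bo_cache.buckets[] */
}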
We need to ensure that a + * vertex/tiler job chain from one context is not inserted between + * the vertex/tiler and fragment job of another context, otherwise + * we end up with tiler heap corruption. + */ + pthread_mutex_t submit_lock; - /* Sample positions are preloaded into a write-once constant buffer, - * such that they can be referenced fore free later. Needed - * unconditionally on Bifrost, and useful for sharing with Midgard */ + /* Sample positions are preloaded into a write-once constant buffer, + * such that they can be referenced fore free later. Needed + * unconditionally on Bifrost, and useful for sharing with Midgard */ - struct panfrost_bo *sample_positions; + struct panfrost_bo *sample_positions; }; -void -panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev); +void panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev); -void -panfrost_close_device(struct panfrost_device *dev); +void panfrost_close_device(struct panfrost_device *dev); -bool -panfrost_supports_compressed_format(struct panfrost_device *dev, unsigned fmt); +bool panfrost_supports_compressed_format(struct panfrost_device *dev, + unsigned fmt); -void -panfrost_upload_sample_positions(struct panfrost_device *dev); +void panfrost_upload_sample_positions(struct panfrost_device *dev); -mali_ptr -panfrost_sample_positions(const struct panfrost_device *dev, - enum mali_sample_pattern pattern); -void -panfrost_query_sample_position( - enum mali_sample_pattern pattern, - unsigned sample_idx, - float *out); +mali_ptr panfrost_sample_positions(const struct panfrost_device *dev, + enum mali_sample_pattern pattern); +void panfrost_query_sample_position(enum mali_sample_pattern pattern, + unsigned sample_idx, float *out); -unsigned -panfrost_query_l2_slices(const struct panfrost_device *dev); +unsigned panfrost_query_l2_slices(const struct panfrost_device *dev); static inline struct panfrost_bo * pan_lookup_bo(struct panfrost_device *dev, uint32_t gem_handle) { - return (struct panfrost_bo *)util_sparse_array_get(&dev->bo_map, gem_handle); + return (struct panfrost_bo *)util_sparse_array_get(&dev->bo_map, gem_handle); } static inline bool pan_is_bifrost(const struct panfrost_device *dev) { - return dev->arch >= 6 && dev->arch <= 7; + return dev->arch >= 6 && dev->arch <= 7; } -const struct panfrost_model * panfrost_get_model(uint32_t gpu_id); +const struct panfrost_model *panfrost_get_model(uint32_t gpu_id); #if defined(__cplusplus) } // extern "C" diff --git a/src/panfrost/lib/pan_earlyzs.c b/src/panfrost/lib/pan_earlyzs.c index da27cf906ad..d3c82c1dc55 100644 --- a/src/panfrost/lib/pan_earlyzs.c +++ b/src/panfrost/lib/pan_earlyzs.c @@ -32,10 +32,10 @@ static enum pan_earlyzs best_early_mode(bool zs_always_passes) { - if (zs_always_passes) - return PAN_EARLYZS_WEAK_EARLY; - else - return PAN_EARLYZS_FORCE_EARLY; + if (zs_always_passes) + return PAN_EARLYZS_WEAK_EARLY; + else + return PAN_EARLYZS_FORCE_EARLY; } /* @@ -44,59 +44,56 @@ best_early_mode(bool zs_always_passes) * lookup table, synchronized with pan_earlyzs_get. */ static struct pan_earlyzs_state -analyze(const struct pan_shader_info *s, - bool writes_zs_or_oq, - bool alpha_to_coverage, - bool zs_always_passes) +analyze(const struct pan_shader_info *s, bool writes_zs_or_oq, + bool alpha_to_coverage, bool zs_always_passes) { - /* If the shader writes depth or stencil, all depth/stencil tests must - * be deferred until the value is known after the ZS_EMIT instruction, - * if present. 
ZS_EMIT must precede ATEST, so the value is known when - * ATEST executes, justifying the late test/update. - */ - bool shader_writes_zs = (s->fs.writes_depth || s->fs.writes_stencil); - bool late_update = shader_writes_zs; - bool late_kill = shader_writes_zs; + /* If the shader writes depth or stencil, all depth/stencil tests must + * be deferred until the value is known after the ZS_EMIT instruction, + * if present. ZS_EMIT must precede ATEST, so the value is known when + * ATEST executes, justifying the late test/update. + */ + bool shader_writes_zs = (s->fs.writes_depth || s->fs.writes_stencil); + bool late_update = shader_writes_zs; + bool late_kill = shader_writes_zs; - /* Late coverage updates are required if the coverage mask depends on - * the results of the shader. Discards are implemented as coverage mask - * updates and must be considered. Strictly, depth/stencil writes may - * also update the coverage mask, but these already force late updates. - */ - bool late_coverage = s->fs.writes_coverage || - s->fs.can_discard || - alpha_to_coverage; + /* Late coverage updates are required if the coverage mask depends on + * the results of the shader. Discards are implemented as coverage mask + * updates and must be considered. Strictly, depth/stencil writes may + * also update the coverage mask, but these already force late updates. + */ + bool late_coverage = + s->fs.writes_coverage || s->fs.can_discard || alpha_to_coverage; - /* Late coverage mask updates may affect the value written to the - * depth/stencil buffer (if a pixel is discarded entirely). However, - * they do not affect depth/stencil testing. So they may only matter if - * depth or stencil is written. - * - * That dependency does mean late coverage mask updates require late - * depth/stencil updates. - * - * Similarly, occlusion queries count samples that pass the - * depth/stencil tests, so occlusion queries with late coverage also - * require a late update. - */ - late_update |= (late_coverage && writes_zs_or_oq); + /* Late coverage mask updates may affect the value written to the + * depth/stencil buffer (if a pixel is discarded entirely). However, + * they do not affect depth/stencil testing. So they may only matter if + * depth or stencil is written. + * + * That dependency does mean late coverage mask updates require late + * depth/stencil updates. + * + * Similarly, occlusion queries count samples that pass the + * depth/stencil tests, so occlusion queries with late coverage also + * require a late update. + */ + late_update |= (late_coverage && writes_zs_or_oq); - /* Side effects require late depth/stencil tests to ensure the shader - * isn't killed before the side effects execute. - */ - late_kill |= s->writes_global; + /* Side effects require late depth/stencil tests to ensure the shader + * isn't killed before the side effects execute. + */ + late_kill |= s->writes_global; - /* Finally, the shader may override and force early fragment tests */ - late_update &= !s->fs.early_fragment_tests; - late_kill &= !s->fs.early_fragment_tests; + /* Finally, the shader may override and force early fragment tests */ + late_update &= !s->fs.early_fragment_tests; + late_kill &= !s->fs.early_fragment_tests; - /* Collect results */ - enum pan_earlyzs early_mode = best_early_mode(zs_always_passes); + /* Collect results */ + enum pan_earlyzs early_mode = best_early_mode(zs_always_passes); - return (struct pan_earlyzs_state) { - .update = late_update ? PAN_EARLYZS_FORCE_LATE : early_mode, - .kill = late_kill ? 
PAN_EARLYZS_FORCE_LATE : early_mode, - }; + return (struct pan_earlyzs_state){ + .update = late_update ? PAN_EARLYZS_FORCE_LATE : early_mode, + .kill = late_kill ? PAN_EARLYZS_FORCE_LATE : early_mode, + }; } /* @@ -106,14 +103,14 @@ analyze(const struct pan_shader_info *s, struct pan_earlyzs_lut pan_earlyzs_analyze(const struct pan_shader_info *s) { - struct pan_earlyzs_lut lut; + struct pan_earlyzs_lut lut; - for (unsigned v0 = 0; v0 < 2; ++v0) { - for (unsigned v1 = 0; v1 < 2; ++v1) { - for (unsigned v2 = 0; v2 < 2; ++v2) - lut.states[v0][v1][v2] = analyze(s, v0, v1, v2); - } - } + for (unsigned v0 = 0; v0 < 2; ++v0) { + for (unsigned v1 = 0; v1 < 2; ++v1) { + for (unsigned v2 = 0; v2 < 2; ++v2) + lut.states[v0][v1][v2] = analyze(s, v0, v1, v2); + } + } - return lut; + return lut; } diff --git a/src/panfrost/lib/pan_earlyzs.h b/src/panfrost/lib/pan_earlyzs.h index f0a0af496c9..fdc626b00aa 100644 --- a/src/panfrost/lib/pan_earlyzs.h +++ b/src/panfrost/lib/pan_earlyzs.h @@ -32,21 +32,21 @@ extern "C" { /* Matches hardware Pixel Kill enum on Bifrost and Valhall */ enum pan_earlyzs { - PAN_EARLYZS_FORCE_EARLY = 0, - PAN_EARLYZS_WEAK_EARLY = 2, - PAN_EARLYZS_FORCE_LATE = 3 + PAN_EARLYZS_FORCE_EARLY = 0, + PAN_EARLYZS_WEAK_EARLY = 2, + PAN_EARLYZS_FORCE_LATE = 3 }; /* Early-ZS pair. */ struct pan_earlyzs_state { - /* Z/S test and update */ - enum pan_earlyzs update : 2; + /* Z/S test and update */ + enum pan_earlyzs update : 2; - /* Pixel kill */ - enum pan_earlyzs kill : 2; + /* Pixel kill */ + enum pan_earlyzs kill : 2; - /* So it fits in a byte */ - unsigned padding : 4; + /* So it fits in a byte */ + unsigned padding : 4; }; /* Internal lookup table. Users should treat as an opaque structure and only @@ -54,7 +54,7 @@ struct pan_earlyzs_state { * for definition of the arrays. */ struct pan_earlyzs_lut { - struct pan_earlyzs_state states[2][2][2]; + struct pan_earlyzs_state states[2][2][2]; }; /* @@ -62,11 +62,10 @@ struct pan_earlyzs_lut { * defined inline in the header. 
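The decision rules in analyze() above boil down to a few booleans; here is a self-contained restatement with plain flags in place of the shader-info struct, showing that a discarding shader drawn while an occlusion query is active gets a late update but can keep an early pixel kill:

#include <stdbool.h>
#include <stdio.h>

int
main(void)
{
   bool shader_writes_zs = false;      /* no depth/stencil export */
   bool late_coverage = true;          /* discard or alpha-to-coverage */
   bool writes_zs_or_oq = true;        /* occlusion query active */
   bool writes_global = false;         /* no side effects */
   bool early_fragment_tests = false;  /* no layout(early_fragment_tests) */

   bool late_update = shader_writes_zs || (late_coverage && writes_zs_or_oq);
   bool late_kill = shader_writes_zs || writes_global;

   late_update &= !early_fragment_tests;
   late_kill &= !early_fragment_tests;

   printf("update=%s kill=%s\n", late_update ? "late" : "early",
          late_kill ? "late" : "early");
   return 0;
}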
*/ static inline struct pan_earlyzs_state -pan_earlyzs_get(struct pan_earlyzs_lut lut, - bool writes_zs_or_oq, bool alpha_to_coverage, - bool zs_always_passes) +pan_earlyzs_get(struct pan_earlyzs_lut lut, bool writes_zs_or_oq, + bool alpha_to_coverage, bool zs_always_passes) { - return lut.states[writes_zs_or_oq][alpha_to_coverage][zs_always_passes]; + return lut.states[writes_zs_or_oq][alpha_to_coverage][zs_always_passes]; } struct pan_shader_info; diff --git a/src/panfrost/lib/pan_encoder.h b/src/panfrost/lib/pan_encoder.h index 68349996cde..8a3018b763c 100644 --- a/src/panfrost/lib/pan_encoder.h +++ b/src/panfrost/lib/pan_encoder.h @@ -30,23 +30,21 @@ #include "util/macros.h" #include +#include "genxml/gen_macros.h" #include "util/format/u_format.h" #include "pan_bo.h" -#include "genxml/gen_macros.h" #include "pan_device.h" /* Tiler structure size computation */ -unsigned -panfrost_tiler_header_size(unsigned width, unsigned height, unsigned mask, bool hierarchy); +unsigned panfrost_tiler_header_size(unsigned width, unsigned height, + unsigned mask, bool hierarchy); -unsigned -panfrost_tiler_full_size(unsigned width, unsigned height, unsigned mask, bool hierarchy); +unsigned panfrost_tiler_full_size(unsigned width, unsigned height, + unsigned mask, bool hierarchy); -unsigned -panfrost_choose_hierarchy_mask( - unsigned width, unsigned height, - unsigned vertex_count, bool hierarchy); +unsigned panfrost_choose_hierarchy_mask(unsigned width, unsigned height, + unsigned vertex_count, bool hierarchy); #if defined(PAN_ARCH) && PAN_ARCH <= 5 static inline unsigned @@ -54,36 +52,34 @@ panfrost_tiler_get_polygon_list_size(const struct panfrost_device *dev, unsigned fb_width, unsigned fb_height, bool has_draws) { - if (!has_draws) - return MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE + 4; + if (!has_draws) + return MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE + 4; - bool hierarchy = !dev->model->quirks.no_hierarchical_tiling; - unsigned hierarchy_mask = - panfrost_choose_hierarchy_mask(fb_width, fb_height, 1, hierarchy); + bool hierarchy = !dev->model->quirks.no_hierarchical_tiling; + unsigned hierarchy_mask = + panfrost_choose_hierarchy_mask(fb_width, fb_height, 1, hierarchy); - return panfrost_tiler_full_size(fb_width, fb_height, hierarchy_mask, hierarchy) + - panfrost_tiler_header_size(fb_width, fb_height, hierarchy_mask, hierarchy); + return panfrost_tiler_full_size(fb_width, fb_height, hierarchy_mask, + hierarchy) + + panfrost_tiler_header_size(fb_width, fb_height, hierarchy_mask, + hierarchy); } #endif /* Stack sizes */ -unsigned -panfrost_get_stack_shift(unsigned stack_size); +unsigned panfrost_get_stack_shift(unsigned stack_size); -unsigned -panfrost_get_total_stack_size( - unsigned thread_size, - unsigned threads_per_core, - unsigned core_id_range); +unsigned panfrost_get_total_stack_size(unsigned thread_size, + unsigned threads_per_core, + unsigned core_id_range); /* Attributes / instancing */ -unsigned -panfrost_padded_vertex_count(unsigned vertex_count); +unsigned panfrost_padded_vertex_count(unsigned vertex_count); -unsigned -panfrost_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift, unsigned *extra_flags); +unsigned panfrost_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift, + unsigned *extra_flags); #ifdef PAN_ARCH /* Records for gl_VertexID and gl_InstanceID use special encodings on Midgard */ @@ -91,41 +87,38 @@ panfrost_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift, unsigned #if PAN_ARCH <= 5 static inline void panfrost_vertex_id(unsigned padded_count, - 
struct mali_attribute_buffer_packed *attr, - bool instanced) + struct mali_attribute_buffer_packed *attr, bool instanced) { - pan_pack(attr, ATTRIBUTE_VERTEX_ID, cfg) { - if (instanced) { - cfg.divisor_r = __builtin_ctz(padded_count); - cfg.divisor_p = padded_count >> (cfg.divisor_r + 1); - } else { - /* Large values so the modulo is a no-op */ - cfg.divisor_r = 0x1F; - cfg.divisor_p = 0x4; - } - } + pan_pack(attr, ATTRIBUTE_VERTEX_ID, cfg) { + if (instanced) { + cfg.divisor_r = __builtin_ctz(padded_count); + cfg.divisor_p = padded_count >> (cfg.divisor_r + 1); + } else { + /* Large values so the modulo is a no-op */ + cfg.divisor_r = 0x1F; + cfg.divisor_p = 0x4; + } + } } static inline void panfrost_instance_id(unsigned padded_count, - struct mali_attribute_buffer_packed *attr, - bool instanced) + struct mali_attribute_buffer_packed *attr, bool instanced) { - pan_pack(attr, ATTRIBUTE_INSTANCE_ID, cfg) { - if (!instanced || padded_count <= 1) { - /* Divide by large number to force to 0 */ - cfg.divisor_p = ((1u << 31) - 1); - cfg.divisor_r = 0x1F; - cfg.divisor_e = 0x1; - } else if(util_is_power_of_two_or_zero(padded_count)) { - /* Can't underflow since padded_count >= 2 */ - cfg.divisor_r = __builtin_ctz(padded_count) - 1; - } else { - cfg.divisor_p = - panfrost_compute_magic_divisor(padded_count, - &cfg.divisor_r, &cfg.divisor_e); - } - } + pan_pack(attr, ATTRIBUTE_INSTANCE_ID, cfg) { + if (!instanced || padded_count <= 1) { + /* Divide by large number to force to 0 */ + cfg.divisor_p = ((1u << 31) - 1); + cfg.divisor_r = 0x1F; + cfg.divisor_e = 0x1; + } else if (util_is_power_of_two_or_zero(padded_count)) { + /* Can't underflow since padded_count >= 2 */ + cfg.divisor_r = __builtin_ctz(padded_count) - 1; + } else { + cfg.divisor_p = panfrost_compute_magic_divisor( + padded_count, &cfg.divisor_r, &cfg.divisor_e); + } + } } #endif /* PAN_ARCH <= 5 */ @@ -135,14 +128,18 @@ panfrost_instance_id(unsigned padded_count, static inline enum mali_func panfrost_flip_compare_func(enum mali_func f) { - switch (f) { - case MALI_FUNC_LESS: return MALI_FUNC_GREATER; - case MALI_FUNC_GREATER: return MALI_FUNC_LESS; - case MALI_FUNC_LEQUAL: return MALI_FUNC_GEQUAL; - case MALI_FUNC_GEQUAL: return MALI_FUNC_LEQUAL; - default: return f; - } - + switch (f) { + case MALI_FUNC_LESS: + return MALI_FUNC_GREATER; + case MALI_FUNC_GREATER: + return MALI_FUNC_LESS; + case MALI_FUNC_LEQUAL: + return MALI_FUNC_GEQUAL; + case MALI_FUNC_GEQUAL: + return MALI_FUNC_LEQUAL; + default: + return f; + } } #if PAN_ARCH <= 7 @@ -152,59 +149,59 @@ panfrost_flip_compare_func(enum mali_func f) * together in a dynamic bitfield, packed by this routine. */ static inline void -panfrost_pack_work_groups_compute( - struct mali_invocation_packed *out, - unsigned num_x, unsigned num_y, unsigned num_z, - unsigned size_x, unsigned size_y, unsigned size_z, - bool quirk_graphics, bool indirect_dispatch) +panfrost_pack_work_groups_compute(struct mali_invocation_packed *out, + unsigned num_x, unsigned num_y, + unsigned num_z, unsigned size_x, + unsigned size_y, unsigned size_z, + bool quirk_graphics, bool indirect_dispatch) { - /* The values needing packing, in order, and the corresponding shifts. - * Indicies into shift are off-by-one to make the logic easier */ + /* The values needing packing, in order, and the corresponding shifts. 
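The ATTRIBUTE_VERTEX_ID encoding in the instanced path above stores padded_count so that padded_count == (2 * divisor_p + 1) << divisor_r. A worked sketch (for padded_count = 12: divisor_r = 2, divisor_p = 1, and (2*1 + 1) << 2 == 12):

static void
encode_vertex_id_divisor_sketch(unsigned padded_count,
                                unsigned *divisor_r, unsigned *divisor_p)
{
   /* padded_count = odd * 2^r, so r is the number of trailing zeros and
    * p is (odd - 1) / 2; padded_count must be nonzero */
   *divisor_r = __builtin_ctz(padded_count);
   *divisor_p = padded_count >> (*divisor_r + 1);
}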
+ * Indicies into shift are off-by-one to make the logic easier */ - unsigned values[6] = { size_x, size_y, size_z, num_x, num_y, num_z }; - unsigned shifts[7] = { 0 }; - uint32_t packed = 0; + unsigned values[6] = {size_x, size_y, size_z, num_x, num_y, num_z}; + unsigned shifts[7] = {0}; + uint32_t packed = 0; - for (unsigned i = 0; i < 6; ++i) { - /* Must be positive, otherwise we underflow */ - assert(values[i] >= 1); + for (unsigned i = 0; i < 6; ++i) { + /* Must be positive, otherwise we underflow */ + assert(values[i] >= 1); - /* OR it in, shifting as required */ - packed |= ((values[i] - 1) << shifts[i]); + /* OR it in, shifting as required */ + packed |= ((values[i] - 1) << shifts[i]); - /* How many bits did we use? */ - unsigned bit_count = util_logbase2_ceil(values[i]); + /* How many bits did we use? */ + unsigned bit_count = util_logbase2_ceil(values[i]); - /* Set the next shift accordingly */ - shifts[i + 1] = shifts[i] + bit_count; - } + /* Set the next shift accordingly */ + shifts[i + 1] = shifts[i] + bit_count; + } - pan_pack(out, INVOCATION, cfg) { - cfg.invocations = packed; - cfg.size_y_shift = shifts[1]; - cfg.size_z_shift = shifts[2]; - cfg.workgroups_x_shift = shifts[3]; + pan_pack(out, INVOCATION, cfg) { + cfg.invocations = packed; + cfg.size_y_shift = shifts[1]; + cfg.size_z_shift = shifts[2]; + cfg.workgroups_x_shift = shifts[3]; - if (!indirect_dispatch) { - /* Leave zero for the dispatch shader */ - cfg.workgroups_y_shift = shifts[4]; - cfg.workgroups_z_shift = shifts[5]; - } + if (!indirect_dispatch) { + /* Leave zero for the dispatch shader */ + cfg.workgroups_y_shift = shifts[4]; + cfg.workgroups_z_shift = shifts[5]; + } - /* Quirk: for non-instanced graphics, the blob sets - * workgroups_z_shift = 32. This doesn't appear to matter to - * the hardware, but it's good to be bit-identical. */ + /* Quirk: for non-instanced graphics, the blob sets + * workgroups_z_shift = 32. This doesn't appear to matter to + * the hardware, but it's good to be bit-identical. */ - if (quirk_graphics && (num_z <= 1)) - cfg.workgroups_z_shift = 32; + if (quirk_graphics && (num_z <= 1)) + cfg.workgroups_z_shift = 32; - /* For graphics, set to the minimum efficient value. For - * compute, must equal the workgroup X shift for barriers to - * function correctly */ + /* For graphics, set to the minimum efficient value. For + * compute, must equal the workgroup X shift for barriers to + * function correctly */ - cfg.thread_group_split = quirk_graphics ? - MALI_SPLIT_MIN_EFFICIENT : cfg.workgroups_x_shift; - } + cfg.thread_group_split = + quirk_graphics ? 
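A standalone sketch of the invocation packing being emitted above: each of the six values is stored as (value - 1) in ceil(log2(value)) bits, and each field's shift starts where the previous one ended. For an 8x8x1 workgroup size with 4x4x1 workgroups, the resulting shifts are size_y = 3, size_z = 6, workgroups_x = 6, workgroups_y = 8, workgroups_z = 10.

#include <stdint.h>

static uint32_t
pack_invocations_sketch(const unsigned values[6], unsigned shifts[7])
{
   uint32_t packed = 0;

   shifts[0] = 0;
   for (unsigned i = 0; i < 6; ++i) {
      /* ceil(log2(values[i])), with a value of 1 taking zero bits */
      unsigned bits = 0;
      while ((1u << bits) < values[i])
         bits++;

      packed |= (uint32_t)(values[i] - 1) << shifts[i];
      shifts[i + 1] = shifts[i] + bits;
   }

   return packed;
}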
MALI_SPLIT_MIN_EFFICIENT : cfg.workgroups_x_shift; + } } #endif @@ -213,19 +210,19 @@ panfrost_pack_work_groups_compute( static inline enum mali_z_internal_format panfrost_get_z_internal_format(enum pipe_format fmt) { - switch (fmt) { - case PIPE_FORMAT_Z16_UNORM: - case PIPE_FORMAT_Z16_UNORM_S8_UINT: - return MALI_Z_INTERNAL_FORMAT_D16; - case PIPE_FORMAT_Z24_UNORM_S8_UINT: - case PIPE_FORMAT_Z24X8_UNORM: - return MALI_Z_INTERNAL_FORMAT_D24; - case PIPE_FORMAT_Z32_FLOAT: - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: - return MALI_Z_INTERNAL_FORMAT_D32; - default: - unreachable("Unsupported depth/stencil format."); - } + switch (fmt) { + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_Z16_UNORM_S8_UINT: + return MALI_Z_INTERNAL_FORMAT_D16; + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + case PIPE_FORMAT_Z24X8_UNORM: + return MALI_Z_INTERNAL_FORMAT_D24; + case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + return MALI_Z_INTERNAL_FORMAT_D32; + default: + unreachable("Unsupported depth/stencil format."); + } } #endif @@ -236,13 +233,13 @@ static inline void panfrost_make_resource_table(struct panfrost_ptr base, unsigned index, mali_ptr address, unsigned resource_count) { - if (resource_count == 0) - return; + if (resource_count == 0) + return; - pan_pack(base.cpu + index * pan_size(RESOURCE), RESOURCE, cfg) { - cfg.address = address; - cfg.size = resource_count * pan_size(BUFFER); - } + pan_pack(base.cpu + index * pan_size(RESOURCE), RESOURCE, cfg) { + cfg.address = address; + cfg.size = resource_count * pan_size(BUFFER); + } } #endif diff --git a/src/panfrost/lib/pan_format.c b/src/panfrost/lib/pan_format.c index a2ffd1214d6..1b4007585b8 100644 --- a/src/panfrost/lib/pan_format.c +++ b/src/panfrost/lib/pan_format.c @@ -24,86 +24,90 @@ * Alyssa Rosenzweig */ -#include "genxml/gen_macros.h" #include "pan_format.h" +#include "genxml/gen_macros.h" #include "util/format/u_format.h" /* Convenience */ -#define MALI_BLEND_AU_R8G8B8A8 (MALI_RGBA8_TB << 12) -#define MALI_BLEND_PU_R8G8B8A8 (MALI_RGBA8_TB << 12) +#define MALI_BLEND_AU_R8G8B8A8 (MALI_RGBA8_TB << 12) +#define MALI_BLEND_PU_R8G8B8A8 (MALI_RGBA8_TB << 12) #define MALI_BLEND_AU_R10G10B10A2 (MALI_RGB10_A2_TB << 12) #define MALI_BLEND_PU_R10G10B10A2 (MALI_RGB10_A2_TB << 12) -#define MALI_BLEND_AU_R8G8B8A2 (MALI_RGB8_A2_AU << 12) -#define MALI_BLEND_PU_R8G8B8A2 (MALI_RGB8_A2_PU << 12) -#define MALI_BLEND_AU_R4G4B4A4 (MALI_RGBA4_AU << 12) -#define MALI_BLEND_PU_R4G4B4A4 (MALI_RGBA4_PU << 12) -#define MALI_BLEND_AU_R5G6B5A0 (MALI_R5G6B5_AU << 12) -#define MALI_BLEND_PU_R5G6B5A0 (MALI_R5G6B5_PU << 12) -#define MALI_BLEND_AU_R5G5B5A1 (MALI_RGB5_A1_AU << 12) -#define MALI_BLEND_PU_R5G5B5A1 (MALI_RGB5_A1_PU << 12) +#define MALI_BLEND_AU_R8G8B8A2 (MALI_RGB8_A2_AU << 12) +#define MALI_BLEND_PU_R8G8B8A2 (MALI_RGB8_A2_PU << 12) +#define MALI_BLEND_AU_R4G4B4A4 (MALI_RGBA4_AU << 12) +#define MALI_BLEND_PU_R4G4B4A4 (MALI_RGBA4_PU << 12) +#define MALI_BLEND_AU_R5G6B5A0 (MALI_R5G6B5_AU << 12) +#define MALI_BLEND_PU_R5G6B5A0 (MALI_R5G6B5_PU << 12) +#define MALI_BLEND_AU_R5G5B5A1 (MALI_RGB5_A1_AU << 12) +#define MALI_BLEND_PU_R5G5B5A1 (MALI_RGB5_A1_PU << 12) #if PAN_ARCH <= 6 -#define BFMT2(pipe, internal, writeback, srgb) \ - [PIPE_FORMAT_##pipe] = { \ - MALI_COLOR_BUFFER_INTERNAL_FORMAT_## internal, \ - MALI_COLOR_FORMAT_## writeback, \ - { MALI_BLEND_PU_ ## internal | (srgb ? (1 << 20) : 0) | \ - PAN_V6_SWIZZLE(R, G, B, A), \ - MALI_BLEND_AU_ ## internal | (srgb ? 
(1 << 20) : 0) | \ - PAN_V6_SWIZZLE(R, G, B, A), }, \ - } +#define BFMT2(pipe, internal, writeback, srgb) \ + [PIPE_FORMAT_##pipe] = { \ + MALI_COLOR_BUFFER_INTERNAL_FORMAT_##internal, \ + MALI_COLOR_FORMAT_##writeback, \ + { \ + MALI_BLEND_PU_##internal | (srgb ? (1 << 20) : 0) | \ + PAN_V6_SWIZZLE(R, G, B, A), \ + MALI_BLEND_AU_##internal | (srgb ? (1 << 20) : 0) | \ + PAN_V6_SWIZZLE(R, G, B, A), \ + }, \ + } #else -#define BFMT2(pipe, internal, writeback, srgb) \ - [PIPE_FORMAT_##pipe] = { \ - MALI_COLOR_BUFFER_INTERNAL_FORMAT_## internal, \ - MALI_COLOR_FORMAT_## writeback, \ - { MALI_BLEND_PU_ ## internal | (srgb ? (1 << 20) : 0), \ - MALI_BLEND_AU_ ## internal | (srgb ? (1 << 20) : 0), }, \ - } +#define BFMT2(pipe, internal, writeback, srgb) \ + [PIPE_FORMAT_##pipe] = { \ + MALI_COLOR_BUFFER_INTERNAL_FORMAT_##internal, \ + MALI_COLOR_FORMAT_##writeback, \ + { \ + MALI_BLEND_PU_##internal | (srgb ? (1 << 20) : 0), \ + MALI_BLEND_AU_##internal | (srgb ? (1 << 20) : 0), \ + }, \ + } #endif -#define BFMT(pipe, internal_and_writeback) \ - BFMT2(pipe, internal_and_writeback, internal_and_writeback, 0) +#define BFMT(pipe, internal_and_writeback) \ + BFMT2(pipe, internal_and_writeback, internal_and_writeback, 0) -#define BFMT_SRGB(pipe, writeback) \ - BFMT2(pipe ##_UNORM, R8G8B8A8, writeback, 0), \ - BFMT2(pipe ##_SRGB, R8G8B8A8, writeback, 1) +#define BFMT_SRGB(pipe, writeback) \ + BFMT2(pipe##_UNORM, R8G8B8A8, writeback, 0), \ + BFMT2(pipe##_SRGB, R8G8B8A8, writeback, 1) const struct pan_blendable_format -GENX(panfrost_blendable_formats)[PIPE_FORMAT_COUNT] = { - BFMT_SRGB(L8, R8), - BFMT_SRGB(L8A8, R8G8), - BFMT_SRGB(R8, R8), - BFMT_SRGB(R8G8, R8G8), - BFMT_SRGB(R8G8B8, R8G8B8), + GENX(panfrost_blendable_formats)[PIPE_FORMAT_COUNT] = { + BFMT_SRGB(L8, R8), + BFMT_SRGB(L8A8, R8G8), + BFMT_SRGB(R8, R8), + BFMT_SRGB(R8G8, R8G8), + BFMT_SRGB(R8G8B8, R8G8B8), - BFMT_SRGB(B8G8R8A8, R8G8B8A8), - BFMT_SRGB(B8G8R8X8, R8G8B8A8), - BFMT_SRGB(A8R8G8B8, R8G8B8A8), - BFMT_SRGB(X8R8G8B8, R8G8B8A8), - BFMT_SRGB(A8B8G8R8, R8G8B8A8), - BFMT_SRGB(X8B8G8R8, R8G8B8A8), - BFMT_SRGB(R8G8B8X8, R8G8B8A8), - BFMT_SRGB(R8G8B8A8, R8G8B8A8), + BFMT_SRGB(B8G8R8A8, R8G8B8A8), + BFMT_SRGB(B8G8R8X8, R8G8B8A8), + BFMT_SRGB(A8R8G8B8, R8G8B8A8), + BFMT_SRGB(X8R8G8B8, R8G8B8A8), + BFMT_SRGB(A8B8G8R8, R8G8B8A8), + BFMT_SRGB(X8B8G8R8, R8G8B8A8), + BFMT_SRGB(R8G8B8X8, R8G8B8A8), + BFMT_SRGB(R8G8B8A8, R8G8B8A8), - BFMT2(A8_UNORM, R8G8B8A8, R8, 0), - BFMT2(I8_UNORM, R8G8B8A8, R8, 0), - BFMT2(R5G6B5_UNORM, R5G6B5A0, R5G6B5, 0), - BFMT2(B5G6R5_UNORM, R5G6B5A0, R5G6B5, 0), + BFMT2(A8_UNORM, R8G8B8A8, R8, 0), + BFMT2(I8_UNORM, R8G8B8A8, R8, 0), + BFMT2(R5G6B5_UNORM, R5G6B5A0, R5G6B5, 0), + BFMT2(B5G6R5_UNORM, R5G6B5A0, R5G6B5, 0), - BFMT(A4B4G4R4_UNORM, R4G4B4A4), - BFMT(B4G4R4A4_UNORM, R4G4B4A4), - BFMT(R4G4B4A4_UNORM, R4G4B4A4), + BFMT(A4B4G4R4_UNORM, R4G4B4A4), + BFMT(B4G4R4A4_UNORM, R4G4B4A4), + BFMT(R4G4B4A4_UNORM, R4G4B4A4), - BFMT(R10G10B10A2_UNORM, R10G10B10A2), - BFMT(B10G10R10A2_UNORM, R10G10B10A2), - BFMT(R10G10B10X2_UNORM, R10G10B10A2), - BFMT(B10G10R10X2_UNORM, R10G10B10A2), + BFMT(R10G10B10A2_UNORM, R10G10B10A2), + BFMT(B10G10R10A2_UNORM, R10G10B10A2), + BFMT(R10G10B10X2_UNORM, R10G10B10A2), + BFMT(B10G10R10X2_UNORM, R10G10B10A2), - BFMT(B5G5R5A1_UNORM, R5G5B5A1), - BFMT(R5G5B5A1_UNORM, R5G5B5A1), - BFMT(B5G5R5X1_UNORM, R5G5B5A1), + BFMT(B5G5R5A1_UNORM, R5G5B5A1), + BFMT(R5G5B5A1_UNORM, R5G5B5A1), + BFMT(B5G5R5X1_UNORM, R5G5B5A1), }; /* Convenience */ @@ -145,13 +149,11 @@ 
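The MALI_BLEND_* convenience macros and the BFMT2 entries above all follow one bit layout; a sketch of the v7+ variant (which carries no per-entry swizzle), where the tile-buffer format enum sits at bit 12 and the sRGB flag at bit 20 (the enum value passed in is a placeholder here):

#include <stdbool.h>
#include <stdint.h>

static uint32_t
pack_blend_pixel_format_sketch(uint32_t tb_format_enum, bool srgb)
{
   return (tb_format_enum << 12) | ((uint32_t)srgb << 20);
}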
GENX(panfrost_blendable_formats)[PIPE_FORMAT_COUNT] = { #define V6_RRRR PAN_V6_SWIZZLE(R, R, R, R) #define V6_GGGG PAN_V6_SWIZZLE(G, G, G, G) -#define FMT(pipe, mali, swizzle, srgb, flags) \ - [PIPE_FORMAT_ ## pipe] = { \ - .hw = ( V6_ ## swizzle ) | \ - (( MALI_ ## mali ) << 12) | \ - ((( SRGB_ ## srgb)) << 20), \ - .bind = FLAGS_ ## flags, \ - } +#define FMT(pipe, mali, swizzle, srgb, flags) \ + [PIPE_FORMAT_##pipe] = { \ + .hw = (V6_##swizzle) | ((MALI_##mali) << 12) | (((SRGB_##srgb)) << 20), \ + .bind = FLAGS_##flags, \ + } #else #define MALI_RGB_COMPONENT_ORDER_R001 MALI_RGB_COMPONENT_ORDER_RGB1 @@ -160,13 +162,12 @@ GENX(panfrost_blendable_formats)[PIPE_FORMAT_COUNT] = { #define MALI_RGB_COMPONENT_ORDER_GBA1 MALI_RGB_COMPONENT_ORDER_1RGB #define MALI_RGB_COMPONENT_ORDER_ABG1 MALI_RGB_COMPONENT_ORDER_1BGR -#define FMT(pipe, mali, swizzle, srgb, flags) \ - [PIPE_FORMAT_ ## pipe] = { \ - .hw = ( MALI_RGB_COMPONENT_ORDER_ ## swizzle ) | \ - (( MALI_ ## mali ) << 12) | \ - ((( SRGB_ ## srgb)) << 20), \ - .bind = FLAGS_ ## flags, \ - } +#define FMT(pipe, mali, swizzle, srgb, flags) \ + [PIPE_FORMAT_##pipe] = { \ + .hw = (MALI_RGB_COMPONENT_ORDER_##swizzle) | ((MALI_##mali) << 12) | \ + (((SRGB_##srgb)) << 20), \ + .bind = FLAGS_##flags, \ + } #endif /* clang-format off */ @@ -613,36 +614,40 @@ const struct panfrost_format GENX(panfrost_pipe_format)[PIPE_FORMAT_COUNT] = { struct pan_decomposed_swizzle GENX(pan_decompose_swizzle)(enum mali_rgb_component_order order) { -#define CASE(case_, pre_, R_, G_, B_, A_) \ - case MALI_RGB_COMPONENT_ORDER_##case_: \ - return (struct pan_decomposed_swizzle) { \ - MALI_RGB_COMPONENT_ORDER_##pre_, { \ - PIPE_SWIZZLE_##R_, PIPE_SWIZZLE_##G_, \ - PIPE_SWIZZLE_##B_, PIPE_SWIZZLE_##A_, \ - }, \ - }; +#define CASE(case_, pre_, R_, G_, B_, A_) \ + case MALI_RGB_COMPONENT_ORDER_##case_: \ + return (struct pan_decomposed_swizzle){ \ + MALI_RGB_COMPONENT_ORDER_##pre_, \ + { \ + PIPE_SWIZZLE_##R_, \ + PIPE_SWIZZLE_##G_, \ + PIPE_SWIZZLE_##B_, \ + PIPE_SWIZZLE_##A_, \ + }, \ + }; - switch (order) { - CASE(RGBA, RGBA, X, Y, Z, W); - CASE(GRBA, RGBA, Y, X, Z, W); - CASE(BGRA, RGBA, Z, Y, X, W); - CASE(ARGB, RGBA, Y, Z, W, X); - CASE(AGRB, RGBA, Z, Y, W, X); - CASE(ABGR, RGBA, W, Z, Y, X); - CASE(RGB1, RGB1, X, Y, Z, W); - CASE(GRB1, RGB1, Y, X, Z, W); - CASE(BGR1, RGB1, Z, Y, X, W); - CASE(1RGB, RGB1, Y, Z, W, X); - CASE(1GRB, RGB1, Z, Y, W, X); - CASE(1BGR, RGB1, W, Z, Y, X); - CASE(RRRR, RRRR, X, Y, Z, W); - CASE(RRR1, RRR1, X, Y, Z, W); - CASE(RRRA, RRRA, X, Y, Z, W); - CASE(000A, 000A, X, Y, Z, W); - CASE(0001, 0001, X, Y, Z, W); - CASE(0000, 0000, X, Y, Z, W); - default: unreachable("Invalid case for texturing"); - } + switch (order) { + CASE(RGBA, RGBA, X, Y, Z, W); + CASE(GRBA, RGBA, Y, X, Z, W); + CASE(BGRA, RGBA, Z, Y, X, W); + CASE(ARGB, RGBA, Y, Z, W, X); + CASE(AGRB, RGBA, Z, Y, W, X); + CASE(ABGR, RGBA, W, Z, Y, X); + CASE(RGB1, RGB1, X, Y, Z, W); + CASE(GRB1, RGB1, Y, X, Z, W); + CASE(BGR1, RGB1, Z, Y, X, W); + CASE(1RGB, RGB1, Y, Z, W, X); + CASE(1GRB, RGB1, Z, Y, W, X); + CASE(1BGR, RGB1, W, Z, Y, X); + CASE(RRRR, RRRR, X, Y, Z, W); + CASE(RRR1, RRR1, X, Y, Z, W); + CASE(RRRA, RRRA, X, Y, Z, W); + CASE(000A, 000A, X, Y, Z, W); + CASE(0001, 0001, X, Y, Z, W); + CASE(0000, 0000, X, Y, Z, W); + default: + unreachable("Invalid case for texturing"); + } #undef CASE } diff --git a/src/panfrost/lib/pan_format.h b/src/panfrost/lib/pan_format.h index a723a31b4d3..babf6d637c4 100644 --- a/src/panfrost/lib/pan_format.h +++ b/src/panfrost/lib/pan_format.h @@ 
-37,61 +37,62 @@ typedef uint32_t mali_pixel_format; struct panfrost_format { - mali_pixel_format hw; - unsigned bind; + mali_pixel_format hw; + unsigned bind; }; struct pan_blendable_format { - /* enum mali_color_buffer_internal_format */ uint16_t internal; - /* enum mali_mfbd_color_format */ uint16_t writeback; + /* enum mali_color_buffer_internal_format */ uint16_t internal; + /* enum mali_mfbd_color_format */ uint16_t writeback; - /* Indexed by the dithered? flag. So _PU first, then _AU */ - mali_pixel_format bifrost[2]; + /* Indexed by the dithered? flag. So _PU first, then _AU */ + mali_pixel_format bifrost[2]; }; -extern const struct pan_blendable_format panfrost_blendable_formats_v6[PIPE_FORMAT_COUNT]; -extern const struct pan_blendable_format panfrost_blendable_formats_v7[PIPE_FORMAT_COUNT]; -extern const struct pan_blendable_format panfrost_blendable_formats_v9[PIPE_FORMAT_COUNT]; +extern const struct pan_blendable_format + panfrost_blendable_formats_v6[PIPE_FORMAT_COUNT]; +extern const struct pan_blendable_format + panfrost_blendable_formats_v7[PIPE_FORMAT_COUNT]; +extern const struct pan_blendable_format + panfrost_blendable_formats_v9[PIPE_FORMAT_COUNT]; extern const struct panfrost_format panfrost_pipe_format_v6[PIPE_FORMAT_COUNT]; extern const struct panfrost_format panfrost_pipe_format_v7[PIPE_FORMAT_COUNT]; extern const struct panfrost_format panfrost_pipe_format_v9[PIPE_FORMAT_COUNT]; /* Helpers to construct swizzles */ -#define PAN_V6_SWIZZLE(R, G, B, A) ( \ - ((MALI_CHANNEL_ ## R) << 0) | \ - ((MALI_CHANNEL_ ## G) << 3) | \ - ((MALI_CHANNEL_ ## B) << 6) | \ - ((MALI_CHANNEL_ ## A) << 9)) +#define PAN_V6_SWIZZLE(R, G, B, A) \ + (((MALI_CHANNEL_##R) << 0) | ((MALI_CHANNEL_##G) << 3) | \ + ((MALI_CHANNEL_##B) << 6) | ((MALI_CHANNEL_##A) << 9)) static inline unsigned panfrost_get_default_swizzle(unsigned components) { - switch (components) { - case 1: - return PAN_V6_SWIZZLE(R, 0, 0, 1); - case 2: - return PAN_V6_SWIZZLE(R, G, 0, 1); - case 3: - return PAN_V6_SWIZZLE(R, G, B, 1); - case 4: - return PAN_V6_SWIZZLE(R, G, B, A); - default: - unreachable("Invalid number of components"); - } + switch (components) { + case 1: + return PAN_V6_SWIZZLE(R, 0, 0, 1); + case 2: + return PAN_V6_SWIZZLE(R, G, 0, 1); + case 3: + return PAN_V6_SWIZZLE(R, G, B, 1); + case 4: + return PAN_V6_SWIZZLE(R, G, B, A); + default: + unreachable("Invalid number of components"); + } } #if PAN_ARCH == 7 struct pan_decomposed_swizzle { - /* Component ordering to apply first */ - enum mali_rgb_component_order pre; + /* Component ordering to apply first */ + enum mali_rgb_component_order pre; - /* Bijective swizzle applied after */ - unsigned char post[4]; + /* Bijective swizzle applied after */ + unsigned char post[4]; }; struct pan_decomposed_swizzle -GENX(pan_decompose_swizzle)(enum mali_rgb_component_order order); + GENX(pan_decompose_swizzle)(enum mali_rgb_component_order order); #endif #endif diff --git a/src/panfrost/lib/pan_indirect_dispatch.c b/src/panfrost/lib/pan_indirect_dispatch.c index 8a6ad81167d..8f7e75e50ba 100644 --- a/src/panfrost/lib/pan_indirect_dispatch.c +++ b/src/panfrost/lib/pan_indirect_dispatch.c @@ -22,144 +22,146 @@ * */ -#include -#include "pan_bo.h" -#include "pan_shader.h" -#include "pan_scoreboard.h" -#include "pan_encoder.h" #include "pan_indirect_dispatch.h" -#include "pan_pool.h" -#include "pan_util.h" +#include #include "compiler/nir/nir_builder.h" -#include "util/u_memory.h" #include "util/macros.h" +#include "util/u_memory.h" +#include "pan_bo.h" +#include 
"pan_encoder.h" +#include "pan_pool.h" +#include "pan_scoreboard.h" +#include "pan_shader.h" +#include "pan_util.h" -#define get_input_field(b, name) \ - nir_load_push_constant(b, \ - 1, sizeof(((struct pan_indirect_dispatch_info *)0)->name) * 8, \ - nir_imm_int(b, 0), \ - .base = offsetof(struct pan_indirect_dispatch_info, name)) +#define get_input_field(b, name) \ + nir_load_push_constant( \ + b, 1, sizeof(((struct pan_indirect_dispatch_info *)0)->name) * 8, \ + nir_imm_int(b, 0), \ + .base = offsetof(struct pan_indirect_dispatch_info, name)) static mali_ptr get_rsd(const struct panfrost_device *dev) { - return dev->indirect_dispatch.descs->ptr.gpu; + return dev->indirect_dispatch.descs->ptr.gpu; } static mali_ptr get_tls(const struct panfrost_device *dev) { - return dev->indirect_dispatch.descs->ptr.gpu + - pan_size(RENDERER_STATE); + return dev->indirect_dispatch.descs->ptr.gpu + pan_size(RENDERER_STATE); } static void pan_indirect_dispatch_init(struct panfrost_device *dev) { - nir_builder b = - nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, - GENX(pan_shader_get_compiler_options)(), - "%s", "indirect_dispatch"); - nir_ssa_def *zero = nir_imm_int(&b, 0); - nir_ssa_def *one = nir_imm_int(&b, 1); - nir_ssa_def *num_wg = nir_load_global(&b, get_input_field(&b, indirect_dim), 4, 3, 32); - nir_ssa_def *num_wg_x = nir_channel(&b, num_wg, 0); - nir_ssa_def *num_wg_y = nir_channel(&b, num_wg, 1); - nir_ssa_def *num_wg_z = nir_channel(&b, num_wg, 2); + nir_builder b = nir_builder_init_simple_shader( + MESA_SHADER_COMPUTE, GENX(pan_shader_get_compiler_options)(), "%s", + "indirect_dispatch"); + nir_ssa_def *zero = nir_imm_int(&b, 0); + nir_ssa_def *one = nir_imm_int(&b, 1); + nir_ssa_def *num_wg = + nir_load_global(&b, get_input_field(&b, indirect_dim), 4, 3, 32); + nir_ssa_def *num_wg_x = nir_channel(&b, num_wg, 0); + nir_ssa_def *num_wg_y = nir_channel(&b, num_wg, 1); + nir_ssa_def *num_wg_z = nir_channel(&b, num_wg, 2); - nir_ssa_def *job_hdr_ptr = get_input_field(&b, job); - nir_ssa_def *num_wg_flat = nir_imul(&b, num_wg_x, nir_imul(&b, num_wg_y, num_wg_z)); + nir_ssa_def *job_hdr_ptr = get_input_field(&b, job); + nir_ssa_def *num_wg_flat = + nir_imul(&b, num_wg_x, nir_imul(&b, num_wg_y, num_wg_z)); - nir_push_if(&b, nir_ieq(&b, num_wg_flat, zero)); - { - nir_ssa_def *type_ptr = nir_iadd(&b, job_hdr_ptr, nir_imm_int64(&b, 4 * 4)); - nir_ssa_def *ntype = nir_imm_intN_t(&b, (MALI_JOB_TYPE_NULL << 1) | 1, 8); - nir_store_global(&b, type_ptr, 1, ntype, 1); - } - nir_push_else(&b, NULL); - { - nir_ssa_def *job_dim_ptr = nir_iadd(&b, job_hdr_ptr, - nir_imm_int64(&b, pan_section_offset(COMPUTE_JOB, INVOCATION))); - nir_ssa_def *num_wg_x_m1 = nir_isub(&b, num_wg_x, one); - nir_ssa_def *num_wg_y_m1 = nir_isub(&b, num_wg_y, one); - nir_ssa_def *num_wg_z_m1 = nir_isub(&b, num_wg_z, one); - nir_ssa_def *job_dim = nir_load_global(&b, job_dim_ptr, 8, 2, 32); - nir_ssa_def *dims = nir_channel(&b, job_dim, 0); - nir_ssa_def *split = nir_channel(&b, job_dim, 1); - nir_ssa_def *num_wg_x_split = nir_iand_imm(&b, nir_ushr_imm(&b, split, 10), 0x3f); - nir_ssa_def *num_wg_y_split = nir_iadd(&b, num_wg_x_split, - nir_isub_imm(&b, 32, nir_uclz(&b, num_wg_x_m1))); - nir_ssa_def *num_wg_z_split = nir_iadd(&b, num_wg_y_split, - nir_isub_imm(&b, 32, nir_uclz(&b, num_wg_y_m1))); - split = nir_ior(&b, split, - nir_ior(&b, - nir_ishl(&b, num_wg_y_split, nir_imm_int(&b, 16)), - nir_ishl(&b, num_wg_z_split, nir_imm_int(&b, 22)))); - dims = nir_ior(&b, dims, - nir_ior(&b, nir_ishl(&b, num_wg_x_m1, num_wg_x_split), - 
nir_ior(&b, nir_ishl(&b, num_wg_y_m1, num_wg_y_split), - nir_ishl(&b, num_wg_z_m1, num_wg_z_split)))); + nir_push_if(&b, nir_ieq(&b, num_wg_flat, zero)); + { + nir_ssa_def *type_ptr = + nir_iadd(&b, job_hdr_ptr, nir_imm_int64(&b, 4 * 4)); + nir_ssa_def *ntype = nir_imm_intN_t(&b, (MALI_JOB_TYPE_NULL << 1) | 1, 8); + nir_store_global(&b, type_ptr, 1, ntype, 1); + } + nir_push_else(&b, NULL); + { + nir_ssa_def *job_dim_ptr = nir_iadd( + &b, job_hdr_ptr, + nir_imm_int64(&b, pan_section_offset(COMPUTE_JOB, INVOCATION))); + nir_ssa_def *num_wg_x_m1 = nir_isub(&b, num_wg_x, one); + nir_ssa_def *num_wg_y_m1 = nir_isub(&b, num_wg_y, one); + nir_ssa_def *num_wg_z_m1 = nir_isub(&b, num_wg_z, one); + nir_ssa_def *job_dim = nir_load_global(&b, job_dim_ptr, 8, 2, 32); + nir_ssa_def *dims = nir_channel(&b, job_dim, 0); + nir_ssa_def *split = nir_channel(&b, job_dim, 1); + nir_ssa_def *num_wg_x_split = + nir_iand_imm(&b, nir_ushr_imm(&b, split, 10), 0x3f); + nir_ssa_def *num_wg_y_split = nir_iadd( + &b, num_wg_x_split, nir_isub_imm(&b, 32, nir_uclz(&b, num_wg_x_m1))); + nir_ssa_def *num_wg_z_split = nir_iadd( + &b, num_wg_y_split, nir_isub_imm(&b, 32, nir_uclz(&b, num_wg_y_m1))); + split = + nir_ior(&b, split, + nir_ior(&b, nir_ishl(&b, num_wg_y_split, nir_imm_int(&b, 16)), + nir_ishl(&b, num_wg_z_split, nir_imm_int(&b, 22)))); + dims = + nir_ior(&b, dims, + nir_ior(&b, nir_ishl(&b, num_wg_x_m1, num_wg_x_split), + nir_ior(&b, nir_ishl(&b, num_wg_y_m1, num_wg_y_split), + nir_ishl(&b, num_wg_z_m1, num_wg_z_split)))); - nir_store_global(&b, job_dim_ptr, 8, nir_vec2(&b, dims, split), 3); + nir_store_global(&b, job_dim_ptr, 8, nir_vec2(&b, dims, split), 3); - nir_ssa_def *num_wg_x_ptr = get_input_field(&b, num_wg_sysval[0]); + nir_ssa_def *num_wg_x_ptr = get_input_field(&b, num_wg_sysval[0]); - nir_push_if(&b, nir_ine(&b, num_wg_x_ptr, nir_imm_int64(&b, 0))); - { - nir_store_global(&b, num_wg_x_ptr, 8, num_wg_x, 1); - nir_store_global(&b, get_input_field(&b, num_wg_sysval[1]), 8, num_wg_y, 1); - nir_store_global(&b, get_input_field(&b, num_wg_sysval[2]), 8, num_wg_z, 1); - } - nir_pop_if(&b, NULL); - } + nir_push_if(&b, nir_ine(&b, num_wg_x_ptr, nir_imm_int64(&b, 0))); + { + nir_store_global(&b, num_wg_x_ptr, 8, num_wg_x, 1); + nir_store_global(&b, get_input_field(&b, num_wg_sysval[1]), 8, + num_wg_y, 1); + nir_store_global(&b, get_input_field(&b, num_wg_sysval[2]), 8, + num_wg_z, 1); + } + nir_pop_if(&b, NULL); + } - nir_pop_if(&b, NULL); + nir_pop_if(&b, NULL); - struct panfrost_compile_inputs inputs = { - .gpu_id = dev->gpu_id, - .fixed_sysval_ubo = -1, - .no_ubo_to_push = true, - }; - struct pan_shader_info shader_info; - struct util_dynarray binary; + struct panfrost_compile_inputs inputs = { + .gpu_id = dev->gpu_id, + .fixed_sysval_ubo = -1, + .no_ubo_to_push = true, + }; + struct pan_shader_info shader_info; + struct util_dynarray binary; - util_dynarray_init(&binary, NULL); - GENX(pan_shader_compile)(b.shader, &inputs, &binary, &shader_info); + util_dynarray_init(&binary, NULL); + GENX(pan_shader_compile)(b.shader, &inputs, &binary, &shader_info); - ralloc_free(b.shader); + ralloc_free(b.shader); - assert(!shader_info.tls_size); - assert(!shader_info.wls_size); - assert(!shader_info.sysvals.sysval_count); + assert(!shader_info.tls_size); + assert(!shader_info.wls_size); + assert(!shader_info.sysvals.sysval_count); - shader_info.push.count = - DIV_ROUND_UP(sizeof(struct pan_indirect_dispatch_info), 4); + shader_info.push.count = + DIV_ROUND_UP(sizeof(struct pan_indirect_dispatch_info), 4); - 
dev->indirect_dispatch.bin = - panfrost_bo_create(dev, binary.size, PAN_BO_EXECUTE, - "Indirect dispatch shader"); + dev->indirect_dispatch.bin = panfrost_bo_create( + dev, binary.size, PAN_BO_EXECUTE, "Indirect dispatch shader"); - memcpy(dev->indirect_dispatch.bin->ptr.cpu, binary.data, binary.size); - util_dynarray_fini(&binary); + memcpy(dev->indirect_dispatch.bin->ptr.cpu, binary.data, binary.size); + util_dynarray_fini(&binary); - dev->indirect_dispatch.descs = - panfrost_bo_create(dev, - pan_size(RENDERER_STATE) + - pan_size(LOCAL_STORAGE), - 0, "Indirect dispatch descriptors"); + dev->indirect_dispatch.descs = panfrost_bo_create( + dev, pan_size(RENDERER_STATE) + pan_size(LOCAL_STORAGE), 0, + "Indirect dispatch descriptors"); - mali_ptr address = dev->indirect_dispatch.bin->ptr.gpu; + mali_ptr address = dev->indirect_dispatch.bin->ptr.gpu; - void *rsd = dev->indirect_dispatch.descs->ptr.cpu; - pan_pack(rsd, RENDERER_STATE, cfg) { - pan_shader_prepare_rsd(&shader_info, address, &cfg); - } + void *rsd = dev->indirect_dispatch.descs->ptr.cpu; + pan_pack(rsd, RENDERER_STATE, cfg) { + pan_shader_prepare_rsd(&shader_info, address, &cfg); + } - void *tsd = dev->indirect_dispatch.descs->ptr.cpu + - pan_size(RENDERER_STATE); - pan_pack(tsd, LOCAL_STORAGE, ls) { - ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM; - }; + void *tsd = dev->indirect_dispatch.descs->ptr.cpu + pan_size(RENDERER_STATE); + pan_pack(tsd, LOCAL_STORAGE, ls) { + ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM; + }; } unsigned @@ -167,38 +169,35 @@ GENX(pan_indirect_dispatch_emit)(struct pan_pool *pool, struct pan_scoreboard *scoreboard, const struct pan_indirect_dispatch_info *inputs) { - struct panfrost_device *dev = pool->dev; - struct panfrost_ptr job = - pan_pool_alloc_desc(pool, COMPUTE_JOB); - void *invocation = - pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION); + struct panfrost_device *dev = pool->dev; + struct panfrost_ptr job = pan_pool_alloc_desc(pool, COMPUTE_JOB); + void *invocation = pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION); - /* If we haven't compiled the indirect dispatch shader yet, do it now */ - if (!dev->indirect_dispatch.bin) - pan_indirect_dispatch_init(dev); + /* If we haven't compiled the indirect dispatch shader yet, do it now */ + if (!dev->indirect_dispatch.bin) + pan_indirect_dispatch_init(dev); - panfrost_pack_work_groups_compute(invocation, - 1, 1, 1, 1, 1, 1, - false, false); + panfrost_pack_work_groups_compute(invocation, 1, 1, 1, 1, 1, 1, false, + false); - pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) { - cfg.job_task_split = 2; - } + pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) { + cfg.job_task_split = 2; + } - pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) { - cfg.state = get_rsd(dev); - cfg.thread_storage = get_tls(pool->dev); - cfg.push_uniforms = - pan_pool_upload_aligned(pool, inputs, sizeof(*inputs), 16); - } + pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) { + cfg.state = get_rsd(dev); + cfg.thread_storage = get_tls(pool->dev); + cfg.push_uniforms = + pan_pool_upload_aligned(pool, inputs, sizeof(*inputs), 16); + } - return panfrost_add_job(pool, scoreboard, MALI_JOB_TYPE_COMPUTE, - false, true, 0, 0, &job, false); + return panfrost_add_job(pool, scoreboard, MALI_JOB_TYPE_COMPUTE, false, true, + 0, 0, &job, false); } void GENX(pan_indirect_dispatch_cleanup)(struct panfrost_device *dev) { - panfrost_bo_unreference(dev->indirect_dispatch.bin); - panfrost_bo_unreference(dev->indirect_dispatch.descs); + 
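A hedged usage sketch for pan_indirect_dispatch_emit(), assuming the caller already owns a pool and scoreboard and knows the GPU addresses of the compute job to patch and of the indirect-arguments buffer (the wrapper and its parameter names below are placeholders, not taken from this patch):

#include "pan_indirect_dispatch.h"

static unsigned
queue_indirect_patch_job_sketch(struct pan_pool *pool,
                                struct pan_scoreboard *scoreboard,
                                mali_ptr compute_job, mali_ptr indirect_args)
{
   struct pan_indirect_dispatch_info info = {
      .job = compute_job,
      .indirect_dim = indirect_args,
      /* num_wg_sysval left zeroed: no gl_NumWorkGroups write-back */
   };

   return GENX(pan_indirect_dispatch_emit)(pool, scoreboard, &info);
}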
panfrost_bo_unreference(dev->indirect_dispatch.bin); + panfrost_bo_unreference(dev->indirect_dispatch.descs); } diff --git a/src/panfrost/lib/pan_indirect_dispatch.h b/src/panfrost/lib/pan_indirect_dispatch.h index f39e5f9fce4..0dd86f04988 100644 --- a/src/panfrost/lib/pan_indirect_dispatch.h +++ b/src/panfrost/lib/pan_indirect_dispatch.h @@ -24,25 +24,23 @@ #ifndef __PAN_INDIRECT_DISPATCH_SHADERS_H__ #define __PAN_INDIRECT_DISPATCH_SHADERS_H__ -#include "pan_scoreboard.h" #include "genxml/gen_macros.h" +#include "pan_scoreboard.h" struct pan_device; struct pan_scoreboard; struct pan_pool; struct pan_indirect_dispatch_info { - mali_ptr job; - mali_ptr indirect_dim; - mali_ptr num_wg_sysval[3]; + mali_ptr job; + mali_ptr indirect_dim; + mali_ptr num_wg_sysval[3]; } PACKED; -unsigned -GENX(pan_indirect_dispatch_emit)(struct pan_pool *pool, - struct pan_scoreboard *scoreboard, - const struct pan_indirect_dispatch_info *dispatch_info); +unsigned GENX(pan_indirect_dispatch_emit)( + struct pan_pool *pool, struct pan_scoreboard *scoreboard, + const struct pan_indirect_dispatch_info *dispatch_info); -void -GENX(pan_indirect_dispatch_cleanup)(struct panfrost_device *dev); +void GENX(pan_indirect_dispatch_cleanup)(struct panfrost_device *dev); #endif diff --git a/src/panfrost/lib/pan_layout.c b/src/panfrost/lib/pan_layout.c index bcb5af97f4f..981779c9bc3 100644 --- a/src/panfrost/lib/pan_layout.c +++ b/src/panfrost/lib/pan_layout.c @@ -35,33 +35,27 @@ /* clang-format on */ uint64_t pan_best_modifiers[PAN_MODIFIER_COUNT] = { DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | - AFBC_FORMAT_MOD_TILED | - AFBC_FORMAT_MOD_SC | - AFBC_FORMAT_MOD_SPARSE | - AFBC_FORMAT_MOD_YTR), + AFBC_FORMAT_MOD_TILED | AFBC_FORMAT_MOD_SC | + AFBC_FORMAT_MOD_SPARSE | AFBC_FORMAT_MOD_YTR), DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | - AFBC_FORMAT_MOD_TILED | - AFBC_FORMAT_MOD_SC | + AFBC_FORMAT_MOD_TILED | AFBC_FORMAT_MOD_SC | AFBC_FORMAT_MOD_SPARSE), DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | - AFBC_FORMAT_MOD_SPARSE | - AFBC_FORMAT_MOD_YTR), + AFBC_FORMAT_MOD_SPARSE | AFBC_FORMAT_MOD_YTR), DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | AFBC_FORMAT_MOD_SPARSE), DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED, - DRM_FORMAT_MOD_LINEAR -}; + DRM_FORMAT_MOD_LINEAR}; /* Table of AFBC superblock sizes */ -static const struct pan_block_size -afbc_superblock_sizes[] = { - [AFBC_FORMAT_MOD_BLOCK_SIZE_16x16] = { 16, 16 }, - [AFBC_FORMAT_MOD_BLOCK_SIZE_32x8] = { 32, 8 }, - [AFBC_FORMAT_MOD_BLOCK_SIZE_64x4] = { 64, 4 }, +static const struct pan_block_size afbc_superblock_sizes[] = { + [AFBC_FORMAT_MOD_BLOCK_SIZE_16x16] = {16, 16}, + [AFBC_FORMAT_MOD_BLOCK_SIZE_32x8] = {32, 8}, + [AFBC_FORMAT_MOD_BLOCK_SIZE_64x4] = {64, 4}, }; /* clang-format off */ diff --git a/src/panfrost/lib/pan_pool.h b/src/panfrost/lib/pan_pool.h index 01c8348c41d..c499994399a 100644 --- a/src/panfrost/lib/pan_pool.h +++ b/src/panfrost/lib/pan_pool.h @@ -34,100 +34,99 @@ /* Represents grow-only memory. */ struct pan_pool { - /* Parent device for allocation */ - struct panfrost_device *dev; + /* Parent device for allocation */ + struct panfrost_device *dev; - /* Label for created BOs */ - const char *label; + /* Label for created BOs */ + const char *label; - /* BO flags to use in the pool */ - unsigned create_flags; + /* BO flags to use in the pool */ + unsigned create_flags; - /* Minimum size for allocated BOs. */ - size_t slab_size; + /* Minimum size for allocated BOs. 
*/ + size_t slab_size; }; static inline void pan_pool_init(struct pan_pool *pool, struct panfrost_device *dev, unsigned create_flags, size_t slab_size, const char *label) { - pool->dev = dev; - pool->create_flags = create_flags; - pool->slab_size = slab_size; - pool->label = label; + pool->dev = dev; + pool->create_flags = create_flags; + pool->slab_size = slab_size; + pool->label = label; } /* Represents a fat pointer for GPU-mapped memory, returned from the transient * allocator and not used for much else */ -struct panfrost_ptr -pan_pool_alloc_aligned(struct pan_pool *pool, size_t sz, unsigned alignment); +struct panfrost_ptr pan_pool_alloc_aligned(struct pan_pool *pool, size_t sz, + unsigned alignment); -#define PAN_POOL_ALLOCATOR(pool_subclass, alloc_func) \ -struct panfrost_ptr \ -pan_pool_alloc_aligned(struct pan_pool *p, size_t sz, unsigned alignment) \ -{ \ - pool_subclass *pool = container_of(p, pool_subclass, base); \ - return alloc_func(pool, sz, alignment); \ -} +#define PAN_POOL_ALLOCATOR(pool_subclass, alloc_func) \ + struct panfrost_ptr pan_pool_alloc_aligned(struct pan_pool *p, size_t sz, \ + unsigned alignment) \ + { \ + pool_subclass *pool = container_of(p, pool_subclass, base); \ + return alloc_func(pool, sz, alignment); \ + } static inline mali_ptr -pan_pool_upload_aligned(struct pan_pool *pool, const void *data, size_t sz, unsigned alignment) +pan_pool_upload_aligned(struct pan_pool *pool, const void *data, size_t sz, + unsigned alignment) { - struct panfrost_ptr transfer = pan_pool_alloc_aligned(pool, sz, alignment); - memcpy(transfer.cpu, data, sz); - return transfer.gpu; + struct panfrost_ptr transfer = pan_pool_alloc_aligned(pool, sz, alignment); + memcpy(transfer.cpu, data, sz); + return transfer.gpu; } static inline mali_ptr pan_pool_upload(struct pan_pool *pool, const void *data, size_t sz) { - return pan_pool_upload_aligned(pool, data, sz, sz); + return pan_pool_upload_aligned(pool, data, sz, sz); } struct pan_desc_alloc_info { - unsigned size; - unsigned align; - unsigned nelems; + unsigned size; + unsigned align; + unsigned nelems; }; -#define PAN_DESC_ARRAY(count, name) \ - { \ - .size = pan_size(name), \ - .align = pan_alignment(name), \ - .nelems = count, \ - } +#define PAN_DESC_ARRAY(count, name) \ + { \ + .size = pan_size(name), .align = pan_alignment(name), .nelems = count, \ + } #define PAN_DESC(name) PAN_DESC_ARRAY(1, name) -#define PAN_DESC_AGGREGATE(...) \ - (struct pan_desc_alloc_info[]) { \ - __VA_ARGS__, \ - { 0 }, \ - } +#define PAN_DESC_AGGREGATE(...) 
\ + (struct pan_desc_alloc_info[]) \ + { \ + __VA_ARGS__, {0}, \ + } static inline struct panfrost_ptr pan_pool_alloc_descs(struct pan_pool *pool, const struct pan_desc_alloc_info *descs) { - unsigned size = 0; - unsigned align = descs[0].align; + unsigned size = 0; + unsigned align = descs[0].align; - for (unsigned i = 0; descs[i].size; i++) { - assert(!(size & (descs[i].align - 1))); - size += descs[i].size * descs[i].nelems; - } + for (unsigned i = 0; descs[i].size; i++) { + assert(!(size & (descs[i].align - 1))); + size += descs[i].size * descs[i].nelems; + } - return pan_pool_alloc_aligned(pool, size, align); + return pan_pool_alloc_aligned(pool, size, align); } -#define pan_pool_alloc_desc(pool, name) \ - pan_pool_alloc_descs(pool, PAN_DESC_AGGREGATE(PAN_DESC(name))) +#define pan_pool_alloc_desc(pool, name) \ + pan_pool_alloc_descs(pool, PAN_DESC_AGGREGATE(PAN_DESC(name))) -#define pan_pool_alloc_desc_array(pool, count, name) \ - pan_pool_alloc_descs(pool, PAN_DESC_AGGREGATE(PAN_DESC_ARRAY(count, name))) +#define pan_pool_alloc_desc_array(pool, count, name) \ + pan_pool_alloc_descs(pool, PAN_DESC_AGGREGATE(PAN_DESC_ARRAY(count, name))) -#define pan_pool_alloc_desc_aggregate(pool, ...) \ - pan_pool_alloc_descs(pool, PAN_DESC_AGGREGATE(__VA_ARGS__)) +#define pan_pool_alloc_desc_aggregate(pool, ...) \ + pan_pool_alloc_descs(pool, PAN_DESC_AGGREGATE(__VA_ARGS__)) #endif diff --git a/src/panfrost/lib/pan_props.c b/src/panfrost/lib/pan_props.c index f7a5d22bbf5..7b698dec8e8 100644 --- a/src/panfrost/lib/pan_props.c +++ b/src/panfrost/lib/pan_props.c @@ -26,31 +26,30 @@ #include -#include "util/u_math.h" -#include "util/macros.h" -#include "util/hash_table.h" -#include "util/u_thread.h" #include "drm-uapi/panfrost_drm.h" -#include "pan_encoder.h" -#include "pan_device.h" +#include "util/hash_table.h" +#include "util/macros.h" +#include "util/u_math.h" +#include "util/u_thread.h" #include "pan_bo.h" +#include "pan_device.h" +#include "pan_encoder.h" #include "pan_texture.h" -#include "wrap.h" #include "pan_util.h" +#include "wrap.h" /* Fixed "minimum revisions" */ -#define NO_ANISO (~0) +#define NO_ANISO (~0) #define HAS_ANISO (0) -#define MODEL(gpu_id_, shortname, counters_, min_rev_anisotropic_, tib_size_, quirks_) \ - { \ - .gpu_id = gpu_id_, \ - .name = "Mali-" shortname " (Panfrost)", \ - .performance_counters = counters_, \ - .min_rev_anisotropic = min_rev_anisotropic_, \ - .tilebuffer_size = tib_size_, \ - .quirks = quirks_, \ - } +#define MODEL(gpu_id_, shortname, counters_, min_rev_anisotropic_, tib_size_, \ + quirks_) \ + { \ + .gpu_id = gpu_id_, .name = "Mali-" shortname " (Panfrost)", \ + .performance_counters = counters_, \ + .min_rev_anisotropic = min_rev_anisotropic_, \ + .tilebuffer_size = tib_size_, .quirks = quirks_, \ + } /* Table of supported Mali GPUs */ /* clang-format off */ @@ -85,92 +84,90 @@ const struct panfrost_model panfrost_model_list[] = { const struct panfrost_model * panfrost_get_model(uint32_t gpu_id) { - for (unsigned i = 0; i < ARRAY_SIZE(panfrost_model_list); ++i) { - if (panfrost_model_list[i].gpu_id == gpu_id) - return &panfrost_model_list[i]; - } + for (unsigned i = 0; i < ARRAY_SIZE(panfrost_model_list); ++i) { + if (panfrost_model_list[i].gpu_id == gpu_id) + return &panfrost_model_list[i]; + } - return NULL; + return NULL; } /* Abstraction over the raw drm_panfrost_get_param ioctl for fetching * information about devices */ static __u64 -panfrost_query_raw( - int fd, - enum drm_panfrost_param param, - bool required, - unsigned default_value) 
+panfrost_query_raw(int fd, enum drm_panfrost_param param, bool required, + unsigned default_value) { - struct drm_panfrost_get_param get_param = {0,}; - ASSERTED int ret; + struct drm_panfrost_get_param get_param = { + 0, + }; + ASSERTED int ret; - get_param.param = param; - ret = drmIoctl(fd, DRM_IOCTL_PANFROST_GET_PARAM, &get_param); + get_param.param = param; + ret = drmIoctl(fd, DRM_IOCTL_PANFROST_GET_PARAM, &get_param); - if (ret) { - assert(!required); - return default_value; - } + if (ret) { + assert(!required); + return default_value; + } - return get_param.value; + return get_param.value; } static unsigned panfrost_query_gpu_version(int fd) { - return panfrost_query_raw(fd, DRM_PANFROST_PARAM_GPU_PROD_ID, true, 0); + return panfrost_query_raw(fd, DRM_PANFROST_PARAM_GPU_PROD_ID, true, 0); } static unsigned panfrost_query_gpu_revision(int fd) { - return panfrost_query_raw(fd, DRM_PANFROST_PARAM_GPU_REVISION, true, 0); + return panfrost_query_raw(fd, DRM_PANFROST_PARAM_GPU_REVISION, true, 0); } unsigned panfrost_query_l2_slices(const struct panfrost_device *dev) { - /* Query MEM_FEATURES register */ - uint32_t mem_features = - panfrost_query_raw(dev->fd, DRM_PANFROST_PARAM_MEM_FEATURES, - true, 0); + /* Query MEM_FEATURES register */ + uint32_t mem_features = + panfrost_query_raw(dev->fd, DRM_PANFROST_PARAM_MEM_FEATURES, true, 0); - /* L2_SLICES is MEM_FEATURES[11:8] minus(1) */ - return ((mem_features >> 8) & 0xF) + 1; + /* L2_SLICES is MEM_FEATURES[11:8] minus(1) */ + return ((mem_features >> 8) & 0xF) + 1; } static struct panfrost_tiler_features panfrost_query_tiler_features(int fd) { - /* Default value (2^9 bytes and 8 levels) to match old behaviour */ - uint32_t raw = panfrost_query_raw(fd, DRM_PANFROST_PARAM_TILER_FEATURES, - false, 0x809); + /* Default value (2^9 bytes and 8 levels) to match old behaviour */ + uint32_t raw = + panfrost_query_raw(fd, DRM_PANFROST_PARAM_TILER_FEATURES, false, 0x809); - /* Bin size is log2 in the first byte, max levels in the second byte */ - return (struct panfrost_tiler_features) { - .bin_size = (1 << (raw & BITFIELD_MASK(5))), - .max_levels = (raw >> 8) & BITFIELD_MASK(4), - }; + /* Bin size is log2 in the first byte, max levels in the second byte */ + return (struct panfrost_tiler_features){ + .bin_size = (1 << (raw & BITFIELD_MASK(5))), + .max_levels = (raw >> 8) & BITFIELD_MASK(4), + }; } static unsigned panfrost_query_core_count(int fd, unsigned *core_id_range) { - /* On older kernels, worst-case to 16 cores */ + /* On older kernels, worst-case to 16 cores */ - unsigned mask = panfrost_query_raw(fd, - DRM_PANFROST_PARAM_SHADER_PRESENT, false, 0xffff); + unsigned mask = + panfrost_query_raw(fd, DRM_PANFROST_PARAM_SHADER_PRESENT, false, 0xffff); - /* Some cores might be absent. In some cases, we care - * about the range of core IDs (that is, the greatest core ID + 1). If - * the core mask is contiguous, this equals the core count. - */ - *core_id_range = util_last_bit(mask); + /* Some cores might be absent. In some cases, we care + * about the range of core IDs (that is, the greatest core ID + 1). If + * the core mask is contiguous, this equals the core count. 
+ */ + *core_id_range = util_last_bit(mask); - /* The actual core count skips overs the gaps */ - return util_bitcount(mask); + /* The actual core count skips overs the gaps */ + return util_bitcount(mask); } /* Architectural maximums, since this register may be not implemented @@ -180,57 +177,52 @@ panfrost_query_core_count(int fd, unsigned *core_id_range) static unsigned panfrost_max_thread_count(unsigned arch) { - switch (arch) { - /* Midgard */ - case 4: - case 5: - return 256; + switch (arch) { + /* Midgard */ + case 4: + case 5: + return 256; - /* Bifrost, first generation */ - case 6: - return 384; + /* Bifrost, first generation */ + case 6: + return 384; - /* Bifrost, second generation (G31 is 512 but it doesn't matter) */ - case 7: - return 768; + /* Bifrost, second generation (G31 is 512 but it doesn't matter) */ + case 7: + return 768; - /* Valhall (for completeness) */ - default: - return 1024; - } + /* Valhall (for completeness) */ + default: + return 1024; + } } static unsigned panfrost_query_thread_tls_alloc(int fd, unsigned major) { - unsigned tls = panfrost_query_raw(fd, - DRM_PANFROST_PARAM_THREAD_TLS_ALLOC, false, 0); + unsigned tls = + panfrost_query_raw(fd, DRM_PANFROST_PARAM_THREAD_TLS_ALLOC, false, 0); - return (tls > 0) ? tls : panfrost_max_thread_count(major); + return (tls > 0) ? tls : panfrost_max_thread_count(major); } static uint32_t panfrost_query_compressed_formats(int fd) { - /* If unspecified, assume ASTC/ETC only. Factory default for Juno, and - * should exist on any Mali configuration. All hardware should report - * these texture formats but the kernel might not be new enough. */ + /* If unspecified, assume ASTC/ETC only. Factory default for Juno, and + * should exist on any Mali configuration. All hardware should report + * these texture formats but the kernel might not be new enough. */ - uint32_t default_set = - (1 << MALI_ETC2_RGB8) | - (1 << MALI_ETC2_R11_UNORM) | - (1 << MALI_ETC2_RGBA8) | - (1 << MALI_ETC2_RG11_UNORM) | - (1 << MALI_ETC2_R11_SNORM) | - (1 << MALI_ETC2_RG11_SNORM) | - (1 << MALI_ETC2_RGB8A1) | - (1 << MALI_ASTC_3D_LDR) | - (1 << MALI_ASTC_3D_HDR) | - (1 << MALI_ASTC_2D_LDR) | - (1 << MALI_ASTC_2D_HDR); + uint32_t default_set = (1 << MALI_ETC2_RGB8) | (1 << MALI_ETC2_R11_UNORM) | + (1 << MALI_ETC2_RGBA8) | (1 << MALI_ETC2_RG11_UNORM) | + (1 << MALI_ETC2_R11_SNORM) | + (1 << MALI_ETC2_RG11_SNORM) | + (1 << MALI_ETC2_RGB8A1) | (1 << MALI_ASTC_3D_LDR) | + (1 << MALI_ASTC_3D_HDR) | (1 << MALI_ASTC_2D_LDR) | + (1 << MALI_ASTC_2D_HDR); - return panfrost_query_raw(fd, DRM_PANFROST_PARAM_TEXTURE_FEATURES0, - false, default_set); + return panfrost_query_raw(fd, DRM_PANFROST_PARAM_TEXTURE_FEATURES0, false, + default_set); } /* DRM_PANFROST_PARAM_TEXTURE_FEATURES0 will return a bitmask of supported @@ -239,13 +231,13 @@ panfrost_query_compressed_formats(int fd) bool panfrost_supports_compressed_format(struct panfrost_device *dev, unsigned fmt) { - if (MALI_EXTRACT_TYPE(fmt) != MALI_FORMAT_COMPRESSED) - return true; + if (MALI_EXTRACT_TYPE(fmt) != MALI_FORMAT_COMPRESSED) + return true; - unsigned idx = fmt & ~MALI_FORMAT_COMPRESSED; - assert(idx < 32); + unsigned idx = fmt & ~MALI_FORMAT_COMPRESSED; + assert(idx < 32); - return dev->compressed_formats & (1 << idx); + return dev->compressed_formats & (1 << idx); } /* Check for AFBC hardware support. AFBC is introduced in v5. 
Implementations @@ -254,11 +246,10 @@ panfrost_supports_compressed_format(struct panfrost_device *dev, unsigned fmt) static bool panfrost_query_afbc(int fd, unsigned arch) { - unsigned reg = panfrost_query_raw(fd, - DRM_PANFROST_PARAM_AFBC_FEATURES, - false, 0); + unsigned reg = + panfrost_query_raw(fd, DRM_PANFROST_PARAM_AFBC_FEATURES, false, 0); - return (arch >= 5) && (reg == 0); + return (arch >= 5) && (reg == 0); } /* @@ -271,83 +262,83 @@ panfrost_query_afbc(int fd, unsigned arch) static unsigned panfrost_query_optimal_tib_size(const struct panfrost_device *dev) { - /* Preconditions ensure the returned value is a multiple of 1 KiB, the - * granularity of the colour buffer allocation field. - */ - assert(dev->model->tilebuffer_size >= 2048); - assert(util_is_power_of_two_nonzero(dev->model->tilebuffer_size)); + /* Preconditions ensure the returned value is a multiple of 1 KiB, the + * granularity of the colour buffer allocation field. + */ + assert(dev->model->tilebuffer_size >= 2048); + assert(util_is_power_of_two_nonzero(dev->model->tilebuffer_size)); - return dev->model->tilebuffer_size / 2; + return dev->model->tilebuffer_size / 2; } void panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev) { - dev->fd = fd; - dev->memctx = memctx; - dev->gpu_id = panfrost_query_gpu_version(fd); - dev->arch = pan_arch(dev->gpu_id); - dev->kernel_version = drmGetVersion(fd); - dev->revision = panfrost_query_gpu_revision(fd); - dev->model = panfrost_get_model(dev->gpu_id); + dev->fd = fd; + dev->memctx = memctx; + dev->gpu_id = panfrost_query_gpu_version(fd); + dev->arch = pan_arch(dev->gpu_id); + dev->kernel_version = drmGetVersion(fd); + dev->revision = panfrost_query_gpu_revision(fd); + dev->model = panfrost_get_model(dev->gpu_id); - /* If we don't recognize the model, bail early */ - if (!dev->model) - return; + /* If we don't recognize the model, bail early */ + if (!dev->model) + return; - dev->core_count = panfrost_query_core_count(fd, &dev->core_id_range); - dev->thread_tls_alloc = panfrost_query_thread_tls_alloc(fd, dev->arch); - dev->optimal_tib_size = panfrost_query_optimal_tib_size(dev); - dev->compressed_formats = panfrost_query_compressed_formats(fd); - dev->tiler_features = panfrost_query_tiler_features(fd); - dev->has_afbc = panfrost_query_afbc(fd, dev->arch); + dev->core_count = panfrost_query_core_count(fd, &dev->core_id_range); + dev->thread_tls_alloc = panfrost_query_thread_tls_alloc(fd, dev->arch); + dev->optimal_tib_size = panfrost_query_optimal_tib_size(dev); + dev->compressed_formats = panfrost_query_compressed_formats(fd); + dev->tiler_features = panfrost_query_tiler_features(fd); + dev->has_afbc = panfrost_query_afbc(fd, dev->arch); - if (dev->arch <= 6) - dev->formats = panfrost_pipe_format_v6; - else if (dev->arch <= 7) - dev->formats = panfrost_pipe_format_v7; - else - dev->formats = panfrost_pipe_format_v9; + if (dev->arch <= 6) + dev->formats = panfrost_pipe_format_v6; + else if (dev->arch <= 7) + dev->formats = panfrost_pipe_format_v7; + else + dev->formats = panfrost_pipe_format_v9; - util_sparse_array_init(&dev->bo_map, sizeof(struct panfrost_bo), 512); + util_sparse_array_init(&dev->bo_map, sizeof(struct panfrost_bo), 512); - pthread_mutex_init(&dev->bo_cache.lock, NULL); - list_inithead(&dev->bo_cache.lru); + pthread_mutex_init(&dev->bo_cache.lock, NULL); + list_inithead(&dev->bo_cache.lru); - for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i) - list_inithead(&dev->bo_cache.buckets[i]); + for (unsigned i = 0; i < 
ARRAY_SIZE(dev->bo_cache.buckets); ++i) + list_inithead(&dev->bo_cache.buckets[i]); - /* Initialize pandecode before we start allocating */ - if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) - pandecode_initialize(!(dev->debug & PAN_DBG_TRACE)); + /* Initialize pandecode before we start allocating */ + if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) + pandecode_initialize(!(dev->debug & PAN_DBG_TRACE)); - /* Tiler heap is internally required by the tiler, which can only be - * active for a single job chain at once, so a single heap can be - * shared across batches/contextes */ + /* Tiler heap is internally required by the tiler, which can only be + * active for a single job chain at once, so a single heap can be + * shared across batches/contextes */ - dev->tiler_heap = panfrost_bo_create(dev, 128 * 1024 * 1024, - PAN_BO_INVISIBLE | PAN_BO_GROWABLE, "Tiler heap"); + dev->tiler_heap = panfrost_bo_create( + dev, 128 * 1024 * 1024, PAN_BO_INVISIBLE | PAN_BO_GROWABLE, "Tiler heap"); - pthread_mutex_init(&dev->submit_lock, NULL); + pthread_mutex_init(&dev->submit_lock, NULL); - /* Done once on init */ - panfrost_upload_sample_positions(dev); + /* Done once on init */ + panfrost_upload_sample_positions(dev); } void panfrost_close_device(struct panfrost_device *dev) { - /* If we don't recognize the model, the rest of the device won't exist, - * we will have early-exited the device open. - */ - if (dev->model) { - pthread_mutex_destroy(&dev->submit_lock); - panfrost_bo_unreference(dev->tiler_heap); - panfrost_bo_cache_evict_all(dev); - pthread_mutex_destroy(&dev->bo_cache.lock); - util_sparse_array_finish(&dev->bo_map); - } + /* If we don't recognize the model, the rest of the device won't exist, + * we will have early-exited the device open. + */ + if (dev->model) { + pthread_mutex_destroy(&dev->submit_lock); + panfrost_bo_unreference(dev->tiler_heap); + panfrost_bo_cache_evict_all(dev); + pthread_mutex_destroy(&dev->bo_cache.lock); + util_sparse_array_finish(&dev->bo_map); + } - drmFreeVersion(dev->kernel_version); - close(dev->fd); + drmFreeVersion(dev->kernel_version); + close(dev->fd); } diff --git a/src/panfrost/lib/pan_samples.c b/src/panfrost/lib/pan_samples.c index 2a9de8798cf..18205f2e3e6 100644 --- a/src/panfrost/lib/pan_samples.c +++ b/src/panfrost/lib/pan_samples.c @@ -21,8 +21,8 @@ * SOFTWARE. */ -#include "pan_device.h" #include "pan_bo.h" +#include "pan_device.h" /* Sample positions are specified partially in hardware, partially in software * on Mali. On Midgard, sample positions are completely fixed but need to be @@ -40,25 +40,25 @@ */ struct mali_sample_position { - uint16_t x, y; + uint16_t x, y; } __attribute__((packed)); struct mali_sample_positions { - struct mali_sample_position positions[32]; - struct mali_sample_position origin; - struct mali_sample_position padding[64 - (32 + 1)]; + struct mali_sample_position positions[32]; + struct mali_sample_position origin; + struct mali_sample_position padding[64 - (32 + 1)]; } __attribute__((packed)); /* SAMPLE16 constructs a single sample in terms of 1/16's of the grid, centered * at the origin. SAMPLE4/8 swap the units for legibility. 
*/ -#define SAMPLE16(x, y) { \ - (((x) + 8) * (256 / 16)), \ - (((y) + 8) * (256 / 16)) \ -} +#define SAMPLE16(x, y) \ + { \ + (((x) + 8) * (256 / 16)), (((y) + 8) * (256 / 16)) \ + } -#define SAMPLE8(x, y) SAMPLE16((x) * 2, (y) * 2) -#define SAMPLE4(x, y) SAMPLE16((x) * 4, (y) * 4) +#define SAMPLE8(x, y) SAMPLE16((x)*2, (y)*2) +#define SAMPLE4(x, y) SAMPLE16((x)*4, (y)*4) /* clang-format off */ const struct mali_sample_positions sample_position_lut[] = { @@ -129,34 +129,32 @@ const struct mali_sample_positions sample_position_lut[] = { mali_ptr panfrost_sample_positions(const struct panfrost_device *dev, - enum mali_sample_pattern pattern) + enum mali_sample_pattern pattern) { - assert(pattern < ARRAY_SIZE(sample_position_lut)); - unsigned offset = (pattern * sizeof(sample_position_lut[0])); - return dev->sample_positions->ptr.gpu + offset; + assert(pattern < ARRAY_SIZE(sample_position_lut)); + unsigned offset = (pattern * sizeof(sample_position_lut[0])); + return dev->sample_positions->ptr.gpu + offset; } void panfrost_upload_sample_positions(struct panfrost_device *dev) { - STATIC_ASSERT(sizeof(sample_position_lut) < 4096); - dev->sample_positions = panfrost_bo_create(dev, 4096, 0, "Sample positions"); + STATIC_ASSERT(sizeof(sample_position_lut) < 4096); + dev->sample_positions = panfrost_bo_create(dev, 4096, 0, "Sample positions"); - memcpy(dev->sample_positions->ptr.cpu, sample_position_lut, - sizeof(sample_position_lut)); + memcpy(dev->sample_positions->ptr.cpu, sample_position_lut, + sizeof(sample_position_lut)); } /* CPU side LUT query, to implement glGetMultisamplefv */ void -panfrost_query_sample_position( - enum mali_sample_pattern pattern, - unsigned sample_idx, - float *out) +panfrost_query_sample_position(enum mali_sample_pattern pattern, + unsigned sample_idx, float *out) { - struct mali_sample_position pos = - sample_position_lut[pattern].positions[sample_idx]; + struct mali_sample_position pos = + sample_position_lut[pattern].positions[sample_idx]; - out[0] = DECODE_FIXED_16(pos.x); - out[1] = DECODE_FIXED_16(pos.y); + out[0] = DECODE_FIXED_16(pos.x); + out[1] = DECODE_FIXED_16(pos.y); } diff --git a/src/panfrost/lib/pan_scoreboard.h b/src/panfrost/lib/pan_scoreboard.h index f6476c66651..4cd4c46fb48 100644 --- a/src/panfrost/lib/pan_scoreboard.h +++ b/src/panfrost/lib/pan_scoreboard.h @@ -31,27 +31,27 @@ #include "pan_pool.h" struct pan_scoreboard { - /* The first job in the batch */ - mali_ptr first_job; + /* The first job in the batch */ + mali_ptr first_job; - /* The number of jobs in the primary batch, essentially */ - unsigned job_index; + /* The number of jobs in the primary batch, essentially */ + unsigned job_index; - /* A CPU-side pointer to the previous job for next_job linking */ - struct mali_job_header_packed *prev_job; + /* A CPU-side pointer to the previous job for next_job linking */ + struct mali_job_header_packed *prev_job; - /* A CPU-side pointer to the first tiler job for dep updates when - * injecting a reload tiler job. - */ - struct mali_job_header_packed *first_tiler; - uint32_t first_tiler_dep1; + /* A CPU-side pointer to the first tiler job for dep updates when + * injecting a reload tiler job. + */ + struct mali_job_header_packed *first_tiler; + uint32_t first_tiler_dep1; - /* The dependency for tiler jobs (i.e. the index of the last emitted - * tiler job, or zero if none have been emitted) */ - unsigned tiler_dep; + /* The dependency for tiler jobs (i.e. 
the index of the last emitted + * tiler job, or zero if none have been emitted) */ + unsigned tiler_dep; - /* The job index of the WRITE_VALUE job (before it has been created) */ - unsigned write_value_index; + /* The job index of the WRITE_VALUE job (before it has been created) */ + unsigned write_value_index; }; #ifdef PAN_ARCH @@ -132,16 +132,16 @@ static bool panfrost_job_uses_tiling(enum mali_job_type type) { #if PAN_ARCH >= 9 - if (type == MALI_JOB_TYPE_MALLOC_VERTEX) - return true; + if (type == MALI_JOB_TYPE_MALLOC_VERTEX) + return true; #endif #if PAN_ARCH >= 6 - if (type == MALI_JOB_TYPE_INDEXED_VERTEX) - return true; + if (type == MALI_JOB_TYPE_INDEXED_VERTEX) + return true; #endif - return (type == MALI_JOB_TYPE_TILER); + return (type == MALI_JOB_TYPE_TILER); } /* Generates, uploads, and queues a a new job. All fields are written in order @@ -154,83 +154,80 @@ panfrost_job_uses_tiling(enum mali_job_type type) * not wallpapering and set this, dragons will eat you. */ static inline unsigned -panfrost_add_job(struct pan_pool *pool, - struct pan_scoreboard *scoreboard, - enum mali_job_type type, - bool barrier, bool suppress_prefetch, +panfrost_add_job(struct pan_pool *pool, struct pan_scoreboard *scoreboard, + enum mali_job_type type, bool barrier, bool suppress_prefetch, unsigned local_dep, unsigned global_dep, - const struct panfrost_ptr *job, - bool inject) + const struct panfrost_ptr *job, bool inject) { - if (panfrost_job_uses_tiling(type)) { - /* Tiler jobs must be chained, and on Midgard, the first tiler - * job must depend on the write value job, whose index we - * reserve now */ + if (panfrost_job_uses_tiling(type)) { + /* Tiler jobs must be chained, and on Midgard, the first tiler + * job must depend on the write value job, whose index we + * reserve now */ - if (PAN_ARCH <= 5 && !scoreboard->write_value_index) - scoreboard->write_value_index = ++scoreboard->job_index; + if (PAN_ARCH <= 5 && !scoreboard->write_value_index) + scoreboard->write_value_index = ++scoreboard->job_index; - if (scoreboard->tiler_dep && !inject) - global_dep = scoreboard->tiler_dep; - else if (PAN_ARCH <= 5) - global_dep = scoreboard->write_value_index; - } + if (scoreboard->tiler_dep && !inject) + global_dep = scoreboard->tiler_dep; + else if (PAN_ARCH <= 5) + global_dep = scoreboard->write_value_index; + } - /* Assign the index */ - unsigned index = ++scoreboard->job_index; + /* Assign the index */ + unsigned index = ++scoreboard->job_index; - pan_pack(job->cpu, JOB_HEADER, header) { - header.type = type; - header.barrier = barrier; - header.suppress_prefetch = suppress_prefetch; - header.index = index; - header.dependency_1 = local_dep; - header.dependency_2 = global_dep; + pan_pack(job->cpu, JOB_HEADER, header) { + header.type = type; + header.barrier = barrier; + header.suppress_prefetch = suppress_prefetch; + header.index = index; + header.dependency_1 = local_dep; + header.dependency_2 = global_dep; - if (inject) - header.next = scoreboard->first_job; - } + if (inject) + header.next = scoreboard->first_job; + } - if (inject) { - assert(type == MALI_JOB_TYPE_TILER && "only for blit shaders"); + if (inject) { + assert(type == MALI_JOB_TYPE_TILER && "only for blit shaders"); - if (scoreboard->first_tiler) { - /* Manual update of the dep2 field. This is bad, - * don't copy this pattern. - */ - scoreboard->first_tiler->opaque[5] = - scoreboard->first_tiler_dep1 | (index << 16); - } + if (scoreboard->first_tiler) { + /* Manual update of the dep2 field. This is bad, + * don't copy this pattern. 
+ */ + scoreboard->first_tiler->opaque[5] = + scoreboard->first_tiler_dep1 | (index << 16); + } - scoreboard->first_tiler = (void *)job->cpu; - scoreboard->first_tiler_dep1 = local_dep; - scoreboard->first_job = job->gpu; - return index; - } + scoreboard->first_tiler = (void *)job->cpu; + scoreboard->first_tiler_dep1 = local_dep; + scoreboard->first_job = job->gpu; + return index; + } - /* Form a chain */ - if (panfrost_job_uses_tiling(type)) { - if (!scoreboard->first_tiler) { - scoreboard->first_tiler = (void *)job->cpu; - scoreboard->first_tiler_dep1 = local_dep; - } - scoreboard->tiler_dep = index; - } + /* Form a chain */ + if (panfrost_job_uses_tiling(type)) { + if (!scoreboard->first_tiler) { + scoreboard->first_tiler = (void *)job->cpu; + scoreboard->first_tiler_dep1 = local_dep; + } + scoreboard->tiler_dep = index; + } - if (scoreboard->prev_job) { - /* Manual update of the next pointer. This is bad, don't copy - * this pattern. - * TODO: Find a way to defer last job header emission until we - * have a new job to queue or the batch is ready for execution. - */ - scoreboard->prev_job->opaque[6] = job->gpu; - scoreboard->prev_job->opaque[7] = job->gpu >> 32; - } else { - scoreboard->first_job = job->gpu; - } + if (scoreboard->prev_job) { + /* Manual update of the next pointer. This is bad, don't copy + * this pattern. + * TODO: Find a way to defer last job header emission until we + * have a new job to queue or the batch is ready for execution. + */ + scoreboard->prev_job->opaque[6] = job->gpu; + scoreboard->prev_job->opaque[7] = job->gpu >> 32; + } else { + scoreboard->first_job = job->gpu; + } - scoreboard->prev_job = (struct mali_job_header_packed *)job->cpu; - return index; + scoreboard->prev_job = (struct mali_job_header_packed *)job->cpu; + return index; } /* Generates a write value job, used to initialize the tiler structures. Note @@ -241,30 +238,30 @@ panfrost_scoreboard_initialize_tiler(struct pan_pool *pool, struct pan_scoreboard *scoreboard, mali_ptr polygon_list) { - struct panfrost_ptr transfer = { 0 }; + struct panfrost_ptr transfer = {0}; - /* Check if we even need tiling */ - if (PAN_ARCH >= 6 || !scoreboard->first_tiler) - return transfer; + /* Check if we even need tiling */ + if (PAN_ARCH >= 6 || !scoreboard->first_tiler) + return transfer; - /* Okay, we do. Let's generate it. We'll need the job's polygon list - * regardless of size. */ + /* Okay, we do. Let's generate it. We'll need the job's polygon list + * regardless of size. 
*/ - transfer = pan_pool_alloc_desc(pool, WRITE_VALUE_JOB); + transfer = pan_pool_alloc_desc(pool, WRITE_VALUE_JOB); - pan_section_pack(transfer.cpu, WRITE_VALUE_JOB, HEADER, header) { - header.type = MALI_JOB_TYPE_WRITE_VALUE; - header.index = scoreboard->write_value_index; - header.next = scoreboard->first_job; - } + pan_section_pack(transfer.cpu, WRITE_VALUE_JOB, HEADER, header) { + header.type = MALI_JOB_TYPE_WRITE_VALUE; + header.index = scoreboard->write_value_index; + header.next = scoreboard->first_job; + } - pan_section_pack(transfer.cpu, WRITE_VALUE_JOB, PAYLOAD, payload) { - payload.address = polygon_list; - payload.type = MALI_WRITE_VALUE_TYPE_ZERO; - } + pan_section_pack(transfer.cpu, WRITE_VALUE_JOB, PAYLOAD, payload) { + payload.address = polygon_list; + payload.type = MALI_WRITE_VALUE_TYPE_ZERO; + } - scoreboard->first_job = transfer.gpu; - return transfer; + scoreboard->first_job = transfer.gpu; + return transfer; } #endif /* PAN_ARCH */ diff --git a/src/panfrost/lib/pan_scratch.c b/src/panfrost/lib/pan_scratch.c index 91d8bd65564..9e687ba173e 100644 --- a/src/panfrost/lib/pan_scratch.c +++ b/src/panfrost/lib/pan_scratch.c @@ -24,8 +24,8 @@ * Alyssa Rosenzweig */ -#include "util/u_math.h" #include "util/macros.h" +#include "util/u_math.h" #include "pan_encoder.h" /* Midgard has a small register file, so shaders with high register pressure @@ -66,22 +66,21 @@ unsigned panfrost_get_stack_shift(unsigned stack_size) { - if (stack_size) - return util_logbase2_ceil(DIV_ROUND_UP(stack_size, 16)); - else - return 0; + if (stack_size) + return util_logbase2_ceil(DIV_ROUND_UP(stack_size, 16)); + else + return 0; } /* Computes the aligned stack size given the shift and thread count. */ unsigned -panfrost_get_total_stack_size( - unsigned thread_size, - unsigned threads_per_core, - unsigned core_id_range) +panfrost_get_total_stack_size(unsigned thread_size, unsigned threads_per_core, + unsigned core_id_range) { - unsigned size_per_thread = (thread_size == 0) ? 0 : - util_next_power_of_two(ALIGN_POT(thread_size, 16)); + unsigned size_per_thread = + (thread_size == 0) ? 0 + : util_next_power_of_two(ALIGN_POT(thread_size, 16)); - return size_per_thread * threads_per_core * core_id_range; + return size_per_thread * threads_per_core * core_id_range; } diff --git a/src/panfrost/lib/pan_shader.c b/src/panfrost/lib/pan_shader.c index 73c00befe76..b956183ce7b 100644 --- a/src/panfrost/lib/pan_shader.c +++ b/src/panfrost/lib/pan_shader.c @@ -22,8 +22,8 @@ * SOFTWARE. 
*/ -#include "pan_device.h" #include "pan_shader.h" +#include "pan_device.h" #include "pan_format.h" #if PAN_ARCH <= 5 @@ -36,9 +36,9 @@ const nir_shader_compiler_options * GENX(pan_shader_get_compiler_options)(void) { #if PAN_ARCH >= 6 - return &bifrost_nir_options; + return &bifrost_nir_options; #else - return &midgard_nir_options; + return &midgard_nir_options; #endif } @@ -46,177 +46,172 @@ GENX(pan_shader_get_compiler_options)(void) static enum mali_register_file_format bifrost_blend_type_from_nir(nir_alu_type nir_type) { - switch(nir_type) { - case 0: /* Render target not in use */ - return 0; - case nir_type_float16: - return MALI_REGISTER_FILE_FORMAT_F16; - case nir_type_float32: - return MALI_REGISTER_FILE_FORMAT_F32; - case nir_type_int32: - return MALI_REGISTER_FILE_FORMAT_I32; - case nir_type_uint32: - return MALI_REGISTER_FILE_FORMAT_U32; - case nir_type_int16: - return MALI_REGISTER_FILE_FORMAT_I16; - case nir_type_uint16: - return MALI_REGISTER_FILE_FORMAT_U16; - default: - unreachable("Unsupported blend shader type for NIR alu type"); - return 0; - } + switch (nir_type) { + case 0: /* Render target not in use */ + return 0; + case nir_type_float16: + return MALI_REGISTER_FILE_FORMAT_F16; + case nir_type_float32: + return MALI_REGISTER_FILE_FORMAT_F32; + case nir_type_int32: + return MALI_REGISTER_FILE_FORMAT_I32; + case nir_type_uint32: + return MALI_REGISTER_FILE_FORMAT_U32; + case nir_type_int16: + return MALI_REGISTER_FILE_FORMAT_I16; + case nir_type_uint16: + return MALI_REGISTER_FILE_FORMAT_U16; + default: + unreachable("Unsupported blend shader type for NIR alu type"); + return 0; + } } #if PAN_ARCH <= 7 enum mali_register_file_format GENX(pan_fixup_blend_type)(nir_alu_type T_size, enum pipe_format format) { - const struct util_format_description *desc = util_format_description(format); - unsigned size = nir_alu_type_get_type_size(T_size); - nir_alu_type T_format = pan_unpacked_type_for_format(desc); - nir_alu_type T = nir_alu_type_get_base_type(T_format) | size; + const struct util_format_description *desc = util_format_description(format); + unsigned size = nir_alu_type_get_type_size(T_size); + nir_alu_type T_format = pan_unpacked_type_for_format(desc); + nir_alu_type T = nir_alu_type_get_base_type(T_format) | size; - return bifrost_blend_type_from_nir(T); + return bifrost_blend_type_from_nir(T); } #endif #endif void -GENX(pan_shader_compile)(nir_shader *s, - struct panfrost_compile_inputs *inputs, +GENX(pan_shader_compile)(nir_shader *s, struct panfrost_compile_inputs *inputs, struct util_dynarray *binary, struct pan_shader_info *info) { - memset(info, 0, sizeof(*info)); + memset(info, 0, sizeof(*info)); #if PAN_ARCH >= 6 - bifrost_compile_shader_nir(s, inputs, binary, info); + bifrost_compile_shader_nir(s, inputs, binary, info); #else - for (unsigned i = 0; i < ARRAY_SIZE(inputs->rt_formats); i++) { - enum pipe_format fmt = inputs->rt_formats[i]; - unsigned wb_fmt = panfrost_blendable_formats_v6[fmt].writeback; + for (unsigned i = 0; i < ARRAY_SIZE(inputs->rt_formats); i++) { + enum pipe_format fmt = inputs->rt_formats[i]; + unsigned wb_fmt = panfrost_blendable_formats_v6[fmt].writeback; - if (wb_fmt < MALI_COLOR_FORMAT_R8) - inputs->raw_fmt_mask |= BITFIELD_BIT(i); - } + if (wb_fmt < MALI_COLOR_FORMAT_R8) + inputs->raw_fmt_mask |= BITFIELD_BIT(i); + } - midgard_compile_shader_nir(s, inputs, binary, info); + midgard_compile_shader_nir(s, inputs, binary, info); #endif - info->stage = s->info.stage; - info->contains_barrier = s->info.uses_memory_barrier || - 
s->info.uses_control_barrier; - info->separable = s->info.separate_shader; + info->stage = s->info.stage; + info->contains_barrier = + s->info.uses_memory_barrier || s->info.uses_control_barrier; + info->separable = s->info.separate_shader; - switch (info->stage) { - case MESA_SHADER_VERTEX: - info->attributes_read = s->info.inputs_read; - info->attributes_read_count = util_bitcount64(info->attributes_read); - info->attribute_count = info->attributes_read_count; + switch (info->stage) { + case MESA_SHADER_VERTEX: + info->attributes_read = s->info.inputs_read; + info->attributes_read_count = util_bitcount64(info->attributes_read); + info->attribute_count = info->attributes_read_count; #if PAN_ARCH <= 5 - bool vertex_id = BITSET_TEST(s->info.system_values_read, - SYSTEM_VALUE_VERTEX_ID_ZERO_BASE); - if (vertex_id) - info->attribute_count = MAX2(info->attribute_count, PAN_VERTEX_ID + 1); + bool vertex_id = BITSET_TEST(s->info.system_values_read, + SYSTEM_VALUE_VERTEX_ID_ZERO_BASE); + if (vertex_id) + info->attribute_count = MAX2(info->attribute_count, PAN_VERTEX_ID + 1); - bool instance_id = BITSET_TEST(s->info.system_values_read, - SYSTEM_VALUE_INSTANCE_ID); - if (instance_id) - info->attribute_count = MAX2(info->attribute_count, PAN_INSTANCE_ID + 1); + bool instance_id = + BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID); + if (instance_id) + info->attribute_count = + MAX2(info->attribute_count, PAN_INSTANCE_ID + 1); #endif - info->vs.writes_point_size = - s->info.outputs_written & (1 << VARYING_SLOT_PSIZ); + info->vs.writes_point_size = + s->info.outputs_written & (1 << VARYING_SLOT_PSIZ); #if PAN_ARCH >= 9 - info->varyings.output_count = - util_last_bit(s->info.outputs_written >> VARYING_SLOT_VAR0); + info->varyings.output_count = + util_last_bit(s->info.outputs_written >> VARYING_SLOT_VAR0); #endif - break; - case MESA_SHADER_FRAGMENT: - if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) - info->fs.writes_depth = true; - if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) - info->fs.writes_stencil = true; - if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) - info->fs.writes_coverage = true; + break; + case MESA_SHADER_FRAGMENT: + if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) + info->fs.writes_depth = true; + if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) + info->fs.writes_stencil = true; + if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) + info->fs.writes_coverage = true; - info->fs.outputs_read = s->info.outputs_read >> FRAG_RESULT_DATA0; - info->fs.outputs_written = s->info.outputs_written >> FRAG_RESULT_DATA0; - info->fs.sample_shading = s->info.fs.uses_sample_shading; - info->fs.untyped_color_outputs = s->info.fs.untyped_color_outputs; + info->fs.outputs_read = s->info.outputs_read >> FRAG_RESULT_DATA0; + info->fs.outputs_written = s->info.outputs_written >> FRAG_RESULT_DATA0; + info->fs.sample_shading = s->info.fs.uses_sample_shading; + info->fs.untyped_color_outputs = s->info.fs.untyped_color_outputs; - info->fs.can_discard = s->info.fs.uses_discard; - info->fs.early_fragment_tests = s->info.fs.early_fragment_tests; + info->fs.can_discard = s->info.fs.uses_discard; + info->fs.early_fragment_tests = s->info.fs.early_fragment_tests; - /* List of reasons we need to execute frag shaders when things - * are masked off */ + /* List of reasons we need to execute frag shaders when things + * are masked off */ - info->fs.sidefx = s->info.writes_memory || - 
s->info.fs.uses_discard || - s->info.fs.uses_demote; + info->fs.sidefx = s->info.writes_memory || s->info.fs.uses_discard || + s->info.fs.uses_demote; - /* With suitable ZSA/blend, is early-z possible? */ - info->fs.can_early_z = - !info->fs.sidefx && - !info->fs.writes_depth && - !info->fs.writes_stencil && - !info->fs.writes_coverage; + /* With suitable ZSA/blend, is early-z possible? */ + info->fs.can_early_z = !info->fs.sidefx && !info->fs.writes_depth && + !info->fs.writes_stencil && + !info->fs.writes_coverage; - /* Similiarly with suitable state, is FPK possible? */ - info->fs.can_fpk = - !info->fs.writes_depth && - !info->fs.writes_stencil && - !info->fs.writes_coverage && - !info->fs.can_discard && - !info->fs.outputs_read; + /* Similiarly with suitable state, is FPK possible? */ + info->fs.can_fpk = !info->fs.writes_depth && !info->fs.writes_stencil && + !info->fs.writes_coverage && !info->fs.can_discard && + !info->fs.outputs_read; - /* Requires the same hardware guarantees, so grouped as one bit - * in the hardware. - */ - info->contains_barrier |= s->info.fs.needs_quad_helper_invocations; + /* Requires the same hardware guarantees, so grouped as one bit + * in the hardware. + */ + info->contains_barrier |= s->info.fs.needs_quad_helper_invocations; - info->fs.reads_frag_coord = - (s->info.inputs_read & (1 << VARYING_SLOT_POS)) || - BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRAG_COORD); - info->fs.reads_point_coord = - s->info.inputs_read & (1 << VARYING_SLOT_PNTC); - info->fs.reads_face = - (s->info.inputs_read & (1 << VARYING_SLOT_FACE)) || - BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRONT_FACE); + info->fs.reads_frag_coord = + (s->info.inputs_read & (1 << VARYING_SLOT_POS)) || + BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRAG_COORD); + info->fs.reads_point_coord = + s->info.inputs_read & (1 << VARYING_SLOT_PNTC); + info->fs.reads_face = + (s->info.inputs_read & (1 << VARYING_SLOT_FACE)) || + BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRONT_FACE); #if PAN_ARCH >= 9 - info->varyings.output_count = - util_last_bit(s->info.outputs_read >> VARYING_SLOT_VAR0); + info->varyings.output_count = + util_last_bit(s->info.outputs_read >> VARYING_SLOT_VAR0); #endif - break; - default: - /* Everything else treated as compute */ - info->wls_size = s->info.shared_size; - break; - } + break; + default: + /* Everything else treated as compute */ + info->wls_size = s->info.shared_size; + break; + } - info->outputs_written = s->info.outputs_written; + info->outputs_written = s->info.outputs_written; - /* Sysvals have dedicated UBO */ - info->ubo_count = s->info.num_ubos; - if (info->sysvals.sysval_count && inputs->fixed_sysval_ubo < 0) - info->ubo_count++; + /* Sysvals have dedicated UBO */ + info->ubo_count = s->info.num_ubos; + if (info->sysvals.sysval_count && inputs->fixed_sysval_ubo < 0) + info->ubo_count++; - info->attribute_count += BITSET_LAST_BIT(s->info.images_used); - info->writes_global = s->info.writes_memory; + info->attribute_count += BITSET_LAST_BIT(s->info.images_used); + info->writes_global = s->info.writes_memory; - info->sampler_count = info->texture_count = BITSET_LAST_BIT(s->info.textures_used); + info->sampler_count = info->texture_count = + BITSET_LAST_BIT(s->info.textures_used); - unsigned execution_mode = s->info.float_controls_execution_mode; - info->ftz_fp16 = nir_is_denorm_flush_to_zero(execution_mode, 16); - info->ftz_fp32 = nir_is_denorm_flush_to_zero(execution_mode, 32); + unsigned execution_mode = 
s->info.float_controls_execution_mode; + info->ftz_fp16 = nir_is_denorm_flush_to_zero(execution_mode, 16); + info->ftz_fp32 = nir_is_denorm_flush_to_zero(execution_mode, 32); #if PAN_ARCH >= 6 - /* This is "redundant" information, but is needed in a draw-time hot path */ - for (unsigned i = 0; i < ARRAY_SIZE(info->bifrost.blend); ++i) { - info->bifrost.blend[i].format = - bifrost_blend_type_from_nir(info->bifrost.blend[i].type); - } + /* This is "redundant" information, but is needed in a draw-time hot path */ + for (unsigned i = 0; i < ARRAY_SIZE(info->bifrost.blend); ++i) { + info->bifrost.blend[i].format = + bifrost_blend_type_from_nir(info->bifrost.blend[i].type); + } #endif } diff --git a/src/panfrost/lib/pan_shader.h b/src/panfrost/lib/pan_shader.h index 223f52e4af4..406db3d37ce 100644 --- a/src/panfrost/lib/pan_shader.h +++ b/src/panfrost/lib/pan_shader.h @@ -29,38 +29,36 @@ #include "panfrost/util/pan_ir.h" #include "panfrost/util/pan_lower_framebuffer.h" -#include "pan_device.h" #include "genxml/gen_macros.h" +#include "pan_device.h" struct panfrost_device; #ifdef PAN_ARCH -const nir_shader_compiler_options * -GENX(pan_shader_get_compiler_options)(void); +const nir_shader_compiler_options *GENX(pan_shader_get_compiler_options)(void); -void -GENX(pan_shader_compile)(nir_shader *nir, - struct panfrost_compile_inputs *inputs, - struct util_dynarray *binary, - struct pan_shader_info *info); +void GENX(pan_shader_compile)(nir_shader *nir, + struct panfrost_compile_inputs *inputs, + struct util_dynarray *binary, + struct pan_shader_info *info); #if PAN_ARCH >= 6 && PAN_ARCH <= 7 enum mali_register_file_format -GENX(pan_fixup_blend_type)(nir_alu_type T_size, enum pipe_format format); + GENX(pan_fixup_blend_type)(nir_alu_type T_size, enum pipe_format format); #endif #if PAN_ARCH >= 9 static inline enum mali_shader_stage pan_shader_stage(const struct pan_shader_info *info) { - switch (info->stage) { - case MESA_SHADER_VERTEX: - return MALI_SHADER_STAGE_VERTEX; - case MESA_SHADER_FRAGMENT: - return MALI_SHADER_STAGE_FRAGMENT; - default: - return MALI_SHADER_STAGE_COMPUTE; - } + switch (info->stage) { + case MESA_SHADER_VERTEX: + return MALI_SHADER_STAGE_VERTEX; + case MESA_SHADER_FRAGMENT: + return MALI_SHADER_STAGE_FRAGMENT; + default: + return MALI_SHADER_STAGE_COMPUTE; + } } #endif @@ -68,17 +66,17 @@ pan_shader_stage(const struct pan_shader_info *info) static inline enum mali_shader_register_allocation pan_register_allocation(unsigned work_reg_count) { - return (work_reg_count <= 32) ? - MALI_SHADER_REGISTER_ALLOCATION_32_PER_THREAD : - MALI_SHADER_REGISTER_ALLOCATION_64_PER_THREAD; + return (work_reg_count <= 32) + ? MALI_SHADER_REGISTER_ALLOCATION_32_PER_THREAD + : MALI_SHADER_REGISTER_ALLOCATION_64_PER_THREAD; } #endif static inline enum mali_depth_source pan_depth_source(const struct pan_shader_info *info) { - return info->fs.writes_depth ? MALI_DEPTH_SOURCE_SHADER : - MALI_DEPTH_SOURCE_FIXED_FUNCTION; + return info->fs.writes_depth ? 
MALI_DEPTH_SOURCE_SHADER + : MALI_DEPTH_SOURCE_FIXED_FUNCTION; } #if PAN_ARCH <= 7 @@ -87,24 +85,22 @@ static inline void pan_shader_prepare_midgard_rsd(const struct pan_shader_info *info, struct MALI_RENDERER_STATE *rsd) { - assert((info->push.count & 3) == 0); + assert((info->push.count & 3) == 0); - rsd->properties.uniform_count = info->push.count / 4; - rsd->properties.shader_has_side_effects = info->writes_global; - rsd->properties.fp_mode = MALI_FP_MODE_GL_INF_NAN_ALLOWED; + rsd->properties.uniform_count = info->push.count / 4; + rsd->properties.shader_has_side_effects = info->writes_global; + rsd->properties.fp_mode = MALI_FP_MODE_GL_INF_NAN_ALLOWED; - /* For fragment shaders, work register count, early-z, reads at draw-time */ + /* For fragment shaders, work register count, early-z, reads at draw-time */ - if (info->stage != MESA_SHADER_FRAGMENT) { - rsd->properties.work_register_count = info->work_reg_count; - } else { - rsd->properties.shader_reads_tilebuffer = - info->fs.outputs_read; + if (info->stage != MESA_SHADER_FRAGMENT) { + rsd->properties.work_register_count = info->work_reg_count; + } else { + rsd->properties.shader_reads_tilebuffer = info->fs.outputs_read; - /* However, forcing early-z in the shader overrides draw-time */ - rsd->properties.force_early_z = - info->fs.early_fragment_tests; - } + /* However, forcing early-z in the shader overrides draw-time */ + rsd->properties.force_early_z = info->fs.early_fragment_tests; + } } #else @@ -112,37 +108,36 @@ pan_shader_prepare_midgard_rsd(const struct pan_shader_info *info, #define pan_preloads(reg) (preload & BITFIELD64_BIT(reg)) static void -pan_make_preload(gl_shader_stage stage, - uint64_t preload, +pan_make_preload(gl_shader_stage stage, uint64_t preload, struct MALI_PRELOAD *out) { - switch (stage) { - case MESA_SHADER_VERTEX: - out->vertex.position_result_address_lo = pan_preloads(58); - out->vertex.position_result_address_hi = pan_preloads(59); - out->vertex.vertex_id = pan_preloads(61); - out->vertex.instance_id = pan_preloads(62); - break; + switch (stage) { + case MESA_SHADER_VERTEX: + out->vertex.position_result_address_lo = pan_preloads(58); + out->vertex.position_result_address_hi = pan_preloads(59); + out->vertex.vertex_id = pan_preloads(61); + out->vertex.instance_id = pan_preloads(62); + break; - case MESA_SHADER_FRAGMENT: - out->fragment.primitive_id = pan_preloads(57); - out->fragment.primitive_flags = pan_preloads(58); - out->fragment.fragment_position = pan_preloads(59); - out->fragment.sample_mask_id = pan_preloads(61); - out->fragment.coverage = true; - break; + case MESA_SHADER_FRAGMENT: + out->fragment.primitive_id = pan_preloads(57); + out->fragment.primitive_flags = pan_preloads(58); + out->fragment.fragment_position = pan_preloads(59); + out->fragment.sample_mask_id = pan_preloads(61); + out->fragment.coverage = true; + break; - default: - out->compute.local_invocation_xy = pan_preloads(55); - out->compute.local_invocation_z = pan_preloads(56); - out->compute.work_group_x = pan_preloads(57); - out->compute.work_group_y = pan_preloads(58); - out->compute.work_group_z = pan_preloads(59); - out->compute.global_invocation_x = pan_preloads(60); - out->compute.global_invocation_y = pan_preloads(61); - out->compute.global_invocation_z = pan_preloads(62); - break; - } + default: + out->compute.local_invocation_xy = pan_preloads(55); + out->compute.local_invocation_z = pan_preloads(56); + out->compute.work_group_x = pan_preloads(57); + out->compute.work_group_y = pan_preloads(58); + 
out->compute.work_group_z = pan_preloads(59); + out->compute.global_invocation_x = pan_preloads(60); + out->compute.global_invocation_y = pan_preloads(61); + out->compute.global_invocation_z = pan_preloads(62); + break; + } } #if PAN_ARCH == 7 @@ -150,25 +145,25 @@ static inline void pan_pack_message_preload(struct MALI_MESSAGE_PRELOAD *cfg, const struct bifrost_message_preload *msg) { - enum mali_message_preload_register_format regfmt = msg->fp16 ? - MALI_MESSAGE_PRELOAD_REGISTER_FORMAT_F16 : - MALI_MESSAGE_PRELOAD_REGISTER_FORMAT_F32; + enum mali_message_preload_register_format regfmt = + msg->fp16 ? MALI_MESSAGE_PRELOAD_REGISTER_FORMAT_F16 + : MALI_MESSAGE_PRELOAD_REGISTER_FORMAT_F32; - if (msg->enabled && msg->texture) { - cfg->type = MALI_MESSAGE_TYPE_VAR_TEX; - cfg->var_tex.varying_index = msg->varying_index; - cfg->var_tex.texture_index = msg->texture_index; - cfg->var_tex.register_format = regfmt; - cfg->var_tex.skip = msg->skip; - cfg->var_tex.zero_lod = msg->zero_lod; - } else if (msg->enabled) { - cfg->type = MALI_MESSAGE_TYPE_LD_VAR; - cfg->ld_var.varying_index = msg->varying_index; - cfg->ld_var.register_format = regfmt; - cfg->ld_var.num_components = msg->num_components; - } else { - cfg->type = MALI_MESSAGE_TYPE_DISABLED; - } + if (msg->enabled && msg->texture) { + cfg->type = MALI_MESSAGE_TYPE_VAR_TEX; + cfg->var_tex.varying_index = msg->varying_index; + cfg->var_tex.texture_index = msg->texture_index; + cfg->var_tex.register_format = regfmt; + cfg->var_tex.skip = msg->skip; + cfg->var_tex.zero_lod = msg->zero_lod; + } else if (msg->enabled) { + cfg->type = MALI_MESSAGE_TYPE_LD_VAR; + cfg->ld_var.varying_index = msg->varying_index; + cfg->ld_var.register_format = regfmt; + cfg->ld_var.num_components = msg->num_components; + } else { + cfg->type = MALI_MESSAGE_TYPE_DISABLED; + } } #endif @@ -176,81 +171,79 @@ static inline void pan_shader_prepare_bifrost_rsd(const struct pan_shader_info *info, struct MALI_RENDERER_STATE *rsd) { - unsigned fau_count = DIV_ROUND_UP(info->push.count, 2); - rsd->preload.uniform_count = fau_count; + unsigned fau_count = DIV_ROUND_UP(info->push.count, 2); + rsd->preload.uniform_count = fau_count; #if PAN_ARCH >= 7 - rsd->properties.shader_register_allocation = - pan_register_allocation(info->work_reg_count); + rsd->properties.shader_register_allocation = + pan_register_allocation(info->work_reg_count); #endif - pan_make_preload(info->stage, info->preload, &rsd->preload); + pan_make_preload(info->stage, info->preload, &rsd->preload); - if (info->stage == MESA_SHADER_FRAGMENT) { - rsd->properties.shader_modifies_coverage = - info->fs.writes_coverage || info->fs.can_discard; + if (info->stage == MESA_SHADER_FRAGMENT) { + rsd->properties.shader_modifies_coverage = + info->fs.writes_coverage || info->fs.can_discard; - rsd->properties.allow_forward_pixel_to_be_killed = - !info->writes_global; + rsd->properties.allow_forward_pixel_to_be_killed = !info->writes_global; #if PAN_ARCH >= 7 - rsd->properties.shader_wait_dependency_6 = info->bifrost.wait_6; - rsd->properties.shader_wait_dependency_7 = info->bifrost.wait_7; + rsd->properties.shader_wait_dependency_6 = info->bifrost.wait_6; + rsd->properties.shader_wait_dependency_7 = info->bifrost.wait_7; - pan_pack_message_preload(&rsd->message_preload_1, &info->bifrost.messages[0]); - pan_pack_message_preload(&rsd->message_preload_2, &info->bifrost.messages[1]); + pan_pack_message_preload(&rsd->message_preload_1, + &info->bifrost.messages[0]); + pan_pack_message_preload(&rsd->message_preload_2, + 
&info->bifrost.messages[1]); #endif - } else if (info->stage == MESA_SHADER_VERTEX && info->vs.secondary_enable) { - rsd->secondary_preload.uniform_count = fau_count; + } else if (info->stage == MESA_SHADER_VERTEX && info->vs.secondary_enable) { + rsd->secondary_preload.uniform_count = fau_count; - pan_make_preload(info->stage, info->vs.secondary_preload, - &rsd->secondary_preload); + pan_make_preload(info->stage, info->vs.secondary_preload, + &rsd->secondary_preload); - rsd->secondary_shader = rsd->shader.shader + - info->vs.secondary_offset; + rsd->secondary_shader = rsd->shader.shader + info->vs.secondary_offset; #if PAN_ARCH >= 7 - rsd->properties.secondary_shader_register_allocation = - pan_register_allocation(info->vs.secondary_work_reg_count); + rsd->properties.secondary_shader_register_allocation = + pan_register_allocation(info->vs.secondary_work_reg_count); #endif - } + } } #endif static inline void pan_shader_prepare_rsd(const struct pan_shader_info *shader_info, - mali_ptr shader_ptr, - struct MALI_RENDERER_STATE *rsd) + mali_ptr shader_ptr, struct MALI_RENDERER_STATE *rsd) { #if PAN_ARCH <= 5 - shader_ptr |= shader_info->midgard.first_tag; + shader_ptr |= shader_info->midgard.first_tag; #endif - rsd->shader.shader = shader_ptr; - rsd->shader.attribute_count = shader_info->attribute_count; - rsd->shader.varying_count = shader_info->varyings.input_count + - shader_info->varyings.output_count; - rsd->shader.texture_count = shader_info->texture_count; - rsd->shader.sampler_count = shader_info->sampler_count; - rsd->properties.shader_contains_barrier = shader_info->contains_barrier; - rsd->properties.uniform_buffer_count = shader_info->ubo_count; + rsd->shader.shader = shader_ptr; + rsd->shader.attribute_count = shader_info->attribute_count; + rsd->shader.varying_count = + shader_info->varyings.input_count + shader_info->varyings.output_count; + rsd->shader.texture_count = shader_info->texture_count; + rsd->shader.sampler_count = shader_info->sampler_count; + rsd->properties.shader_contains_barrier = shader_info->contains_barrier; + rsd->properties.uniform_buffer_count = shader_info->ubo_count; - if (shader_info->stage == MESA_SHADER_FRAGMENT) { - rsd->properties.stencil_from_shader = - shader_info->fs.writes_stencil; - rsd->properties.depth_source = pan_depth_source(shader_info); + if (shader_info->stage == MESA_SHADER_FRAGMENT) { + rsd->properties.stencil_from_shader = shader_info->fs.writes_stencil; + rsd->properties.depth_source = pan_depth_source(shader_info); - /* This also needs to be set if the API forces per-sample - * shading, but that'll just got ORed in */ - rsd->multisample_misc.evaluate_per_sample = - shader_info->fs.sample_shading; - } + /* This also needs to be set if the API forces per-sample + * shading, but that'll just got ORed in */ + rsd->multisample_misc.evaluate_per_sample = + shader_info->fs.sample_shading; + } #if PAN_ARCH >= 6 - pan_shader_prepare_bifrost_rsd(shader_info, rsd); + pan_shader_prepare_bifrost_rsd(shader_info, rsd); #else - pan_shader_prepare_midgard_rsd(shader_info, rsd); + pan_shader_prepare_midgard_rsd(shader_info, rsd); #endif } #endif /* PAN_ARCH */ diff --git a/src/panfrost/lib/pan_texture.c b/src/panfrost/lib/pan_texture.c index 36e8039e118..19c52c98760 100644 --- a/src/panfrost/lib/pan_texture.c +++ b/src/panfrost/lib/pan_texture.c @@ -25,9 +25,9 @@ * */ +#include "pan_texture.h" #include "util/macros.h" #include "util/u_math.h" -#include "pan_texture.h" #if PAN_ARCH >= 5 /* @@ -38,27 +38,39 @@ static inline enum 
mali_astc_2d_dimension panfrost_astc_dim_2d(unsigned dim) { - switch (dim) { - case 4: return MALI_ASTC_2D_DIMENSION_4; - case 5: return MALI_ASTC_2D_DIMENSION_5; - case 6: return MALI_ASTC_2D_DIMENSION_6; - case 8: return MALI_ASTC_2D_DIMENSION_8; - case 10: return MALI_ASTC_2D_DIMENSION_10; - case 12: return MALI_ASTC_2D_DIMENSION_12; - default: unreachable("Invalid ASTC dimension"); - } + switch (dim) { + case 4: + return MALI_ASTC_2D_DIMENSION_4; + case 5: + return MALI_ASTC_2D_DIMENSION_5; + case 6: + return MALI_ASTC_2D_DIMENSION_6; + case 8: + return MALI_ASTC_2D_DIMENSION_8; + case 10: + return MALI_ASTC_2D_DIMENSION_10; + case 12: + return MALI_ASTC_2D_DIMENSION_12; + default: + unreachable("Invalid ASTC dimension"); + } } static inline enum mali_astc_3d_dimension panfrost_astc_dim_3d(unsigned dim) { - switch (dim) { - case 3: return MALI_ASTC_3D_DIMENSION_3; - case 4: return MALI_ASTC_3D_DIMENSION_4; - case 5: return MALI_ASTC_3D_DIMENSION_5; - case 6: return MALI_ASTC_3D_DIMENSION_6; - default: unreachable("Invalid ASTC dimension"); - } + switch (dim) { + case 3: + return MALI_ASTC_3D_DIMENSION_3; + case 4: + return MALI_ASTC_3D_DIMENSION_4; + case 5: + return MALI_ASTC_3D_DIMENSION_5; + case 6: + return MALI_ASTC_3D_DIMENSION_6; + default: + unreachable("Invalid ASTC dimension"); + } } #endif @@ -69,51 +81,50 @@ panfrost_astc_dim_3d(unsigned dim) static unsigned panfrost_compression_tag(const struct util_format_description *desc, - enum mali_texture_dimension dim, - uint64_t modifier) + enum mali_texture_dimension dim, uint64_t modifier) { #if PAN_ARCH >= 5 && PAN_ARCH <= 8 - if (drm_is_afbc(modifier)) { - unsigned flags = (modifier & AFBC_FORMAT_MOD_YTR) ? - MALI_AFBC_SURFACE_FLAG_YTR : 0; + if (drm_is_afbc(modifier)) { + unsigned flags = + (modifier & AFBC_FORMAT_MOD_YTR) ? MALI_AFBC_SURFACE_FLAG_YTR : 0; #if PAN_ARCH >= 6 - /* Prefetch enable */ - flags |= MALI_AFBC_SURFACE_FLAG_PREFETCH; + /* Prefetch enable */ + flags |= MALI_AFBC_SURFACE_FLAG_PREFETCH; - if (panfrost_afbc_is_wide(modifier)) - flags |= MALI_AFBC_SURFACE_FLAG_WIDE_BLOCK; + if (panfrost_afbc_is_wide(modifier)) + flags |= MALI_AFBC_SURFACE_FLAG_WIDE_BLOCK; #endif #if PAN_ARCH >= 7 - /* Tiled headers */ - if (modifier & AFBC_FORMAT_MOD_TILED) - flags |= MALI_AFBC_SURFACE_FLAG_TILED_HEADER; + /* Tiled headers */ + if (modifier & AFBC_FORMAT_MOD_TILED) + flags |= MALI_AFBC_SURFACE_FLAG_TILED_HEADER; - /* Used to make sure AFBC headers don't point outside the AFBC - * body. HW is using the AFBC surface stride to do this check, - * which doesn't work for 3D textures because the surface - * stride does not cover the body. Only supported on v7+. - */ - if (dim != MALI_TEXTURE_DIMENSION_3D) - flags |= MALI_AFBC_SURFACE_FLAG_CHECK_PAYLOAD_RANGE; + /* Used to make sure AFBC headers don't point outside the AFBC + * body. HW is using the AFBC surface stride to do this check, + * which doesn't work for 3D textures because the surface + * stride does not cover the body. Only supported on v7+. 
+ */ + if (dim != MALI_TEXTURE_DIMENSION_3D) + flags |= MALI_AFBC_SURFACE_FLAG_CHECK_PAYLOAD_RANGE; #endif - return flags; - } else if (desc->layout == UTIL_FORMAT_LAYOUT_ASTC) { - if (desc->block.depth > 1) { - return (panfrost_astc_dim_3d(desc->block.depth) << 4) | - (panfrost_astc_dim_3d(desc->block.height) << 2) | - panfrost_astc_dim_3d(desc->block.width); - } else { - return (panfrost_astc_dim_2d(desc->block.height) << 3) | - panfrost_astc_dim_2d(desc->block.width); - } - } + return flags; + } else if (desc->layout == UTIL_FORMAT_LAYOUT_ASTC) { + if (desc->block.depth > 1) { + return (panfrost_astc_dim_3d(desc->block.depth) << 4) | + (panfrost_astc_dim_3d(desc->block.height) << 2) | + panfrost_astc_dim_3d(desc->block.width); + } else { + return (panfrost_astc_dim_2d(desc->block.height) << 3) | + panfrost_astc_dim_2d(desc->block.width); + } + } #endif - /* Tags are not otherwise used */ - return 0; + /* Tags are not otherwise used */ + return 0; } /* Cubemaps have 6 faces as "layers" in between each actual layer. We @@ -121,38 +132,37 @@ panfrost_compression_tag(const struct util_format_description *desc, * can they happen, perhaps from cubemap arrays? */ static void -panfrost_adjust_cube_dimensions( - unsigned *first_face, unsigned *last_face, - unsigned *first_layer, unsigned *last_layer) +panfrost_adjust_cube_dimensions(unsigned *first_face, unsigned *last_face, + unsigned *first_layer, unsigned *last_layer) { - *first_face = *first_layer % 6; - *last_face = *last_layer % 6; - *first_layer /= 6; - *last_layer /= 6; + *first_face = *first_layer % 6; + *last_face = *last_layer % 6; + *first_layer /= 6; + *last_layer /= 6; - assert((*first_layer == *last_layer) || (*first_face == 0 && *last_face == 5)); + assert((*first_layer == *last_layer) || + (*first_face == 0 && *last_face == 5)); } /* Following the texture descriptor is a number of descriptors. How many? */ static unsigned -panfrost_texture_num_elements( - unsigned first_level, unsigned last_level, - unsigned first_layer, unsigned last_layer, - unsigned nr_samples, bool is_cube) +panfrost_texture_num_elements(unsigned first_level, unsigned last_level, + unsigned first_layer, unsigned last_layer, + unsigned nr_samples, bool is_cube) { - unsigned first_face = 0, last_face = 0; + unsigned first_face = 0, last_face = 0; - if (is_cube) { - panfrost_adjust_cube_dimensions(&first_face, &last_face, - &first_layer, &last_layer); - } + if (is_cube) { + panfrost_adjust_cube_dimensions(&first_face, &last_face, &first_layer, + &last_layer); + } - unsigned levels = 1 + last_level - first_level; - unsigned layers = 1 + last_layer - first_layer; - unsigned faces = 1 + last_face - first_face; + unsigned levels = 1 + last_level - first_level; + unsigned layers = 1 + last_layer - first_layer; + unsigned faces = 1 + last_face - first_face; - return levels * layers * faces * MAX2(nr_samples, 1); + return levels * layers * faces * MAX2(nr_samples, 1); } /* Conservative estimate of the size of the texture payload a priori. @@ -165,26 +175,25 @@ unsigned GENX(panfrost_estimate_texture_payload_size)(const struct pan_image_view *iview) { #if PAN_ARCH >= 9 - size_t element_size = pan_size(PLANE); + size_t element_size = pan_size(PLANE); #else - /* Assume worst case. Overestimates on Midgard, but that's ok. */ - size_t element_size = pan_size(SURFACE_WITH_STRIDE); + /* Assume worst case. Overestimates on Midgard, but that's ok. 
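As a sanity check on the element count above, the following is a standalone sketch (not driver code; the helper name is hypothetical) showing the level x layer x face x sample product for a typical case: a cube-map view spanning layers 0-5 with five mip levels and 4x MSAA collapses to one cube layer, so it needs 5 x 1 x 6 x 4 = 120 surface descriptors, which the worst-case payload estimate then multiplies by the descriptor size.

/* Hypothetical standalone sketch mirroring the level/layer/face/sample
 * product computed above; not part of the driver. */
#include <assert.h>

static unsigned
sketch_num_elements(unsigned first_level, unsigned last_level,
                    unsigned first_layer, unsigned last_layer,
                    unsigned nr_samples, unsigned faces)
{
   unsigned levels = 1 + last_level - first_level;
   unsigned layers = 1 + last_layer - first_layer;
   unsigned samples = nr_samples ? nr_samples : 1;

   return levels * layers * faces * samples;
}

int
main(void)
{
   /* Cube map: layers 0-5 collapse to one cube of 6 faces */
   assert(sketch_num_elements(0, 4, 0, 0, 4, 6) == 120);
   return 0;
}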
*/ + size_t element_size = pan_size(SURFACE_WITH_STRIDE); #endif - unsigned elements = - panfrost_texture_num_elements(iview->first_level, iview->last_level, - iview->first_layer, iview->last_layer, - iview->image->layout.nr_samples, - iview->dim == MALI_TEXTURE_DIMENSION_CUBE); + unsigned elements = panfrost_texture_num_elements( + iview->first_level, iview->last_level, iview->first_layer, + iview->last_layer, iview->image->layout.nr_samples, + iview->dim == MALI_TEXTURE_DIMENSION_CUBE); - return element_size * elements; + return element_size * elements; } struct panfrost_surface_iter { - unsigned layer, last_layer; - unsigned level, first_level, last_level; - unsigned face, first_face, last_face; - unsigned sample, first_sample, last_sample; + unsigned layer, last_layer; + unsigned level, first_level, last_level; + unsigned face, first_face, last_face; + unsigned sample, first_sample, last_sample; }; static void @@ -194,83 +203,81 @@ panfrost_surface_iter_begin(struct panfrost_surface_iter *iter, unsigned first_face, unsigned last_face, unsigned nr_samples) { - iter->layer = first_layer; - iter->last_layer = last_layer; - iter->level = iter->first_level = first_level; - iter->last_level = last_level; - iter->face = iter->first_face = first_face; - iter->last_face = last_face; - iter->sample = iter->first_sample = 0; - iter->last_sample = nr_samples - 1; + iter->layer = first_layer; + iter->last_layer = last_layer; + iter->level = iter->first_level = first_level; + iter->last_level = last_level; + iter->face = iter->first_face = first_face; + iter->last_face = last_face; + iter->sample = iter->first_sample = 0; + iter->last_sample = nr_samples - 1; } static bool panfrost_surface_iter_end(const struct panfrost_surface_iter *iter) { - return iter->layer > iter->last_layer; + return iter->layer > iter->last_layer; } static void panfrost_surface_iter_next(struct panfrost_surface_iter *iter) { -#define INC_TEST(field) \ - do { \ - if (iter->field++ < iter->last_ ## field) \ - return; \ - iter->field = iter->first_ ## field; \ - } while (0) +#define INC_TEST(field) \ + do { \ + if (iter->field++ < iter->last_##field) \ + return; \ + iter->field = iter->first_##field; \ + } while (0) - /* Ordering is different on v7: inner loop is iterating on levels */ - if (PAN_ARCH >= 7) - INC_TEST(level); + /* Ordering is different on v7: inner loop is iterating on levels */ + if (PAN_ARCH >= 7) + INC_TEST(level); - INC_TEST(sample); - INC_TEST(face); + INC_TEST(sample); + INC_TEST(face); - if (PAN_ARCH < 7) - INC_TEST(level); + if (PAN_ARCH < 7) + INC_TEST(level); - iter->layer++; + iter->layer++; #undef INC_TEST } static void -panfrost_get_surface_strides(const struct pan_image_layout *layout, - unsigned l, +panfrost_get_surface_strides(const struct pan_image_layout *layout, unsigned l, int32_t *row_stride, int32_t *surf_stride) { - const struct pan_image_slice_layout *slice = &layout->slices[l]; + const struct pan_image_slice_layout *slice = &layout->slices[l]; - if (drm_is_afbc(layout->modifier)) { - /* Pre v7 don't have a row stride field. This field is - * repurposed as a Y offset which we don't use */ - *row_stride = PAN_ARCH < 7 ? 0 : slice->row_stride; - *surf_stride = slice->afbc.surface_stride; - } else { - *row_stride = slice->row_stride; - *surf_stride = slice->surface_stride; - } + if (drm_is_afbc(layout->modifier)) { + /* Pre v7 don't have a row stride field. This field is + * repurposed as a Y offset which we don't use */ + *row_stride = PAN_ARCH < 7 ? 
0 : slice->row_stride; + *surf_stride = slice->afbc.surface_stride; + } else { + *row_stride = slice->row_stride; + *surf_stride = slice->surface_stride; + } } static mali_ptr panfrost_get_surface_pointer(const struct pan_image_layout *layout, - enum mali_texture_dimension dim, - mali_ptr base, + enum mali_texture_dimension dim, mali_ptr base, unsigned l, unsigned w, unsigned f, unsigned s) { - unsigned face_mult = dim == MALI_TEXTURE_DIMENSION_CUBE ? 6 : 1; - unsigned offset; + unsigned face_mult = dim == MALI_TEXTURE_DIMENSION_CUBE ? 6 : 1; + unsigned offset; - if (layout->dim == MALI_TEXTURE_DIMENSION_3D) { - assert(!f && !s); - offset = layout->slices[l].offset + - (w * panfrost_get_layer_stride(layout, l)); - } else { - offset = panfrost_texture_offset(layout, l, (w * face_mult) + f, s); - } + if (layout->dim == MALI_TEXTURE_DIMENSION_3D) { + assert(!f && !s); + offset = + layout->slices[l].offset + (w * panfrost_get_layer_stride(layout, l)); + } else { + offset = panfrost_texture_offset(layout, l, (w * face_mult) + f, s); + } - return base + offset; + return base + offset; } #if PAN_ARCH >= 9 @@ -323,185 +330,191 @@ static enum mali_clump_format special_clump_formats[PIPE_FORMAT_COUNT] = { static enum mali_clump_format panfrost_clump_format(enum pipe_format format) { - /* First, try a special clump format. Note that the 0 encoding is for a - * raw clump format, which will never be in the special table. - */ - if (special_clump_formats[format]) - return special_clump_formats[format]; + /* First, try a special clump format. Note that the 0 encoding is for a + * raw clump format, which will never be in the special table. + */ + if (special_clump_formats[format]) + return special_clump_formats[format]; - /* Else, it's a raw format. Raw formats must not be compressed. */ - assert(!util_format_is_compressed(format)); + /* Else, it's a raw format. Raw formats must not be compressed. */ + assert(!util_format_is_compressed(format)); - /* Select the appropriate raw format. */ - switch (util_format_get_blocksize(format)) { - case 1: return MALI_CLUMP_FORMAT_RAW8; - case 2: return MALI_CLUMP_FORMAT_RAW16; - case 3: return MALI_CLUMP_FORMAT_RAW24; - case 4: return MALI_CLUMP_FORMAT_RAW32; - case 6: return MALI_CLUMP_FORMAT_RAW48; - case 8: return MALI_CLUMP_FORMAT_RAW64; - case 12: return MALI_CLUMP_FORMAT_RAW96; - case 16: return MALI_CLUMP_FORMAT_RAW128; - default: unreachable("Invalid bpp"); - } + /* Select the appropriate raw format. 
*/ + switch (util_format_get_blocksize(format)) { + case 1: + return MALI_CLUMP_FORMAT_RAW8; + case 2: + return MALI_CLUMP_FORMAT_RAW16; + case 3: + return MALI_CLUMP_FORMAT_RAW24; + case 4: + return MALI_CLUMP_FORMAT_RAW32; + case 6: + return MALI_CLUMP_FORMAT_RAW48; + case 8: + return MALI_CLUMP_FORMAT_RAW64; + case 12: + return MALI_CLUMP_FORMAT_RAW96; + case 16: + return MALI_CLUMP_FORMAT_RAW128; + default: + unreachable("Invalid bpp"); + } } static enum mali_afbc_superblock_size translate_superblock_size(uint64_t modifier) { - assert(drm_is_afbc(modifier)); + assert(drm_is_afbc(modifier)); - switch (modifier & AFBC_FORMAT_MOD_BLOCK_SIZE_MASK) { - case AFBC_FORMAT_MOD_BLOCK_SIZE_16x16: - return MALI_AFBC_SUPERBLOCK_SIZE_16X16; - case AFBC_FORMAT_MOD_BLOCK_SIZE_32x8: - return MALI_AFBC_SUPERBLOCK_SIZE_32X8; - case AFBC_FORMAT_MOD_BLOCK_SIZE_64x4: - return MALI_AFBC_SUPERBLOCK_SIZE_64X4; - default: - unreachable("Invalid superblock size"); - } + switch (modifier & AFBC_FORMAT_MOD_BLOCK_SIZE_MASK) { + case AFBC_FORMAT_MOD_BLOCK_SIZE_16x16: + return MALI_AFBC_SUPERBLOCK_SIZE_16X16; + case AFBC_FORMAT_MOD_BLOCK_SIZE_32x8: + return MALI_AFBC_SUPERBLOCK_SIZE_32X8; + case AFBC_FORMAT_MOD_BLOCK_SIZE_64x4: + return MALI_AFBC_SUPERBLOCK_SIZE_64X4; + default: + unreachable("Invalid superblock size"); + } } static void panfrost_emit_plane(const struct pan_image_layout *layout, - enum pipe_format format, - mali_ptr pointer, - unsigned level, + enum pipe_format format, mali_ptr pointer, unsigned level, void *payload) { - const struct util_format_description *desc = - util_format_description(layout->format); + const struct util_format_description *desc = + util_format_description(layout->format); - int32_t row_stride, surface_stride; + int32_t row_stride, surface_stride; - panfrost_get_surface_strides(layout, level, &row_stride, &surface_stride); - assert(row_stride >= 0 && surface_stride >= 0 && "negative stride"); + panfrost_get_surface_strides(layout, level, &row_stride, &surface_stride); + assert(row_stride >= 0 && surface_stride >= 0 && "negative stride"); - bool afbc = drm_is_afbc(layout->modifier); + bool afbc = drm_is_afbc(layout->modifier); - pan_pack(payload, PLANE, cfg) { - cfg.pointer = pointer; - cfg.row_stride = row_stride; - cfg.size = layout->data_size - layout->slices[level].offset; + pan_pack(payload, PLANE, cfg) { + cfg.pointer = pointer; + cfg.row_stride = row_stride; + cfg.size = layout->data_size - layout->slices[level].offset; - cfg.slice_stride = layout->nr_samples ? - layout->slices[level].surface_stride : - panfrost_get_layer_stride(layout, level); + cfg.slice_stride = layout->nr_samples + ? 
layout->slices[level].surface_stride + : panfrost_get_layer_stride(layout, level); - if (desc->layout == UTIL_FORMAT_LAYOUT_ASTC) { - assert(!afbc); + if (desc->layout == UTIL_FORMAT_LAYOUT_ASTC) { + assert(!afbc); - if (desc->block.depth > 1) { - cfg.plane_type = MALI_PLANE_TYPE_ASTC_3D; - cfg.astc._3d.block_width = panfrost_astc_dim_3d(desc->block.width); - cfg.astc._3d.block_height = panfrost_astc_dim_3d(desc->block.height); - cfg.astc._3d.block_depth = panfrost_astc_dim_3d(desc->block.depth); - } else { - cfg.plane_type = MALI_PLANE_TYPE_ASTC_2D; - cfg.astc._2d.block_width = panfrost_astc_dim_2d(desc->block.width); - cfg.astc._2d.block_height = panfrost_astc_dim_2d(desc->block.height); - } + if (desc->block.depth > 1) { + cfg.plane_type = MALI_PLANE_TYPE_ASTC_3D; + cfg.astc._3d.block_width = panfrost_astc_dim_3d(desc->block.width); + cfg.astc._3d.block_height = + panfrost_astc_dim_3d(desc->block.height); + cfg.astc._3d.block_depth = panfrost_astc_dim_3d(desc->block.depth); + } else { + cfg.plane_type = MALI_PLANE_TYPE_ASTC_2D; + cfg.astc._2d.block_width = panfrost_astc_dim_2d(desc->block.width); + cfg.astc._2d.block_height = + panfrost_astc_dim_2d(desc->block.height); + } - bool srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB); + bool srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB); - /* Mesa does not advertise _HDR formats yet */ - cfg.astc.decode_hdr = false; + /* Mesa does not advertise _HDR formats yet */ + cfg.astc.decode_hdr = false; - /* sRGB formats decode to RGBA8 sRGB, which is narrow. - * - * Non-sRGB formats decode to RGBA16F which is wide. - * With a future extension, we could decode non-sRGB - * formats narrowly too, but this isn't wired up in Mesa - * yet. - */ - cfg.astc.decode_wide = !srgb; - } else if (afbc) { - cfg.plane_type = MALI_PLANE_TYPE_AFBC; - cfg.afbc.superblock_size = translate_superblock_size(layout->modifier); - cfg.afbc.ytr = (layout->modifier & AFBC_FORMAT_MOD_YTR); - cfg.afbc.tiled_header = (layout->modifier & AFBC_FORMAT_MOD_TILED); - cfg.afbc.prefetch = true; - cfg.afbc.compression_mode = pan_afbc_compression_mode(format); - cfg.afbc.header_stride = layout->slices[level].afbc.header_size; - } else { - cfg.plane_type = MALI_PLANE_TYPE_GENERIC; - cfg.clump_format = panfrost_clump_format(format); - } + /* sRGB formats decode to RGBA8 sRGB, which is narrow. + * + * Non-sRGB formats decode to RGBA16F which is wide. + * With a future extension, we could decode non-sRGB + * formats narrowly too, but this isn't wired up in Mesa + * yet. 
+ */ + cfg.astc.decode_wide = !srgb; + } else if (afbc) { + cfg.plane_type = MALI_PLANE_TYPE_AFBC; + cfg.afbc.superblock_size = translate_superblock_size(layout->modifier); + cfg.afbc.ytr = (layout->modifier & AFBC_FORMAT_MOD_YTR); + cfg.afbc.tiled_header = (layout->modifier & AFBC_FORMAT_MOD_TILED); + cfg.afbc.prefetch = true; + cfg.afbc.compression_mode = pan_afbc_compression_mode(format); + cfg.afbc.header_stride = layout->slices[level].afbc.header_size; + } else { + cfg.plane_type = MALI_PLANE_TYPE_GENERIC; + cfg.clump_format = panfrost_clump_format(format); + } - if (!afbc && layout->modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED) - cfg.clump_ordering = MALI_CLUMP_ORDERING_TILED_U_INTERLEAVED; - else if (!afbc) - cfg.clump_ordering = MALI_CLUMP_ORDERING_LINEAR; - } + if (!afbc && + layout->modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED) + cfg.clump_ordering = MALI_CLUMP_ORDERING_TILED_U_INTERLEAVED; + else if (!afbc) + cfg.clump_ordering = MALI_CLUMP_ORDERING_LINEAR; + } } #endif static void panfrost_emit_texture_payload(const struct pan_image_view *iview, - enum pipe_format format, - void *payload) + enum pipe_format format, void *payload) { - const struct pan_image_layout *layout = &iview->image->layout; - ASSERTED const struct util_format_description *desc = - util_format_description(format); + const struct pan_image_layout *layout = &iview->image->layout; + ASSERTED const struct util_format_description *desc = + util_format_description(format); - mali_ptr base = iview->image->data.bo->ptr.gpu + iview->image->data.offset; + mali_ptr base = iview->image->data.bo->ptr.gpu + iview->image->data.offset; - if (iview->buf.size) { - assert (iview->dim == MALI_TEXTURE_DIMENSION_1D); - base += iview->buf.offset; - } + if (iview->buf.size) { + assert(iview->dim == MALI_TEXTURE_DIMENSION_1D); + base += iview->buf.offset; + } - /* panfrost_compression_tag() wants the dimension of the resource, not the - * one of the image view (those might differ). - */ - base |= panfrost_compression_tag(desc, layout->dim, layout->modifier); + /* panfrost_compression_tag() wants the dimension of the resource, not the + * one of the image view (those might differ). + */ + base |= panfrost_compression_tag(desc, layout->dim, layout->modifier); - /* v4 does not support compression */ - assert(PAN_ARCH >= 5 || !drm_is_afbc(layout->modifier)); - assert(PAN_ARCH >= 5 || desc->layout != UTIL_FORMAT_LAYOUT_ASTC); + /* v4 does not support compression */ + assert(PAN_ARCH >= 5 || !drm_is_afbc(layout->modifier)); + assert(PAN_ARCH >= 5 || desc->layout != UTIL_FORMAT_LAYOUT_ASTC); - /* Inject the addresses in, interleaving array indices, mip levels, - * cube faces, and strides in that order. On Bifrost and older, each - * sample had its own surface descriptor; on Valhall, they are fused - * into a single plane descriptor. - */ + /* Inject the addresses in, interleaving array indices, mip levels, + * cube faces, and strides in that order. On Bifrost and older, each + * sample had its own surface descriptor; on Valhall, they are fused + * into a single plane descriptor. + */ - unsigned first_layer = iview->first_layer, last_layer = iview->last_layer; - unsigned nr_samples = PAN_ARCH <= 7 ? layout->nr_samples : 1; - unsigned first_face = 0, last_face = 0; + unsigned first_layer = iview->first_layer, last_layer = iview->last_layer; + unsigned nr_samples = PAN_ARCH <= 7 ? 
layout->nr_samples : 1; + unsigned first_face = 0, last_face = 0; - if (iview->dim == MALI_TEXTURE_DIMENSION_CUBE) { - panfrost_adjust_cube_dimensions(&first_face, &last_face, - &first_layer, &last_layer); - } + if (iview->dim == MALI_TEXTURE_DIMENSION_CUBE) { + panfrost_adjust_cube_dimensions(&first_face, &last_face, &first_layer, + &last_layer); + } - struct panfrost_surface_iter iter; + struct panfrost_surface_iter iter; - for (panfrost_surface_iter_begin(&iter, first_layer, last_layer, - iview->first_level, iview->last_level, - first_face, last_face, nr_samples); - !panfrost_surface_iter_end(&iter); - panfrost_surface_iter_next(&iter)) { - mali_ptr pointer = - panfrost_get_surface_pointer(layout, iview->dim, base, - iter.level, iter.layer, - iter.face, iter.sample); + for (panfrost_surface_iter_begin(&iter, first_layer, last_layer, + iview->first_level, iview->last_level, + first_face, last_face, nr_samples); + !panfrost_surface_iter_end(&iter); panfrost_surface_iter_next(&iter)) { + mali_ptr pointer = + panfrost_get_surface_pointer(layout, iview->dim, base, iter.level, + iter.layer, iter.face, iter.sample); #if PAN_ARCH >= 9 - panfrost_emit_plane(layout, format, pointer, iter.level, payload); - payload += pan_size(PLANE); + panfrost_emit_plane(layout, format, pointer, iter.level, payload); + payload += pan_size(PLANE); #else - pan_pack(payload, SURFACE_WITH_STRIDE, cfg) { - cfg.pointer = pointer; - panfrost_get_surface_strides(layout, iter.level, - &cfg.row_stride, - &cfg.surface_stride); - } - payload += pan_size(SURFACE_WITH_STRIDE); + pan_pack(payload, SURFACE_WITH_STRIDE, cfg) { + cfg.pointer = pointer; + panfrost_get_surface_strides(layout, iter.level, &cfg.row_stride, + &cfg.surface_stride); + } + payload += pan_size(SURFACE_WITH_STRIDE); #endif - } + } } #if PAN_ARCH <= 7 @@ -510,14 +523,14 @@ panfrost_emit_texture_payload(const struct pan_image_view *iview, static enum mali_texture_layout panfrost_modifier_to_layout(uint64_t modifier) { - if (drm_is_afbc(modifier)) - return MALI_TEXTURE_LAYOUT_AFBC; - else if (modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED) - return MALI_TEXTURE_LAYOUT_TILED; - else if (modifier == DRM_FORMAT_MOD_LINEAR) - return MALI_TEXTURE_LAYOUT_LINEAR; - else - unreachable("Invalid modifer"); + if (drm_is_afbc(modifier)) + return MALI_TEXTURE_LAYOUT_AFBC; + else if (modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED) + return MALI_TEXTURE_LAYOUT_TILED; + else if (modifier == DRM_FORMAT_MOD_LINEAR) + return MALI_TEXTURE_LAYOUT_LINEAR; + else + unreachable("Invalid modifer"); } #endif @@ -532,103 +545,99 @@ panfrost_modifier_to_layout(uint64_t modifier) */ void GENX(panfrost_new_texture)(const struct panfrost_device *dev, - const struct pan_image_view *iview, - void *out, const struct panfrost_ptr *payload) + const struct pan_image_view *iview, void *out, + const struct panfrost_ptr *payload) { - const struct pan_image_layout *layout = &iview->image->layout; - enum pipe_format format = iview->format; - uint32_t mali_format = dev->formats[format].hw; - unsigned char swizzle[4]; + const struct pan_image_layout *layout = &iview->image->layout; + enum pipe_format format = iview->format; + uint32_t mali_format = dev->formats[format].hw; + unsigned char swizzle[4]; - if (PAN_ARCH >= 7 && util_format_is_depth_or_stencil(format)) { - /* v7+ doesn't have an _RRRR component order, combine the - * user swizzle with a .XXXX swizzle to emulate that. 
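To illustrate the composition rule described here, below is a minimal standalone sketch; the compose() helper is a local stand-in following dst[i] = outer[inner[i]] for component selects and is not the Mesa utility itself. Whatever component the view swizzle selects collapses to X, while the 0/1 selectors pass through, which emulates the missing _RRRR component order.

/* Minimal standalone sketch of composing a .XXXX swizzle with a view
 * swizzle; local helper, not the Mesa util_format_compose_swizzles(). */
#include <stdio.h>

enum swz { SWZ_X, SWZ_Y, SWZ_Z, SWZ_W, SWZ_0, SWZ_1 };

static void
compose(const unsigned char outer[4], const unsigned char inner[4],
        unsigned char out[4])
{
   for (int i = 0; i < 4; i++)
      out[i] = inner[i] <= SWZ_W ? outer[inner[i]] : inner[i];
}

int
main(void)
{
   const char *names = "XYZW01";
   const unsigned char replicate_x[4] = {SWZ_X, SWZ_X, SWZ_X, SWZ_X};
   /* Hypothetical view swizzle: select G, then constants */
   const unsigned char view[4] = {SWZ_Y, SWZ_0, SWZ_0, SWZ_1};
   unsigned char out[4];

   compose(replicate_x, view, out);

   for (int i = 0; i < 4; i++)
      putchar(names[out[i]]); /* prints X001: component selects collapse to X */
   putchar('\n');
   return 0;
}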
- */ - static const unsigned char replicate_x[4] = { - PIPE_SWIZZLE_X, PIPE_SWIZZLE_X, - PIPE_SWIZZLE_X, PIPE_SWIZZLE_X, - }; + if (PAN_ARCH >= 7 && util_format_is_depth_or_stencil(format)) { + /* v7+ doesn't have an _RRRR component order, combine the + * user swizzle with a .XXXX swizzle to emulate that. + */ + static const unsigned char replicate_x[4] = { + PIPE_SWIZZLE_X, + PIPE_SWIZZLE_X, + PIPE_SWIZZLE_X, + PIPE_SWIZZLE_X, + }; - util_format_compose_swizzles(replicate_x, - iview->swizzle, - swizzle); - } else if (PAN_ARCH == 7) { + util_format_compose_swizzles(replicate_x, iview->swizzle, swizzle); + } else if (PAN_ARCH == 7) { #if PAN_ARCH == 7 - /* v7 (only) restricts component orders when AFBC is in use. - * Rather than restrict AFBC, we use an allowed component order - * with an invertible swizzle composed. - */ - enum mali_rgb_component_order orig = - mali_format & BITFIELD_MASK(12); - struct pan_decomposed_swizzle decomposed = - GENX(pan_decompose_swizzle)(orig); + /* v7 (only) restricts component orders when AFBC is in use. + * Rather than restrict AFBC, we use an allowed component order + * with an invertible swizzle composed. + */ + enum mali_rgb_component_order orig = mali_format & BITFIELD_MASK(12); + struct pan_decomposed_swizzle decomposed = + GENX(pan_decompose_swizzle)(orig); - /* Apply the new component order */ - mali_format = (mali_format & ~orig) | decomposed.pre; + /* Apply the new component order */ + mali_format = (mali_format & ~orig) | decomposed.pre; - /* Compose the new swizzle */ - util_format_compose_swizzles(decomposed.post, iview->swizzle, - swizzle); + /* Compose the new swizzle */ + util_format_compose_swizzles(decomposed.post, iview->swizzle, swizzle); #endif - } else { - STATIC_ASSERT(sizeof(swizzle) == sizeof(iview->swizzle)); - memcpy(swizzle, iview->swizzle, sizeof(swizzle)); - } + } else { + STATIC_ASSERT(sizeof(swizzle) == sizeof(iview->swizzle)); + memcpy(swizzle, iview->swizzle, sizeof(swizzle)); + } - panfrost_emit_texture_payload(iview, format, payload->cpu); + panfrost_emit_texture_payload(iview, format, payload->cpu); - unsigned array_size = iview->last_layer - iview->first_layer + 1; + unsigned array_size = iview->last_layer - iview->first_layer + 1; - if (iview->dim == MALI_TEXTURE_DIMENSION_CUBE) { - assert(iview->first_layer % 6 == 0); - assert(iview->last_layer % 6 == 5); - array_size /= 6; - } + if (iview->dim == MALI_TEXTURE_DIMENSION_CUBE) { + assert(iview->first_layer % 6 == 0); + assert(iview->last_layer % 6 == 5); + array_size /= 6; + } - unsigned width; + unsigned width; - if (iview->buf.size) { - assert(iview->dim == MALI_TEXTURE_DIMENSION_1D); - assert(!iview->first_level && !iview->last_level); - assert(!iview->first_layer && !iview->last_layer); - assert(layout->nr_samples == 1); - assert(layout->height == 1 && layout->depth == 1); - assert(iview->buf.offset + iview->buf.size <= layout->width); - width = iview->buf.size; - } else { - width = u_minify(layout->width, iview->first_level); - } + if (iview->buf.size) { + assert(iview->dim == MALI_TEXTURE_DIMENSION_1D); + assert(!iview->first_level && !iview->last_level); + assert(!iview->first_layer && !iview->last_layer); + assert(layout->nr_samples == 1); + assert(layout->height == 1 && layout->depth == 1); + assert(iview->buf.offset + iview->buf.size <= layout->width); + width = iview->buf.size; + } else { + width = u_minify(layout->width, iview->first_level); + } - pan_pack(out, TEXTURE, cfg) { - cfg.dimension = iview->dim; - cfg.format = mali_format; - cfg.width = width; - 
cfg.height = u_minify(layout->height, iview->first_level); - if (iview->dim == MALI_TEXTURE_DIMENSION_3D) - cfg.depth = u_minify(layout->depth, iview->first_level); - else - cfg.sample_count = layout->nr_samples; - cfg.swizzle = panfrost_translate_swizzle_4(swizzle); + pan_pack(out, TEXTURE, cfg) { + cfg.dimension = iview->dim; + cfg.format = mali_format; + cfg.width = width; + cfg.height = u_minify(layout->height, iview->first_level); + if (iview->dim == MALI_TEXTURE_DIMENSION_3D) + cfg.depth = u_minify(layout->depth, iview->first_level); + else + cfg.sample_count = layout->nr_samples; + cfg.swizzle = panfrost_translate_swizzle_4(swizzle); #if PAN_ARCH >= 9 - cfg.texel_interleave = - (layout->modifier != DRM_FORMAT_MOD_LINEAR) || - util_format_is_compressed(format); + cfg.texel_interleave = (layout->modifier != DRM_FORMAT_MOD_LINEAR) || + util_format_is_compressed(format); #else - cfg.texel_ordering = - panfrost_modifier_to_layout(layout->modifier); + cfg.texel_ordering = panfrost_modifier_to_layout(layout->modifier); #endif - cfg.levels = iview->last_level - iview->first_level + 1; - cfg.array_size = array_size; + cfg.levels = iview->last_level - iview->first_level + 1; + cfg.array_size = array_size; #if PAN_ARCH >= 6 - cfg.surfaces = payload->gpu; + cfg.surfaces = payload->gpu; - /* We specify API-level LOD clamps in the sampler descriptor - * and use these clamps simply for bounds checking */ - cfg.minimum_lod = FIXED_16(0, false); - cfg.maximum_lod = FIXED_16(cfg.levels - 1, false); + /* We specify API-level LOD clamps in the sampler descriptor + * and use these clamps simply for bounds checking */ + cfg.minimum_lod = FIXED_16(0, false); + cfg.maximum_lod = FIXED_16(cfg.levels - 1, false); #else - cfg.manual_stride = true; + cfg.manual_stride = true; #endif - } + } } diff --git a/src/panfrost/lib/pan_texture.h b/src/panfrost/lib/pan_texture.h index e6768ef80cf..a5b391e7afd 100644 --- a/src/panfrost/lib/pan_texture.h +++ b/src/panfrost/lib/pan_texture.h @@ -31,14 +31,14 @@ #include "genxml/gen_macros.h" #include -#include "drm-uapi/drm_fourcc.h" -#include "util/format/u_format.h" #include "compiler/shader_enums.h" +#include "drm-uapi/drm_fourcc.h" #include "genxml/gen_macros.h" +#include "util/format/u_format.h" #include "pan_bo.h" #include "pan_device.h" -#include "pan_util.h" #include "pan_format.h" +#include "pan_util.h" #ifdef __cplusplus extern "C" { @@ -48,104 +48,101 @@ extern "C" { extern uint64_t pan_best_modifiers[PAN_MODIFIER_COUNT]; struct pan_image_slice_layout { - unsigned offset; + unsigned offset; - /* For AFBC images, the number of bytes between two rows of AFBC - * headers. - * - * For non-AFBC images, the number of bytes between two rows of texels. - * For linear images, this will equal the logical stride. For - * images that are compressed or interleaved, this will be greater than - * the logical stride. - */ - unsigned row_stride; + /* For AFBC images, the number of bytes between two rows of AFBC + * headers. + * + * For non-AFBC images, the number of bytes between two rows of texels. + * For linear images, this will equal the logical stride. For + * images that are compressed or interleaved, this will be greater than + * the logical stride. 
+ */ + unsigned row_stride; - unsigned surface_stride; + unsigned surface_stride; - struct { - /* Size of the AFBC header preceding each slice */ - unsigned header_size; + struct { + /* Size of the AFBC header preceding each slice */ + unsigned header_size; - /* Size of the AFBC body */ - unsigned body_size; + /* Size of the AFBC body */ + unsigned body_size; - /* Stride between AFBC headers of two consecutive surfaces. - * For 3D textures, this must be set to header size since - * AFBC headers are allocated together, for 2D arrays this - * should be set to size0, since AFBC headers are placed at - * the beginning of each layer - */ - unsigned surface_stride; - } afbc; + /* Stride between AFBC headers of two consecutive surfaces. + * For 3D textures, this must be set to header size since + * AFBC headers are allocated together, for 2D arrays this + * should be set to size0, since AFBC headers are placed at + * the beginning of each layer + */ + unsigned surface_stride; + } afbc; - /* If checksumming is enabled following the slice, what - * is its offset/stride? */ - struct { - unsigned offset; - unsigned stride; - unsigned size; - } crc; + /* If checksumming is enabled following the slice, what + * is its offset/stride? */ + struct { + unsigned offset; + unsigned stride; + unsigned size; + } crc; - unsigned size; + unsigned size; }; struct pan_image_layout { - uint64_t modifier; - enum pipe_format format; - unsigned width, height, depth; - unsigned nr_samples; - enum mali_texture_dimension dim; - unsigned nr_slices; - unsigned array_size; - bool crc; + uint64_t modifier; + enum pipe_format format; + unsigned width, height, depth; + unsigned nr_samples; + enum mali_texture_dimension dim; + unsigned nr_slices; + unsigned array_size; + bool crc; - /* The remaining fields may be derived from the above by calling - * pan_image_layout_init - */ + /* The remaining fields may be derived from the above by calling + * pan_image_layout_init + */ - struct pan_image_slice_layout slices[MAX_MIP_LEVELS]; + struct pan_image_slice_layout slices[MAX_MIP_LEVELS]; - unsigned data_size; - unsigned array_stride; + unsigned data_size; + unsigned array_stride; }; struct pan_image_mem { - struct panfrost_bo *bo; - unsigned offset; + struct panfrost_bo *bo; + unsigned offset; }; struct pan_image { - struct pan_image_mem data; - struct pan_image_layout layout; + struct pan_image_mem data; + struct pan_image_layout layout; }; struct pan_image_view { - /* Format, dimension and sample count of the view might differ from - * those of the image (2D view of a 3D image surface for instance). - */ - enum pipe_format format; - enum mali_texture_dimension dim; - unsigned first_level, last_level; - unsigned first_layer, last_layer; - unsigned char swizzle[4]; - const struct pan_image *image; + /* Format, dimension and sample count of the view might differ from + * those of the image (2D view of a 3D image surface for instance). + */ + enum pipe_format format; + enum mali_texture_dimension dim; + unsigned first_level, last_level; + unsigned first_layer, last_layer; + unsigned char swizzle[4]; + const struct pan_image *image; - /* If EXT_multisampled_render_to_texture is used, this may be - * greater than image->layout.nr_samples. */ - unsigned nr_samples; + /* If EXT_multisampled_render_to_texture is used, this may be + * greater than image->layout.nr_samples. 
*/ + unsigned nr_samples; - /* Only valid if dim == 1D, needed to implement buffer views */ - struct { - unsigned offset; - unsigned size; - } buf; + /* Only valid if dim == 1D, needed to implement buffer views */ + struct { + unsigned offset; + unsigned size; + } buf; }; -unsigned -panfrost_compute_checksum_size( - struct pan_image_slice_layout *slice, - unsigned width, - unsigned height); +unsigned panfrost_compute_checksum_size(struct pan_image_slice_layout *slice, + unsigned width, unsigned height); /* AFBC format mode. The ordering is intended to match the Valhall hardware enum * ("AFBC Compression Mode"), but this enum is required in software on older @@ -153,46 +150,42 @@ panfrost_compute_checksum_size( * unify these code paths. */ enum pan_afbc_mode { - PAN_AFBC_MODE_R8, - PAN_AFBC_MODE_R8G8, - PAN_AFBC_MODE_R5G6B5, - PAN_AFBC_MODE_R4G4B4A4, - PAN_AFBC_MODE_R5G5B5A1, - PAN_AFBC_MODE_R8G8B8, - PAN_AFBC_MODE_R8G8B8A8, - PAN_AFBC_MODE_R10G10B10A2, - PAN_AFBC_MODE_R11G11B10, - PAN_AFBC_MODE_S8, + PAN_AFBC_MODE_R8, + PAN_AFBC_MODE_R8G8, + PAN_AFBC_MODE_R5G6B5, + PAN_AFBC_MODE_R4G4B4A4, + PAN_AFBC_MODE_R5G5B5A1, + PAN_AFBC_MODE_R8G8B8, + PAN_AFBC_MODE_R8G8B8A8, + PAN_AFBC_MODE_R10G10B10A2, + PAN_AFBC_MODE_R11G11B10, + PAN_AFBC_MODE_S8, - /* Sentintel signalling a format that cannot be compressed */ - PAN_AFBC_MODE_INVALID + /* Sentintel signalling a format that cannot be compressed */ + PAN_AFBC_MODE_INVALID }; -bool -panfrost_format_supports_afbc(const struct panfrost_device *dev, - enum pipe_format format); +bool panfrost_format_supports_afbc(const struct panfrost_device *dev, + enum pipe_format format); -enum pan_afbc_mode -panfrost_afbc_format(unsigned arch, enum pipe_format format); +enum pan_afbc_mode panfrost_afbc_format(unsigned arch, enum pipe_format format); #define AFBC_HEADER_BYTES_PER_TILE 16 -bool -panfrost_afbc_can_ytr(enum pipe_format format); +bool panfrost_afbc_can_ytr(enum pipe_format format); -bool -panfrost_afbc_can_tile(const struct panfrost_device *dev); +bool panfrost_afbc_can_tile(const struct panfrost_device *dev); /* * Represents the block size of a single plane. For AFBC, this represents the * superblock size. For u-interleaving, this represents the tile size. 
*/ struct pan_block_size { - /** Width of block */ - unsigned width; + /** Width of block */ + unsigned width; - /** Height of blocks */ - unsigned height; + /** Height of blocks */ + unsigned height; }; struct pan_block_size panfrost_afbc_superblock_size(uint64_t modifier); @@ -207,71 +200,63 @@ uint32_t pan_afbc_row_stride(uint64_t modifier, uint32_t width); uint32_t pan_afbc_stride_blocks(uint64_t modifier, uint32_t row_stride_bytes); -struct pan_block_size -panfrost_block_size(uint64_t modifier, enum pipe_format format); +struct pan_block_size panfrost_block_size(uint64_t modifier, + enum pipe_format format); #ifdef PAN_ARCH -unsigned -GENX(panfrost_estimate_texture_payload_size)(const struct pan_image_view *iview); +unsigned GENX(panfrost_estimate_texture_payload_size)( + const struct pan_image_view *iview); -void -GENX(panfrost_new_texture)(const struct panfrost_device *dev, - const struct pan_image_view *iview, - void *out, - const struct panfrost_ptr *payload); +void GENX(panfrost_new_texture)(const struct panfrost_device *dev, + const struct pan_image_view *iview, void *out, + const struct panfrost_ptr *payload); #endif -unsigned -panfrost_get_layer_stride(const struct pan_image_layout *layout, - unsigned level); +unsigned panfrost_get_layer_stride(const struct pan_image_layout *layout, + unsigned level); -unsigned -panfrost_texture_offset(const struct pan_image_layout *layout, - unsigned level, unsigned array_idx, - unsigned surface_idx); +unsigned panfrost_texture_offset(const struct pan_image_layout *layout, + unsigned level, unsigned array_idx, + unsigned surface_idx); struct pan_pool; struct pan_scoreboard; /* DRM modifier helper */ -#define drm_is_afbc(mod) \ - ((mod >> 52) == (DRM_FORMAT_MOD_ARM_TYPE_AFBC | \ - (DRM_FORMAT_MOD_VENDOR_ARM << 4))) +#define drm_is_afbc(mod) \ + ((mod >> 52) == \ + (DRM_FORMAT_MOD_ARM_TYPE_AFBC | (DRM_FORMAT_MOD_VENDOR_ARM << 4))) struct pan_image_explicit_layout { - unsigned offset; - unsigned row_stride; + unsigned offset; + unsigned row_stride; }; bool pan_image_layout_init(struct pan_image_layout *layout, const struct pan_image_explicit_layout *explicit_layout); -unsigned -panfrost_get_legacy_stride(const struct pan_image_layout *layout, - unsigned level); +unsigned panfrost_get_legacy_stride(const struct pan_image_layout *layout, + unsigned level); -unsigned -panfrost_from_legacy_stride(unsigned legacy_stride, - enum pipe_format format, - uint64_t modifier); +unsigned panfrost_from_legacy_stride(unsigned legacy_stride, + enum pipe_format format, + uint64_t modifier); struct pan_surface { - union { - mali_ptr data; - struct { - mali_ptr header; - mali_ptr body; - } afbc; - }; + union { + mali_ptr data; + struct { + mali_ptr header; + mali_ptr body; + } afbc; + }; }; -void -pan_iview_get_surface(const struct pan_image_view *iview, - unsigned level, unsigned layer, unsigned sample, - struct pan_surface *surf); - +void pan_iview_get_surface(const struct pan_image_view *iview, unsigned level, + unsigned layer, unsigned sample, + struct pan_surface *surf); #if PAN_ARCH >= 9 enum mali_afbc_compression_mode diff --git a/src/panfrost/lib/pan_tiler.c b/src/panfrost/lib/pan_tiler.c index e8bce5e2a60..2e3126af251 100644 --- a/src/panfrost/lib/pan_tiler.c +++ b/src/panfrost/lib/pan_tiler.c @@ -24,8 +24,8 @@ * Alyssa Rosenzweig */ -#include "util/u_math.h" #include "util/macros.h" +#include "util/u_math.h" #include "pan_device.h" #include "pan_encoder.h" @@ -179,7 +179,7 @@ * tile <= fb / (64 - 1) <= next_power_of_two(fb / (64 - 1)) * * Hence we clamp 
up to align_pot(fb / (64 - 1)). - + * Extending to use a selection heuristic left for future work. * * Once the tile size (w, h) is chosen, we compute the hierarchy "mask": @@ -227,15 +227,16 @@ * a a fixed-tile size (not any of a number of power-of-twos) */ static unsigned -pan_tile_count(unsigned width, unsigned height, unsigned tile_width, unsigned tile_height) +pan_tile_count(unsigned width, unsigned height, unsigned tile_width, + unsigned tile_height) { - unsigned aligned_width = ALIGN_POT(width, tile_width); - unsigned aligned_height = ALIGN_POT(height, tile_height); + unsigned aligned_width = ALIGN_POT(width, tile_width); + unsigned aligned_height = ALIGN_POT(height, tile_height); - unsigned tile_count_x = aligned_width / tile_width; - unsigned tile_count_y = aligned_height / tile_height; + unsigned tile_count_x = aligned_width / tile_width; + unsigned tile_count_y = aligned_height / tile_height; - return tile_count_x * tile_count_y; + return tile_count_x * tile_count_y; } /* For `masked_count` of the smallest tile sizes masked out, computes how the @@ -246,32 +247,29 @@ pan_tile_count(unsigned width, unsigned height, unsigned tile_width, unsigned ti * levels to find a byte count for all levels. */ static unsigned -panfrost_hierarchy_size( - unsigned width, - unsigned height, - unsigned mask, - unsigned bytes_per_tile) +panfrost_hierarchy_size(unsigned width, unsigned height, unsigned mask, + unsigned bytes_per_tile) { - unsigned size = PROLOGUE_SIZE; + unsigned size = PROLOGUE_SIZE; - /* Iterate hierarchy levels */ + /* Iterate hierarchy levels */ - for (unsigned b = 0; b < (MAX_TILE_SHIFT - MIN_TILE_SHIFT); ++b) { - /* Check if this level is enabled */ - if (!(mask & (1 << b))) - continue; + for (unsigned b = 0; b < (MAX_TILE_SHIFT - MIN_TILE_SHIFT); ++b) { + /* Check if this level is enabled */ + if (!(mask & (1 << b))) + continue; - /* Shift from a level to a tile size */ - unsigned tile_size = (1 << b) * MIN_TILE_SIZE; + /* Shift from a level to a tile size */ + unsigned tile_size = (1 << b) * MIN_TILE_SIZE; - unsigned tile_count = pan_tile_count(width, height, tile_size, tile_size); - unsigned level_count = bytes_per_tile * tile_count; + unsigned tile_count = pan_tile_count(width, height, tile_size, tile_size); + unsigned level_count = bytes_per_tile * tile_count; - size += level_count; - } + size += level_count; + } - /* This size will be used as an offset, so ensure it's aligned */ - return ALIGN_POT(size, 0x200); + /* This size will be used as an offset, so ensure it's aligned */ + return ALIGN_POT(size, 0x200); } /* Implement the formula: @@ -284,29 +282,32 @@ panfrost_hierarchy_size( */ static unsigned -panfrost_flat_size(unsigned width, unsigned height, unsigned dim, unsigned bytes_per_tile) +panfrost_flat_size(unsigned width, unsigned height, unsigned dim, + unsigned bytes_per_tile) { - /* First, extract the tile dimensions */ + /* First, extract the tile dimensions */ - unsigned tw = (1 << (dim & 0b111)) * 8; - unsigned th = (1 << ((dim & (0b111 << 6)) >> 6)) * 8; + unsigned tw = (1 << (dim & 0b111)) * 8; + unsigned th = (1 << ((dim & (0b111 << 6)) >> 6)) * 8; - /* tile_count is ceil(W/w) * ceil(H/h) */ - unsigned raw = pan_tile_count(width, height, tw, th) * bytes_per_tile; + /* tile_count is ceil(W/w) * ceil(H/h) */ + unsigned raw = pan_tile_count(width, height, tw, th) * bytes_per_tile; - /* Round down and add offset */ - return 0x200 + ((raw / 0x200) * 0x200); + /* Round down and add offset */ + return 0x200 + ((raw / 0x200) * 0x200); } /* Given a hierarchy mask 
and a framebuffer size, compute the header size */ unsigned -panfrost_tiler_header_size(unsigned width, unsigned height, unsigned mask, bool hierarchy) +panfrost_tiler_header_size(unsigned width, unsigned height, unsigned mask, + bool hierarchy) { - if (hierarchy) - return panfrost_hierarchy_size(width, height, mask, HEADER_BYTES_PER_TILE); - else - return panfrost_flat_size(width, height, mask, HEADER_BYTES_PER_TILE); + if (hierarchy) + return panfrost_hierarchy_size(width, height, mask, + HEADER_BYTES_PER_TILE); + else + return panfrost_flat_size(width, height, mask, HEADER_BYTES_PER_TILE); } /* The combined header/body is sized similarly (but it is significantly @@ -315,38 +316,39 @@ panfrost_tiler_header_size(unsigned width, unsigned height, unsigned mask, bool */ unsigned -panfrost_tiler_full_size(unsigned width, unsigned height, unsigned mask, bool hierarchy) +panfrost_tiler_full_size(unsigned width, unsigned height, unsigned mask, + bool hierarchy) { - if (hierarchy) - return panfrost_hierarchy_size(width, height, mask, FULL_BYTES_PER_TILE); - else - return panfrost_flat_size(width, height, mask, FULL_BYTES_PER_TILE); + if (hierarchy) + return panfrost_hierarchy_size(width, height, mask, FULL_BYTES_PER_TILE); + else + return panfrost_flat_size(width, height, mask, FULL_BYTES_PER_TILE); } /* On GPUs without hierarchical tiling, we choose a tile size directly and * stuff it into the field otherwise known as hierarchy mask (not a mask). */ static unsigned -panfrost_choose_tile_size( - unsigned width, unsigned height, unsigned vertex_count) +panfrost_choose_tile_size(unsigned width, unsigned height, + unsigned vertex_count) { - /* Figure out the ideal tile size. Eventually a heuristic should be - * used for this */ + /* Figure out the ideal tile size. Eventually a heuristic should be + * used for this */ - unsigned best_w = 16; - unsigned best_h = 16; + unsigned best_w = 16; + unsigned best_h = 16; - /* Clamp so there are less than 64 tiles in each direction */ + /* Clamp so there are less than 64 tiles in each direction */ - best_w = MAX2(best_w, util_next_power_of_two(width / 63)); - best_h = MAX2(best_h, util_next_power_of_two(height / 63)); + best_w = MAX2(best_w, util_next_power_of_two(width / 63)); + best_h = MAX2(best_h, util_next_power_of_two(height / 63)); - /* We have our ideal tile size, so encode */ + /* We have our ideal tile size, so encode */ - unsigned exp_w = util_logbase2(best_w / 16); - unsigned exp_h = util_logbase2(best_h / 16); + unsigned exp_w = util_logbase2(best_w / 16); + unsigned exp_h = util_logbase2(best_h / 16); - return exp_w | (exp_h << 6); + return exp_w | (exp_h << 6); } /* In the future, a heuristic to choose a tiler hierarchy mask would go here. @@ -356,19 +358,18 @@ panfrost_choose_tile_size( * don't really need all the smaller levels enabled */ unsigned -panfrost_choose_hierarchy_mask( - unsigned width, unsigned height, - unsigned vertex_count, bool hierarchy) +panfrost_choose_hierarchy_mask(unsigned width, unsigned height, + unsigned vertex_count, bool hierarchy) { - /* If there is no geometry, we don't bother enabling anything */ + /* If there is no geometry, we don't bother enabling anything */ - if (!vertex_count) - return 0x00; + if (!vertex_count) + return 0x00; - if (!hierarchy) - return panfrost_choose_tile_size(width, height, vertex_count); + if (!hierarchy) + return panfrost_choose_tile_size(width, height, vertex_count); - /* Otherwise, default everything on. TODO: Proper tests */ + /* Otherwise, default everything on. 
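A quick worked instance of the tile-size encoding above, as a standalone sketch whose pow2/log2 helpers are hypothetical stand-ins for the util_* routines used in the driver: a 1920x1080 framebuffer gives 1920/63 = 30 and 1080/63 = 17, both rounded up to 32 and clamped against the 16-texel minimum, so the field encodes log2(32/16) = 1 in each axis, i.e. 0x41.

/* Standalone sketch of the non-hierarchical tile-size field; next_pow2()
 * and log2u() are local stand-ins, not the util_* helpers. */
#include <stdio.h>

static unsigned
next_pow2(unsigned x)
{
   unsigned p = 1;
   while (p < x)
      p <<= 1;
   return p;
}

static unsigned
log2u(unsigned x)
{
   unsigned l = 0;
   while (x >>= 1)
      l++;
   return l;
}

static unsigned
encode_tile_size(unsigned width, unsigned height)
{
   /* Clamp so there are fewer than 64 tiles in each direction */
   unsigned best_w = next_pow2(width / 63);
   unsigned best_h = next_pow2(height / 63);

   if (best_w < 16)
      best_w = 16;
   if (best_h < 16)
      best_h = 16;

   /* Encode the exponents relative to the 16-texel minimum */
   return log2u(best_w / 16) | (log2u(best_h / 16) << 6);
}

int
main(void)
{
   printf("0x%02x\n", encode_tile_size(1920, 1080)); /* 0x41 -> 32x32 tiles */
   return 0;
}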
TODO: Proper tests */ - return 0xFF; + return 0xFF; } diff --git a/src/panfrost/lib/pan_util.c b/src/panfrost/lib/pan_util.c index 4f56a828e68..4f43d56f6be 100644 --- a/src/panfrost/lib/pan_util.c +++ b/src/panfrost/lib/pan_util.c @@ -23,7 +23,7 @@ #include #include "pan_texture.h" - + /* Translate a PIPE swizzle quad to a 12-bit Mali swizzle code. PIPE * swizzles line up with Mali swizzles for the XYZW01, but PIPE swizzles have * an additional "NONE" field that we have to mask out to zero. Additionally, @@ -32,38 +32,39 @@ unsigned panfrost_translate_swizzle_4(const unsigned char swizzle[4]) { - unsigned out = 0; + unsigned out = 0; - for (unsigned i = 0; i < 4; ++i) { - unsigned translated = (swizzle[i] > PIPE_SWIZZLE_1) ? PIPE_SWIZZLE_0 : swizzle[i]; - out |= (translated << (3*i)); - } + for (unsigned i = 0; i < 4; ++i) { + unsigned translated = + (swizzle[i] > PIPE_SWIZZLE_1) ? PIPE_SWIZZLE_0 : swizzle[i]; + out |= (translated << (3 * i)); + } - return out; + return out; } void panfrost_invert_swizzle(const unsigned char *in, unsigned char *out) { - /* First, default to all zeroes to prevent uninitialized junk */ + /* First, default to all zeroes to prevent uninitialized junk */ - for (unsigned c = 0; c < 4; ++c) - out[c] = PIPE_SWIZZLE_0; + for (unsigned c = 0; c < 4; ++c) + out[c] = PIPE_SWIZZLE_0; - /* Now "do" what the swizzle says */ + /* Now "do" what the swizzle says */ - for (unsigned c = 0; c < 4; ++c) { - unsigned char i = in[c]; + for (unsigned c = 0; c < 4; ++c) { + unsigned char i = in[c]; - /* Who cares? */ - assert(PIPE_SWIZZLE_X == 0); - if (i > PIPE_SWIZZLE_W) - continue; + /* Who cares? */ + assert(PIPE_SWIZZLE_X == 0); + if (i > PIPE_SWIZZLE_W) + continue; - /* Invert */ - unsigned idx = i - PIPE_SWIZZLE_X; - out[idx] = PIPE_SWIZZLE_X + c; - } + /* Invert */ + unsigned idx = i - PIPE_SWIZZLE_X; + out[idx] = PIPE_SWIZZLE_X + c; + } } /* Formats requiring blend shaders are stored raw in the tilebuffer and will @@ -72,12 +73,12 @@ panfrost_invert_swizzle(const unsigned char *in, unsigned char *out) unsigned panfrost_format_to_bifrost_blend(const struct panfrost_device *dev, - enum pipe_format format, - bool dithered) + enum pipe_format format, bool dithered) { - mali_pixel_format pixfmt = (dev->arch >= 7) ? - panfrost_blendable_formats_v7[format].bifrost[dithered] : - panfrost_blendable_formats_v6[format].bifrost[dithered]; + mali_pixel_format pixfmt = + (dev->arch >= 7) + ? 
panfrost_blendable_formats_v7[format].bifrost[dithered] + : panfrost_blendable_formats_v6[format].bifrost[dithered]; - return pixfmt ?: dev->formats[format].hw; + return pixfmt ?: dev->formats[format].hw; } diff --git a/src/panfrost/lib/pan_util.h b/src/panfrost/lib/pan_util.h index c2f883737c3..87eccff7fbc 100644 --- a/src/panfrost/lib/pan_util.h +++ b/src/panfrost/lib/pan_util.h @@ -28,58 +28,54 @@ #ifndef PAN_UTIL_H #define PAN_UTIL_H -#include #include +#include #include "util/format/u_format.h" -#define PAN_DBG_PERF 0x0001 -#define PAN_DBG_TRACE 0x0002 -#define PAN_DBG_DEQP 0x0004 -#define PAN_DBG_DIRTY 0x0008 -#define PAN_DBG_SYNC 0x0010 +#define PAN_DBG_PERF 0x0001 +#define PAN_DBG_TRACE 0x0002 +#define PAN_DBG_DEQP 0x0004 +#define PAN_DBG_DIRTY 0x0008 +#define PAN_DBG_SYNC 0x0010 /* 0x20 unused */ -#define PAN_DBG_NOFP16 0x0040 -#define PAN_DBG_NO_CRC 0x0080 -#define PAN_DBG_GL3 0x0100 -#define PAN_DBG_NO_AFBC 0x0200 -#define PAN_DBG_MSAA16 0x0400 -#define PAN_DBG_INDIRECT 0x0800 -#define PAN_DBG_LINEAR 0x1000 -#define PAN_DBG_NO_CACHE 0x2000 -#define PAN_DBG_DUMP 0x4000 +#define PAN_DBG_NOFP16 0x0040 +#define PAN_DBG_NO_CRC 0x0080 +#define PAN_DBG_GL3 0x0100 +#define PAN_DBG_NO_AFBC 0x0200 +#define PAN_DBG_MSAA16 0x0400 +#define PAN_DBG_INDIRECT 0x0800 +#define PAN_DBG_LINEAR 0x1000 +#define PAN_DBG_NO_CACHE 0x2000 +#define PAN_DBG_DUMP 0x4000 #ifndef NDEBUG -#define PAN_DBG_OVERFLOW 0x8000 +#define PAN_DBG_OVERFLOW 0x8000 #endif struct panfrost_device; -unsigned -panfrost_translate_swizzle_4(const unsigned char swizzle[4]); +unsigned panfrost_translate_swizzle_4(const unsigned char swizzle[4]); -void -panfrost_invert_swizzle(const unsigned char *in, unsigned char *out); +void panfrost_invert_swizzle(const unsigned char *in, unsigned char *out); -unsigned -panfrost_format_to_bifrost_blend(const struct panfrost_device *dev, - enum pipe_format format, - bool dithered); +unsigned panfrost_format_to_bifrost_blend(const struct panfrost_device *dev, + enum pipe_format format, + bool dithered); -void -pan_pack_color(uint32_t *packed, const union pipe_color_union *color, - enum pipe_format format, bool dithered); +void pan_pack_color(uint32_t *packed, const union pipe_color_union *color, + enum pipe_format format, bool dithered); /* Get the last blend shader, for an erratum workaround on v5 */ static inline uint64_t panfrost_last_nonnull(uint64_t *ptrs, unsigned count) { - for (signed i = ((signed) count - 1); i >= 0; --i) { - if (ptrs[i]) - return ptrs[i]; - } + for (signed i = ((signed)count - 1); i >= 0; --i) { + if (ptrs[i]) + return ptrs[i]; + } - return 0; + return 0; } #endif /* PAN_UTIL_H */ diff --git a/src/panfrost/lib/tests/test-blend.c b/src/panfrost/lib/tests/test-blend.c index d04efd68fcb..2ba301e7b24 100644 --- a/src/panfrost/lib/tests/test-blend.c +++ b/src/panfrost/lib/tests/test-blend.c @@ -293,17 +293,19 @@ static const struct test blend_tests[] = { }; /* clang-format on */ -#define ASSERT_EQ(x, y) do { \ - if (x == y) { \ - nr_pass++; \ - } else { \ - nr_fail++; \ - fprintf(stderr, "%s: Assertion failed %s (%x) != %s (%x)\n", \ - T.label, #x, x, #y, y); \ - } \ -} while(0) +#define ASSERT_EQ(x, y) \ + do { \ + if (x == y) { \ + nr_pass++; \ + } else { \ + nr_fail++; \ + fprintf(stderr, "%s: Assertion failed %s (%x) != %s (%x)\n", T.label, \ + #x, x, #y, y); \ + } \ + } while (0) -int main(int argc, const char **argv) +int +main(int argc, const char **argv) { unsigned nr_pass = 0, nr_fail = 0; diff --git a/src/panfrost/lib/tests/test-clear.c 
b/src/panfrost/lib/tests/test-clear.c index 81d807d53e4..91199199149 100644 --- a/src/panfrost/lib/tests/test-clear.c +++ b/src/panfrost/lib/tests/test-clear.c @@ -33,10 +33,22 @@ struct test { uint32_t packed[4]; }; -#define RRRR(r) { r, r, r, r } -#define RGRG(r, g) { r, g, r, g } -#define F(r, g, b, a) { .f = { r, g, b, a } } -#define UI(r, g, b, a) { .ui = { r, g, b, a } } +#define RRRR(r) \ + { \ + r, r, r, r \ + } +#define RGRG(r, g) \ + { \ + r, g, r, g \ + } +#define F(r, g, b, a) \ + { \ + .f = { r, g, b, a } \ + } +#define UI(r, g, b, a) \ + { \ + .ui = { r, g, b, a } \ + } #define D (true) #define _ (false) @@ -140,17 +152,23 @@ static const struct test clear_tests[] = { }; /* clang-format on */ -#define ASSERT_EQ(x, y) do { \ - if ((x[0] == y[0]) && (x[1] == y[1]) && (x[2] == y[2]) && (x[3] == y[3])) { \ - nr_pass++; \ - } else { \ - nr_fail++; \ - fprintf(stderr, "%s%s: Assertion failed %s (%08X %08X %08X %08X) != %s (%08X %08X %08X %08X)\n", \ - util_format_short_name(T.format), T.dithered ? " dithered" : "", #x, x[0], x[1], x[2], x[3], #y, y[0], y[1], y[2], y[3]); \ - } \ -} while(0) +#define ASSERT_EQ(x, y) \ + do { \ + if ((x[0] == y[0]) && (x[1] == y[1]) && (x[2] == y[2]) && \ + (x[3] == y[3])) { \ + nr_pass++; \ + } else { \ + nr_fail++; \ + fprintf( \ + stderr, \ + "%s%s: Assertion failed %s (%08X %08X %08X %08X) != %s (%08X %08X %08X %08X)\n", \ + util_format_short_name(T.format), T.dithered ? " dithered" : "", \ + #x, x[0], x[1], x[2], x[3], #y, y[0], y[1], y[2], y[3]); \ + } \ + } while (0) -int main(int argc, const char **argv) +int +main(int argc, const char **argv) { unsigned nr_pass = 0, nr_fail = 0; diff --git a/src/panfrost/lib/tests/test-earlyzs.cpp b/src/panfrost/lib/tests/test-earlyzs.cpp index 8fff5d469ce..872487b808c 100644 --- a/src/panfrost/lib/tests/test-earlyzs.cpp +++ b/src/panfrost/lib/tests/test-earlyzs.cpp @@ -21,8 +21,8 @@ * SOFTWARE. */ -#include "pan_earlyzs.h" #include "util/pan_ir.h" +#include "pan_earlyzs.h" #include @@ -34,18 +34,19 @@ * under test, only the external API. So we test only the composition. 
*/ -#define ZS_WRITEMASK BITFIELD_BIT(0) -#define ALPHA2COV BITFIELD_BIT(1) +#define ZS_WRITEMASK BITFIELD_BIT(0) +#define ALPHA2COV BITFIELD_BIT(1) #define ZS_ALWAYS_PASSES BITFIELD_BIT(2) -#define DISCARD BITFIELD_BIT(3) -#define WRITES_Z BITFIELD_BIT(4) -#define WRITES_S BITFIELD_BIT(5) -#define WRITES_COV BITFIELD_BIT(6) -#define SIDEFX BITFIELD_BIT(7) -#define API_EARLY BITFIELD_BIT(8) +#define DISCARD BITFIELD_BIT(3) +#define WRITES_Z BITFIELD_BIT(4) +#define WRITES_S BITFIELD_BIT(5) +#define WRITES_COV BITFIELD_BIT(6) +#define SIDEFX BITFIELD_BIT(7) +#define API_EARLY BITFIELD_BIT(8) static void -test(enum pan_earlyzs expected_update, enum pan_earlyzs expected_kill, uint32_t flags) +test(enum pan_earlyzs expected_update, enum pan_earlyzs expected_kill, + uint32_t flags) { struct pan_shader_info info = {}; info.fs.can_discard = !!(flags & DISCARD); @@ -56,18 +57,15 @@ test(enum pan_earlyzs expected_update, enum pan_earlyzs expected_kill, uint32_t info.writes_global = !!(flags & SIDEFX); struct pan_earlyzs_state result = - pan_earlyzs_get(pan_earlyzs_analyze(&info), - !!(flags & ZS_WRITEMASK), - !!(flags & ALPHA2COV), - !!(flags & ZS_ALWAYS_PASSES)); + pan_earlyzs_get(pan_earlyzs_analyze(&info), !!(flags & ZS_WRITEMASK), + !!(flags & ALPHA2COV), !!(flags & ZS_ALWAYS_PASSES)); ASSERT_EQ(result.update, expected_update); ASSERT_EQ(result.kill, expected_kill); } - -#define CASE(expected_update, expected_kill, flags) \ - test(PAN_EARLYZS_ ## expected_update, PAN_EARLYZS_ ## expected_kill, flags) +#define CASE(expected_update, expected_kill, flags) \ + test(PAN_EARLYZS_##expected_update, PAN_EARLYZS_##expected_kill, flags) TEST(EarlyZS, APIForceEarly) { @@ -91,7 +89,8 @@ TEST(EarlyZS, ModifiesCoverageWritesZSNoSideFX) CASE(FORCE_LATE, FORCE_EARLY, ZS_WRITEMASK | WRITES_COV); CASE(FORCE_LATE, FORCE_EARLY, ZS_WRITEMASK | DISCARD); CASE(FORCE_LATE, FORCE_EARLY, ZS_WRITEMASK | ALPHA2COV); - CASE(FORCE_LATE, FORCE_EARLY, ZS_WRITEMASK | WRITES_COV | DISCARD | ALPHA2COV); + CASE(FORCE_LATE, FORCE_EARLY, + ZS_WRITEMASK | WRITES_COV | DISCARD | ALPHA2COV); } TEST(EarlyZS, ModifiesCoverageWritesZSNoSideFXAlt) @@ -99,7 +98,8 @@ TEST(EarlyZS, ModifiesCoverageWritesZSNoSideFXAlt) CASE(FORCE_LATE, WEAK_EARLY, ZS_ALWAYS_PASSES | ZS_WRITEMASK | WRITES_COV); CASE(FORCE_LATE, WEAK_EARLY, ZS_ALWAYS_PASSES | ZS_WRITEMASK | DISCARD); CASE(FORCE_LATE, WEAK_EARLY, ZS_ALWAYS_PASSES | ZS_WRITEMASK | ALPHA2COV); - CASE(FORCE_LATE, WEAK_EARLY, ZS_ALWAYS_PASSES | ZS_WRITEMASK | WRITES_COV | DISCARD | ALPHA2COV); + CASE(FORCE_LATE, WEAK_EARLY, + ZS_ALWAYS_PASSES | ZS_WRITEMASK | WRITES_COV | DISCARD | ALPHA2COV); } TEST(EarlyZS, ModifiesCoverageWritesZSSideFX) @@ -107,7 +107,8 @@ TEST(EarlyZS, ModifiesCoverageWritesZSSideFX) CASE(FORCE_LATE, FORCE_LATE, ZS_WRITEMASK | SIDEFX | WRITES_COV); CASE(FORCE_LATE, FORCE_LATE, ZS_WRITEMASK | SIDEFX | DISCARD); CASE(FORCE_LATE, FORCE_LATE, ZS_WRITEMASK | SIDEFX | ALPHA2COV); - CASE(FORCE_LATE, FORCE_LATE, ZS_WRITEMASK | SIDEFX | WRITES_COV | DISCARD | ALPHA2COV); + CASE(FORCE_LATE, FORCE_LATE, + ZS_WRITEMASK | SIDEFX | WRITES_COV | DISCARD | ALPHA2COV); } TEST(EarlyZS, SideFXNoShaderZS) @@ -136,6 +137,7 @@ TEST(EarlyZS, NoSideFXNoShaderZS) TEST(EarlyZS, NoSideFXNoShaderZSAlt) { CASE(WEAK_EARLY, WEAK_EARLY, ZS_ALWAYS_PASSES); - CASE(WEAK_EARLY, WEAK_EARLY, ZS_ALWAYS_PASSES | ALPHA2COV | DISCARD | WRITES_COV); + CASE(WEAK_EARLY, WEAK_EARLY, + ZS_ALWAYS_PASSES | ALPHA2COV | DISCARD | WRITES_COV); CASE(WEAK_EARLY, WEAK_EARLY, ZS_ALWAYS_PASSES | ZS_WRITEMASK); } diff --git 
a/src/panfrost/lib/tests/test-layout.cpp b/src/panfrost/lib/tests/test-layout.cpp index 1ba7938efec..f47337a64f4 100644 --- a/src/panfrost/lib/tests/test-layout.cpp +++ b/src/panfrost/lib/tests/test-layout.cpp @@ -27,15 +27,13 @@ TEST(BlockSize, Linear) { - enum pipe_format format[] = { - PIPE_FORMAT_R32G32B32_FLOAT, - PIPE_FORMAT_R8G8B8_UNORM, - PIPE_FORMAT_ETC2_RGB8, - PIPE_FORMAT_ASTC_5x5 - }; + enum pipe_format format[] = {PIPE_FORMAT_R32G32B32_FLOAT, + PIPE_FORMAT_R8G8B8_UNORM, PIPE_FORMAT_ETC2_RGB8, + PIPE_FORMAT_ASTC_5x5}; for (unsigned i = 0; i < ARRAY_SIZE(format); ++i) { - struct pan_block_size blk = panfrost_block_size(DRM_FORMAT_MOD_LINEAR, format[i]); + struct pan_block_size blk = + panfrost_block_size(DRM_FORMAT_MOD_LINEAR, format[i]); EXPECT_EQ(blk.width, 1); EXPECT_EQ(blk.height, 1); @@ -50,7 +48,8 @@ TEST(BlockSize, UInterleavedRegular) }; for (unsigned i = 0; i < ARRAY_SIZE(format); ++i) { - struct pan_block_size blk = panfrost_block_size(DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED, format[i]); + struct pan_block_size blk = panfrost_block_size( + DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED, format[i]); EXPECT_EQ(blk.width, 16); EXPECT_EQ(blk.height, 16); @@ -59,13 +58,11 @@ TEST(BlockSize, UInterleavedRegular) TEST(BlockSize, UInterleavedBlockCompressed) { - enum pipe_format format[] = { - PIPE_FORMAT_ETC2_RGB8, - PIPE_FORMAT_ASTC_5x5 - }; + enum pipe_format format[] = {PIPE_FORMAT_ETC2_RGB8, PIPE_FORMAT_ASTC_5x5}; for (unsigned i = 0; i < ARRAY_SIZE(format); ++i) { - struct pan_block_size blk = panfrost_block_size(DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED, format[i]); + struct pan_block_size blk = panfrost_block_size( + DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED, format[i]); EXPECT_EQ(blk.width, 4); EXPECT_EQ(blk.height, 4); @@ -74,17 +71,13 @@ TEST(BlockSize, UInterleavedBlockCompressed) TEST(BlockSize, AFBCFormatInvariant16x16) { - enum pipe_format format[] = { - PIPE_FORMAT_R32G32B32_FLOAT, - PIPE_FORMAT_R8G8B8_UNORM, - PIPE_FORMAT_ETC2_RGB8, - PIPE_FORMAT_ASTC_5x5 - }; + enum pipe_format format[] = {PIPE_FORMAT_R32G32B32_FLOAT, + PIPE_FORMAT_R8G8B8_UNORM, PIPE_FORMAT_ETC2_RGB8, + PIPE_FORMAT_ASTC_5x5}; - uint64_t modifier = DRM_FORMAT_MOD_ARM_AFBC( - AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | - AFBC_FORMAT_MOD_SPARSE | - AFBC_FORMAT_MOD_YTR); + uint64_t modifier = + DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | + AFBC_FORMAT_MOD_SPARSE | AFBC_FORMAT_MOD_YTR); for (unsigned i = 0; i < ARRAY_SIZE(format); ++i) { struct pan_block_size blk = panfrost_block_size(modifier, format[i]); @@ -96,17 +89,13 @@ TEST(BlockSize, AFBCFormatInvariant16x16) TEST(BlockSize, AFBCFormatInvariant32x8) { - enum pipe_format format[] = { - PIPE_FORMAT_R32G32B32_FLOAT, - PIPE_FORMAT_R8G8B8_UNORM, - PIPE_FORMAT_ETC2_RGB8, - PIPE_FORMAT_ASTC_5x5 - }; + enum pipe_format format[] = {PIPE_FORMAT_R32G32B32_FLOAT, + PIPE_FORMAT_R8G8B8_UNORM, PIPE_FORMAT_ETC2_RGB8, + PIPE_FORMAT_ASTC_5x5}; - uint64_t modifier = DRM_FORMAT_MOD_ARM_AFBC( - AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 | - AFBC_FORMAT_MOD_SPARSE | - AFBC_FORMAT_MOD_YTR); + uint64_t modifier = + DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 | + AFBC_FORMAT_MOD_SPARSE | AFBC_FORMAT_MOD_YTR); for (unsigned i = 0; i < ARRAY_SIZE(format); ++i) { struct pan_block_size blk = panfrost_block_size(modifier, format[i]); @@ -118,10 +107,9 @@ TEST(BlockSize, AFBCFormatInvariant32x8) TEST(BlockSize, AFBCSuperblock16x16) { - uint64_t modifier = DRM_FORMAT_MOD_ARM_AFBC( - AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | - AFBC_FORMAT_MOD_SPARSE | - 
AFBC_FORMAT_MOD_YTR); + uint64_t modifier = + DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | + AFBC_FORMAT_MOD_SPARSE | AFBC_FORMAT_MOD_YTR); EXPECT_EQ(panfrost_afbc_superblock_size(modifier).width, 16); EXPECT_EQ(panfrost_afbc_superblock_width(modifier), 16); @@ -134,9 +122,8 @@ TEST(BlockSize, AFBCSuperblock16x16) TEST(BlockSize, AFBCSuperblock32x8) { - uint64_t modifier = DRM_FORMAT_MOD_ARM_AFBC( - AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 | - AFBC_FORMAT_MOD_SPARSE); + uint64_t modifier = DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 | + AFBC_FORMAT_MOD_SPARSE); EXPECT_EQ(panfrost_afbc_superblock_size(modifier).width, 32); EXPECT_EQ(panfrost_afbc_superblock_width(modifier), 32); @@ -149,9 +136,8 @@ TEST(BlockSize, AFBCSuperblock32x8) TEST(BlockSize, AFBCSuperblock64x4) { - uint64_t modifier = DRM_FORMAT_MOD_ARM_AFBC( - AFBC_FORMAT_MOD_BLOCK_SIZE_64x4 | - AFBC_FORMAT_MOD_SPARSE); + uint64_t modifier = DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_64x4 | + AFBC_FORMAT_MOD_SPARSE); EXPECT_EQ(panfrost_afbc_superblock_size(modifier).width, 64); EXPECT_EQ(panfrost_afbc_superblock_width(modifier), 64); @@ -165,9 +151,11 @@ TEST(BlockSize, AFBCSuperblock64x4) /* Calculate Bifrost line stride, since we have reference formulas for Bifrost * stride calculations. */ -static uint32_t pan_afbc_line_stride(uint64_t modifier, uint32_t width) +static uint32_t +pan_afbc_line_stride(uint64_t modifier, uint32_t width) { - return pan_afbc_stride_blocks(modifier, pan_afbc_row_stride(modifier, width)); + return pan_afbc_stride_blocks(modifier, + pan_afbc_row_stride(modifier, width)); } /* Which form of the stride we specify is hardware specific (row stride for @@ -189,16 +177,16 @@ TEST(AFBCStride, Linear) uint64_t modifier = modifiers[m]; uint32_t sw = panfrost_afbc_superblock_width(modifier); - uint32_t cases[] = { 1, 4, 17, 39 }; + uint32_t cases[] = {1, 4, 17, 39}; for (unsigned i = 0; i < ARRAY_SIZE(cases); ++i) { uint32_t width = sw * cases[i]; EXPECT_EQ(pan_afbc_row_stride(modifier, width), - 16 * DIV_ROUND_UP(width, sw)); + 16 * DIV_ROUND_UP(width, sw)); EXPECT_EQ(pan_afbc_line_stride(modifier, width), - DIV_ROUND_UP(width, sw)); + DIV_ROUND_UP(width, sw)); } } } @@ -207,63 +195,73 @@ TEST(AFBCStride, Tiled) { uint64_t modifiers[] = { DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | - AFBC_FORMAT_MOD_TILED | - AFBC_FORMAT_MOD_SPARSE), + AFBC_FORMAT_MOD_TILED | AFBC_FORMAT_MOD_SPARSE), DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 | - AFBC_FORMAT_MOD_TILED | - AFBC_FORMAT_MOD_SPARSE), + AFBC_FORMAT_MOD_TILED | AFBC_FORMAT_MOD_SPARSE), DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_64x4 | - AFBC_FORMAT_MOD_TILED | - AFBC_FORMAT_MOD_SPARSE), + AFBC_FORMAT_MOD_TILED | AFBC_FORMAT_MOD_SPARSE), }; for (unsigned m = 0; m < ARRAY_SIZE(modifiers); ++m) { uint64_t modifier = modifiers[m]; uint32_t sw = panfrost_afbc_superblock_width(modifier); - uint32_t cases[] = { 1, 4, 17, 39 }; + uint32_t cases[] = {1, 4, 17, 39}; for (unsigned i = 0; i < ARRAY_SIZE(cases); ++i) { uint32_t width = sw * 8 * cases[i]; EXPECT_EQ(pan_afbc_row_stride(modifier, width), - 16 * DIV_ROUND_UP(width, (sw * 8)) * 8 * 8); + 16 * DIV_ROUND_UP(width, (sw * 8)) * 8 * 8); EXPECT_EQ(pan_afbc_line_stride(modifier, width), - DIV_ROUND_UP(width, sw * 8) * 8); + DIV_ROUND_UP(width, sw * 8) * 8); } } } TEST(LegacyStride, FromLegacyLinear) { - EXPECT_EQ(panfrost_from_legacy_stride(1920 * 4, PIPE_FORMAT_R8G8B8A8_UINT, DRM_FORMAT_MOD_LINEAR), 1920 * 4); - EXPECT_EQ(panfrost_from_legacy_stride(53, 
PIPE_FORMAT_R8_SNORM, DRM_FORMAT_MOD_LINEAR), 53); - EXPECT_EQ(panfrost_from_legacy_stride(60, PIPE_FORMAT_ETC2_RGB8, DRM_FORMAT_MOD_LINEAR), 60); + EXPECT_EQ(panfrost_from_legacy_stride(1920 * 4, PIPE_FORMAT_R8G8B8A8_UINT, + DRM_FORMAT_MOD_LINEAR), + 1920 * 4); + EXPECT_EQ(panfrost_from_legacy_stride(53, PIPE_FORMAT_R8_SNORM, + DRM_FORMAT_MOD_LINEAR), + 53); + EXPECT_EQ(panfrost_from_legacy_stride(60, PIPE_FORMAT_ETC2_RGB8, + DRM_FORMAT_MOD_LINEAR), + 60); } TEST(LegacyStride, FromLegacyInterleaved) { - EXPECT_EQ(panfrost_from_legacy_stride(1920 * 4, PIPE_FORMAT_R8G8B8A8_UINT, - DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED), - 1920 * 4 * 16); + EXPECT_EQ( + panfrost_from_legacy_stride(1920 * 4, PIPE_FORMAT_R8G8B8A8_UINT, + DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED), + 1920 * 4 * 16); - EXPECT_EQ(panfrost_from_legacy_stride(53, PIPE_FORMAT_R8_SNORM, - DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED), 53 * 16); + EXPECT_EQ( + panfrost_from_legacy_stride(53, PIPE_FORMAT_R8_SNORM, + DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED), + 53 * 16); - EXPECT_EQ(panfrost_from_legacy_stride(60, PIPE_FORMAT_ETC2_RGB8, - DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED), 60 * 4); + EXPECT_EQ( + panfrost_from_legacy_stride(60, PIPE_FORMAT_ETC2_RGB8, + DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED), + 60 * 4); } TEST(LegacyStride, FromLegacyAFBC) { - uint64_t modifier = DRM_FORMAT_MOD_ARM_AFBC( - AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 | - AFBC_FORMAT_MOD_SPARSE | - AFBC_FORMAT_MOD_YTR); + uint64_t modifier = + DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 | + AFBC_FORMAT_MOD_SPARSE | AFBC_FORMAT_MOD_YTR); - EXPECT_EQ(panfrost_from_legacy_stride(1920 * 4, PIPE_FORMAT_R8G8B8A8_UINT, modifier), 60 * 16); - EXPECT_EQ(panfrost_from_legacy_stride(64, PIPE_FORMAT_R8_SNORM, modifier), 2 * 16); + EXPECT_EQ(panfrost_from_legacy_stride(1920 * 4, PIPE_FORMAT_R8G8B8A8_UINT, + modifier), + 60 * 16); + EXPECT_EQ(panfrost_from_legacy_stride(64, PIPE_FORMAT_R8_SNORM, modifier), + 2 * 16); } /* dEQP-GLES3.functional.texture.format.compressed.etc1_2d_pot */ @@ -277,12 +275,10 @@ TEST(Layout, ImplicitLayoutInterleavedETC2) .depth = 1, .nr_samples = 1, .dim = MALI_TEXTURE_DIMENSION_2D, - .nr_slices = 8 - }; + .nr_slices = 8}; - unsigned offsets[9] = { - 0, 8192, 10240, 10752, 10880, 11008, 11136, 11264, 11392 - }; + unsigned offsets[9] = {0, 8192, 10240, 10752, 10880, + 11008, 11136, 11264, 11392}; ASSERT_TRUE(pan_image_layout_init(&l, NULL)); @@ -307,8 +303,7 @@ TEST(Layout, ImplicitLayoutInterleavedASTC5x5) .depth = 1, .nr_samples = 1, .dim = MALI_TEXTURE_DIMENSION_2D, - .nr_slices = 1 - }; + .nr_slices = 1}; ASSERT_TRUE(pan_image_layout_init(&l, NULL)); @@ -326,16 +321,14 @@ TEST(Layout, ImplicitLayoutInterleavedASTC5x5) TEST(Layout, ImplicitLayoutLinearASTC5x5) { - struct pan_image_layout l = { - .modifier = DRM_FORMAT_MOD_LINEAR, - .format = PIPE_FORMAT_ASTC_5x5, - .width = 50, - .height = 50, - .depth = 1, - .nr_samples = 1, - .dim = MALI_TEXTURE_DIMENSION_2D, - .nr_slices = 1 - }; + struct pan_image_layout l = {.modifier = DRM_FORMAT_MOD_LINEAR, + .format = PIPE_FORMAT_ASTC_5x5, + .width = 50, + .height = 50, + .depth = 1, + .nr_samples = 1, + .dim = MALI_TEXTURE_DIMENSION_2D, + .nr_slices = 1}; ASSERT_TRUE(pan_image_layout_init(&l, NULL)); @@ -353,25 +346,23 @@ TEST(Layout, ImplicitLayoutLinearASTC5x5) /* dEQP-GLES3.functional.texture.format.unsized.rgba_unsigned_byte_3d_pot */ TEST(AFBCLayout, Linear3D) { - uint64_t modifier = DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | - AFBC_FORMAT_MOD_SPARSE); + 
uint64_t modifier = DRM_FORMAT_MOD_ARM_AFBC( + AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | AFBC_FORMAT_MOD_SPARSE); - struct pan_image_layout l = { - .modifier = modifier, - .format = PIPE_FORMAT_R8G8B8A8_UNORM, - .width = 8, - .height = 32, - .depth = 16, - .nr_samples = 1, - .dim = MALI_TEXTURE_DIMENSION_3D, - .nr_slices = 1 - }; + struct pan_image_layout l = {.modifier = modifier, + .format = PIPE_FORMAT_R8G8B8A8_UNORM, + .width = 8, + .height = 32, + .depth = 16, + .nr_samples = 1, + .dim = MALI_TEXTURE_DIMENSION_3D, + .nr_slices = 1}; ASSERT_TRUE(pan_image_layout_init(&l, NULL)); /* AFBC Surface stride is bytes between consecutive surface headers, which is - * the header size since this is a 3D texture. At superblock size 16x16, the 8x32 - * layer has 1x2 superblocks, so the header size is 2 * 16 = 32 bytes, + * the header size since this is a 3D texture. At superblock size 16x16, the + * 8x32 layer has 1x2 superblocks, so the header size is 2 * 16 = 32 bytes, * rounded up to cache line 64. * * There is only 1 superblock per row, so the row stride is the bytes per 1 @@ -393,20 +384,18 @@ TEST(AFBCLayout, Linear3D) TEST(AFBCLayout, Tiled16x16) { - uint64_t modifier = DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | - AFBC_FORMAT_MOD_TILED | - AFBC_FORMAT_MOD_SPARSE); + uint64_t modifier = + DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | + AFBC_FORMAT_MOD_TILED | AFBC_FORMAT_MOD_SPARSE); - struct pan_image_layout l = { - .modifier = modifier, - .format = PIPE_FORMAT_R8G8B8A8_UNORM, - .width = 917, - .height = 417, - .depth = 1, - .nr_samples = 1, - .dim = MALI_TEXTURE_DIMENSION_2D, - .nr_slices = 1 - }; + struct pan_image_layout l = {.modifier = modifier, + .format = PIPE_FORMAT_R8G8B8A8_UNORM, + .width = 917, + .height = 417, + .depth = 1, + .nr_samples = 1, + .dim = MALI_TEXTURE_DIMENSION_2D, + .nr_slices = 1}; ASSERT_TRUE(pan_image_layout_init(&l, NULL)); @@ -432,19 +421,17 @@ TEST(AFBCLayout, Tiled16x16) TEST(AFBCLayout, Linear16x16Minimal) { - uint64_t modifier = DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | - AFBC_FORMAT_MOD_SPARSE); + uint64_t modifier = DRM_FORMAT_MOD_ARM_AFBC( + AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | AFBC_FORMAT_MOD_SPARSE); - struct pan_image_layout l = { - .modifier = modifier, - .format = PIPE_FORMAT_R8_UNORM, - .width = 1, - .height = 1, - .depth = 1, - .nr_samples = 1, - .dim = MALI_TEXTURE_DIMENSION_2D, - .nr_slices = 1 - }; + struct pan_image_layout l = {.modifier = modifier, + .format = PIPE_FORMAT_R8_UNORM, + .width = 1, + .height = 1, + .depth = 1, + .nr_samples = 1, + .dim = MALI_TEXTURE_DIMENSION_2D, + .nr_slices = 1}; ASSERT_TRUE(pan_image_layout_init(&l, NULL)); @@ -459,20 +446,18 @@ TEST(AFBCLayout, Linear16x16Minimal) TEST(AFBCLayout, Tiled16x16Minimal) { - uint64_t modifier = DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | - AFBC_FORMAT_MOD_TILED | - AFBC_FORMAT_MOD_SPARSE); + uint64_t modifier = + DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | + AFBC_FORMAT_MOD_TILED | AFBC_FORMAT_MOD_SPARSE); - struct pan_image_layout l = { - .modifier = modifier, - .format = PIPE_FORMAT_R8_UNORM, - .width = 1, - .height = 1, - .depth = 1, - .nr_samples = 1, - .dim = MALI_TEXTURE_DIMENSION_2D, - .nr_slices = 1 - }; + struct pan_image_layout l = {.modifier = modifier, + .format = PIPE_FORMAT_R8_UNORM, + .width = 1, + .height = 1, + .depth = 1, + .nr_samples = 1, + .dim = MALI_TEXTURE_DIMENSION_2D, + .nr_slices = 1}; ASSERT_TRUE(pan_image_layout_init(&l, NULL)); diff --git a/src/panfrost/lib/wrap.h 
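/* [Editor's sketch, not part of the patch] A minimal standalone illustration
 * of the reference arithmetic the AFBCStride/AFBCLayout tests above encode:
 * each AFBC superblock carries a 16-byte header, so for a linear layout the
 * header row stride is 16 bytes times the number of superblocks per row.
 * The helper name and AFBC_HEADER_SIZE constant are illustrative only and are
 * not part of the panfrost API; DIV_ROUND_UP is redefined locally so the
 * sketch compiles on its own. */
#include <assert.h>
#include <stdint.h>

#define AFBC_HEADER_SIZE 16 /* bytes per superblock header */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

static uint32_t
afbc_linear_row_stride(uint32_t width, uint32_t superblock_width)
{
   /* One header per superblock in the row, rounding up for partial blocks */
   return AFBC_HEADER_SIZE * DIV_ROUND_UP(width, superblock_width);
}

int
main(void)
{
   /* Matches the 16x16 expectation in AFBCStride.Linear: width 39 * 16 = 624
    * covers 39 superblocks, i.e. a 624-byte row of headers. */
   assert(afbc_linear_row_stride(39 * 16, 16) == 16 * 39);

   /* The 8x32 layer in AFBCLayout.Linear3D has a single 16-wide superblock
    * per row, so the row stride is a single 16-byte header. */
   assert(afbc_linear_row_stride(8, 16) == 16);

   return 0;
}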
b/src/panfrost/lib/wrap.h index 56bb0f48aed..d4cafa75429 100644 --- a/src/panfrost/lib/wrap.h +++ b/src/panfrost/lib/wrap.h @@ -27,11 +27,11 @@ #ifndef __PAN_DECODE_PUBLIC_H__ #define __PAN_DECODE_PUBLIC_H__ +#include +#include +#include #include #include -#include -#include -#include /* Public entrypoints for the tracing infrastructure. This API should be kept * more or less stable. Don't feel bad if you have to change it; just feel @@ -48,14 +48,13 @@ void pandecode_next_frame(void); void pandecode_close(void); -void -pandecode_inject_mmap(uint64_t gpu_va, void *cpu, unsigned sz, const char *name); +void pandecode_inject_mmap(uint64_t gpu_va, void *cpu, unsigned sz, + const char *name); void pandecode_inject_free(uint64_t gpu_va, unsigned sz); void pandecode_jc(uint64_t jc_gpu_va, unsigned gpu_id); -void -pandecode_abort_on_fault(uint64_t jc_gpu_va, unsigned gpu_id); +void pandecode_abort_on_fault(uint64_t jc_gpu_va, unsigned gpu_id); #endif /* __MMAP_TRACE_H__ */ diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h index 593a4e599a4..96d1a9a1ab6 100644 --- a/src/panfrost/midgard/compiler.h +++ b/src/panfrost/midgard/compiler.h @@ -25,21 +25,21 @@ #ifndef _MDG_COMPILER_H #define _MDG_COMPILER_H -#include "midgard.h" #include "helpers.h" +#include "midgard.h" #include "midgard_compile.h" #include "midgard_ops.h" #include "util/hash_table.h" -#include "util/u_dynarray.h" -#include "util/set.h" #include "util/list.h" +#include "util/set.h" +#include "util/u_dynarray.h" #include "util/u_math.h" -#include "compiler/nir_types.h" #include "compiler/nir/nir.h" -#include "panfrost/util/pan_ir.h" +#include "compiler/nir_types.h" #include "panfrost/util/lcra.h" +#include "panfrost/util/pan_ir.h" /* Forward declare */ struct midgard_block; @@ -48,28 +48,30 @@ struct midgard_block; * the hardware), hence why that must be zero. TARGET_DISCARD signals this * instruction is actually a discard op. */ -#define TARGET_GOTO 0 -#define TARGET_BREAK 1 -#define TARGET_CONTINUE 2 -#define TARGET_DISCARD 3 +#define TARGET_GOTO 0 +#define TARGET_BREAK 1 +#define TARGET_CONTINUE 2 +#define TARGET_DISCARD 3 #define TARGET_TILEBUF_WAIT 4 typedef struct midgard_branch { - /* If conditional, the condition is specified in r31.w */ - bool conditional; + /* If conditional, the condition is specified in r31.w */ + bool conditional; - /* For conditionals, if this is true, we branch on FALSE. If false, we branch on TRUE. */ - bool invert_conditional; + /* For conditionals, if this is true, we branch on FALSE. If false, we branch + * on TRUE. */ + bool invert_conditional; - /* Branch targets: the start of a block, the start of a loop (continue), the end of a loop (break). Value is one of TARGET_ */ - unsigned target_type; + /* Branch targets: the start of a block, the start of a loop (continue), the + * end of a loop (break). Value is one of TARGET_ */ + unsigned target_type; - /* The actual target */ - union { - int target_block; - int target_break; - int target_continue; - }; + /* The actual target */ + union { + int target_block; + int target_break; + int target_continue; + }; } midgard_branch; /* Generic in-memory data type repesenting a single logical instruction, rather @@ -82,233 +84,235 @@ typedef struct midgard_branch { * emitted before the register allocation pass. 
*/ -#define MIR_SRC_COUNT 4 +#define MIR_SRC_COUNT 4 #define MIR_VEC_COMPONENTS 16 typedef struct midgard_instruction { - /* Must be first for casting */ - struct list_head link; + /* Must be first for casting */ + struct list_head link; - unsigned type; /* ALU, load/store, texture */ + unsigned type; /* ALU, load/store, texture */ - /* Instruction arguments represented as block-local SSA - * indices, rather than registers. ~0 means unused. */ - unsigned src[MIR_SRC_COUNT]; - unsigned dest; + /* Instruction arguments represented as block-local SSA + * indices, rather than registers. ~0 means unused. */ + unsigned src[MIR_SRC_COUNT]; + unsigned dest; - /* vec16 swizzle, unpacked, per source */ - unsigned swizzle[MIR_SRC_COUNT][MIR_VEC_COMPONENTS]; + /* vec16 swizzle, unpacked, per source */ + unsigned swizzle[MIR_SRC_COUNT][MIR_VEC_COMPONENTS]; - /* Types! */ - nir_alu_type src_types[MIR_SRC_COUNT]; - nir_alu_type dest_type; + /* Types! */ + nir_alu_type src_types[MIR_SRC_COUNT]; + nir_alu_type dest_type; - /* Packing ops have non-32-bit dest types even though they functionally - * work at the 32-bit level, use this as a signal to disable copyprop. - * We maybe need synthetic pack ops instead. */ - bool is_pack; + /* Packing ops have non-32-bit dest types even though they functionally + * work at the 32-bit level, use this as a signal to disable copyprop. + * We maybe need synthetic pack ops instead. */ + bool is_pack; - /* Modifiers, depending on type */ - union { - struct { - bool src_abs[MIR_SRC_COUNT]; - bool src_neg[MIR_SRC_COUNT]; - }; + /* Modifiers, depending on type */ + union { + struct { + bool src_abs[MIR_SRC_COUNT]; + bool src_neg[MIR_SRC_COUNT]; + }; - struct { - bool src_shift[MIR_SRC_COUNT]; - }; - }; + struct { + bool src_shift[MIR_SRC_COUNT]; + }; + }; - /* Out of the union for csel (could maybe be fixed..) */ - bool src_invert[MIR_SRC_COUNT]; + /* Out of the union for csel (could maybe be fixed..) */ + bool src_invert[MIR_SRC_COUNT]; - /* If the op supports it */ - enum midgard_roundmode roundmode; + /* If the op supports it */ + enum midgard_roundmode roundmode; - /* For textures: should helpers execute this instruction (instead of - * just helping with derivatives)? Should helpers terminate after? */ - bool helper_terminate; - bool helper_execute; + /* For textures: should helpers execute this instruction (instead of + * just helping with derivatives)? Should helpers terminate after? */ + bool helper_terminate; + bool helper_execute; - /* I.e. (1 << alu_bit) */ - int unit; + /* I.e. (1 << alu_bit) */ + int unit; - bool has_constants; - midgard_constants constants; - uint16_t inline_constant; - bool has_inline_constant; + bool has_constants; + midgard_constants constants; + uint16_t inline_constant; + bool has_inline_constant; - bool compact_branch; - uint8_t writeout; - bool last_writeout; + bool compact_branch; + uint8_t writeout; + bool last_writeout; - /* Masks in a saneish format. One bit per channel, not packed fancy. - * Use this instead of the op specific ones, and switch over at emit - * time */ + /* Masks in a saneish format. One bit per channel, not packed fancy. + * Use this instead of the op specific ones, and switch over at emit + * time */ - uint16_t mask; + uint16_t mask; - /* Hint for the register allocator not to spill the destination written - * from this instruction (because it is a spill/unspill node itself). 
- * Bitmask of spilled classes */ + /* Hint for the register allocator not to spill the destination written + * from this instruction (because it is a spill/unspill node itself). + * Bitmask of spilled classes */ - unsigned no_spill; + unsigned no_spill; - /* Generic hint for intra-pass use */ - bool hint; + /* Generic hint for intra-pass use */ + bool hint; - /* During scheduling, the backwards dependency graph - * (DAG). nr_dependencies is the number of unscheduled - * instructions that must still be scheduled after - * (before) this instruction. dependents are which - * instructions need to be scheduled before (after) this - * instruction. */ + /* During scheduling, the backwards dependency graph + * (DAG). nr_dependencies is the number of unscheduled + * instructions that must still be scheduled after + * (before) this instruction. dependents are which + * instructions need to be scheduled before (after) this + * instruction. */ - unsigned nr_dependencies; - BITSET_WORD *dependents; + unsigned nr_dependencies; + BITSET_WORD *dependents; - /* Use this in conjunction with `type` */ - unsigned op; + /* Use this in conjunction with `type` */ + unsigned op; - /* This refers to midgard_outmod_float or midgard_outmod_int. - * In case of a ALU op, use midgard_is_integer_out_op() to know which - * one is used. - * If it's a texture op, it's always midgard_outmod_float. */ - unsigned outmod; + /* This refers to midgard_outmod_float or midgard_outmod_int. + * In case of a ALU op, use midgard_is_integer_out_op() to know which + * one is used. + * If it's a texture op, it's always midgard_outmod_float. */ + unsigned outmod; - union { - midgard_load_store_word load_store; - midgard_texture_word texture; + union { + midgard_load_store_word load_store; + midgard_texture_word texture; - midgard_branch branch; - }; + midgard_branch branch; + }; - unsigned bundle_id; + unsigned bundle_id; } midgard_instruction; typedef struct midgard_block { - pan_block base; + pan_block base; - bool scheduled; + bool scheduled; - /* List of midgard_bundles emitted (after the scheduler has run) */ - struct util_dynarray bundles; + /* List of midgard_bundles emitted (after the scheduler has run) */ + struct util_dynarray bundles; - /* Number of quadwords _actually_ emitted, as determined after scheduling */ - unsigned quadword_count; + /* Number of quadwords _actually_ emitted, as determined after scheduling */ + unsigned quadword_count; - /* Indicates this is a fixed-function fragment epilogue block */ - bool epilogue; + /* Indicates this is a fixed-function fragment epilogue block */ + bool epilogue; - /* Are helper invocations required by this block? */ - bool helpers_in; + /* Are helper invocations required by this block? */ + bool helpers_in; } midgard_block; typedef struct midgard_bundle { - /* Tag for the overall bundle */ - int tag; + /* Tag for the overall bundle */ + int tag; - /* Instructions contained by the bundle. instruction_count <= 6 (vmul, - * sadd, vadd, smul, vlut, branch) */ - int instruction_count; - midgard_instruction *instructions[6]; + /* Instructions contained by the bundle. 
instruction_count <= 6 (vmul, + * sadd, vadd, smul, vlut, branch) */ + int instruction_count; + midgard_instruction *instructions[6]; - /* Bundle-wide ALU configuration */ - int padding; - int control; - bool has_embedded_constants; - midgard_constants constants; - bool last_writeout; + /* Bundle-wide ALU configuration */ + int padding; + int control; + bool has_embedded_constants; + midgard_constants constants; + bool last_writeout; } midgard_bundle; enum midgard_rt_id { - MIDGARD_COLOR_RT0 = 0, - MIDGARD_COLOR_RT1, - MIDGARD_COLOR_RT2, - MIDGARD_COLOR_RT3, - MIDGARD_COLOR_RT4, - MIDGARD_COLOR_RT5, - MIDGARD_COLOR_RT6, - MIDGARD_COLOR_RT7, - MIDGARD_ZS_RT, - MIDGARD_NUM_RTS, + MIDGARD_COLOR_RT0 = 0, + MIDGARD_COLOR_RT1, + MIDGARD_COLOR_RT2, + MIDGARD_COLOR_RT3, + MIDGARD_COLOR_RT4, + MIDGARD_COLOR_RT5, + MIDGARD_COLOR_RT6, + MIDGARD_COLOR_RT7, + MIDGARD_ZS_RT, + MIDGARD_NUM_RTS, }; #define MIDGARD_MAX_SAMPLE_ITER 16 typedef struct compiler_context { - const struct panfrost_compile_inputs *inputs; - nir_shader *nir; - struct pan_shader_info *info; - gl_shader_stage stage; + const struct panfrost_compile_inputs *inputs; + nir_shader *nir; + struct pan_shader_info *info; + gl_shader_stage stage; - /* Number of samples for a keyed blend shader. Depends on is_blend */ - unsigned blend_sample_iterations; + /* Number of samples for a keyed blend shader. Depends on is_blend */ + unsigned blend_sample_iterations; - /* Index to precolour to r0 for an input blend colour */ - unsigned blend_input; + /* Index to precolour to r0 for an input blend colour */ + unsigned blend_input; - /* Index to precolour to r2 for a dual-source blend colour */ - unsigned blend_src1; + /* Index to precolour to r2 for a dual-source blend colour */ + unsigned blend_src1; - /* Count of spills and fills for shaderdb */ - unsigned spills; - unsigned fills; + /* Count of spills and fills for shaderdb */ + unsigned spills; + unsigned fills; - /* Current NIR function */ - nir_function *func; + /* Current NIR function */ + nir_function *func; - /* Allocated compiler temporary counter */ - unsigned temp_alloc; + /* Allocated compiler temporary counter */ + unsigned temp_alloc; - /* Unordered list of midgard_blocks */ - int block_count; - struct list_head blocks; + /* Unordered list of midgard_blocks */ + int block_count; + struct list_head blocks; - /* TODO merge with block_count? */ - unsigned block_source_count; + /* TODO merge with block_count? 
*/ + unsigned block_source_count; - /* List of midgard_instructions emitted for the current block */ - midgard_block *current_block; + /* List of midgard_instructions emitted for the current block */ + midgard_block *current_block; - /* If there is a preset after block, use this, otherwise emit_block will create one if NULL */ - midgard_block *after_block; + /* If there is a preset after block, use this, otherwise emit_block will + * create one if NULL */ + midgard_block *after_block; - /* The current "depth" of the loop, for disambiguating breaks/continues - * when using nested loops */ - int current_loop_depth; + /* The current "depth" of the loop, for disambiguating breaks/continues + * when using nested loops */ + int current_loop_depth; - /* Total number of loops for shader-db */ - unsigned loop_count; + /* Total number of loops for shader-db */ + unsigned loop_count; - /* Constants which have been loaded, for later inlining */ - struct hash_table_u64 *ssa_constants; + /* Constants which have been loaded, for later inlining */ + struct hash_table_u64 *ssa_constants; - int temp_count; - int max_hash; + int temp_count; + int max_hash; - /* Set of NIR indices that were already emitted as outmods */ - BITSET_WORD *already_emitted; + /* Set of NIR indices that were already emitted as outmods */ + BITSET_WORD *already_emitted; - /* Count of instructions emitted from NIR overall, across all blocks */ - int instruction_count; + /* Count of instructions emitted from NIR overall, across all blocks */ + int instruction_count; - unsigned quadword_count; + unsigned quadword_count; - /* Bitmask of valid metadata */ - unsigned metadata; + /* Bitmask of valid metadata */ + unsigned metadata; - /* Model-specific quirk set */ - uint32_t quirks; + /* Model-specific quirk set */ + uint32_t quirks; - /* Writeout instructions for each render target */ - midgard_instruction *writeout_branch[MIDGARD_NUM_RTS][MIDGARD_MAX_SAMPLE_ITER]; + /* Writeout instructions for each render target */ + midgard_instruction + *writeout_branch[MIDGARD_NUM_RTS][MIDGARD_MAX_SAMPLE_ITER]; - struct hash_table_u64 *sysval_to_id; + struct hash_table_u64 *sysval_to_id; - /* Mask of UBOs that need to be uploaded */ - uint32_t ubo_mask; + /* Mask of UBOs that need to be uploaded */ + uint32_t ubo_mask; } compiler_context; /* Per-block live_in/live_out */ @@ -321,17 +325,18 @@ typedef struct compiler_context { static inline midgard_instruction * mir_upload_ins(struct compiler_context *ctx, struct midgard_instruction ins) { - midgard_instruction *heap = ralloc(ctx, struct midgard_instruction); - memcpy(heap, &ins, sizeof(ins)); - return heap; + midgard_instruction *heap = ralloc(ctx, struct midgard_instruction); + memcpy(heap, &ins, sizeof(ins)); + return heap; } static inline midgard_instruction * -emit_mir_instruction(struct compiler_context *ctx, struct midgard_instruction ins) +emit_mir_instruction(struct compiler_context *ctx, + struct midgard_instruction ins) { - midgard_instruction *u = mir_upload_ins(ctx, ins); - list_addtail(&u->link, &ctx->current_block->base.instructions); - return u; + midgard_instruction *u = mir_upload_ins(ctx, ins); + list_addtail(&u->link, &ctx->current_block->base.instructions); + return u; } static inline struct midgard_instruction * @@ -339,165 +344,174 @@ mir_insert_instruction_before(struct compiler_context *ctx, struct midgard_instruction *tag, struct midgard_instruction ins) { - struct midgard_instruction *u = mir_upload_ins(ctx, ins); - list_addtail(&u->link, &tag->link); - return u; + struct 
midgard_instruction *u = mir_upload_ins(ctx, ins); + list_addtail(&u->link, &tag->link); + return u; } static inline void mir_remove_instruction(struct midgard_instruction *ins) { - list_del(&ins->link); + list_del(&ins->link); } -static inline midgard_instruction* +static inline midgard_instruction * mir_prev_op(struct midgard_instruction *ins) { - return list_last_entry(&(ins->link), midgard_instruction, link); + return list_last_entry(&(ins->link), midgard_instruction, link); } -static inline midgard_instruction* +static inline midgard_instruction * mir_next_op(struct midgard_instruction *ins) { - return list_first_entry(&(ins->link), midgard_instruction, link); + return list_first_entry(&(ins->link), midgard_instruction, link); } -#define mir_foreach_block(ctx, v) \ - list_for_each_entry(pan_block, v, &ctx->blocks, link) +#define mir_foreach_block(ctx, v) \ + list_for_each_entry(pan_block, v, &ctx->blocks, link) -#define mir_foreach_block_from(ctx, from, v) \ - list_for_each_entry_from(pan_block, v, &from->base, &ctx->blocks, link) +#define mir_foreach_block_from(ctx, from, v) \ + list_for_each_entry_from(pan_block, v, &from->base, &ctx->blocks, link) -#define mir_foreach_instr_in_block(block, v) \ - list_for_each_entry(struct midgard_instruction, v, &block->base.instructions, link) -#define mir_foreach_instr_in_block_rev(block, v) \ - list_for_each_entry_rev(struct midgard_instruction, v, &block->base.instructions, link) +#define mir_foreach_instr_in_block(block, v) \ + list_for_each_entry(struct midgard_instruction, v, \ + &block->base.instructions, link) +#define mir_foreach_instr_in_block_rev(block, v) \ + list_for_each_entry_rev(struct midgard_instruction, v, \ + &block->base.instructions, link) -#define mir_foreach_instr_in_block_safe(block, v) \ - list_for_each_entry_safe(struct midgard_instruction, v, &block->base.instructions, link) +#define mir_foreach_instr_in_block_safe(block, v) \ + list_for_each_entry_safe(struct midgard_instruction, v, \ + &block->base.instructions, link) -#define mir_foreach_instr_in_block_safe_rev(block, v) \ - list_for_each_entry_safe_rev(struct midgard_instruction, v, &block->base.instructions, link) +#define mir_foreach_instr_in_block_safe_rev(block, v) \ + list_for_each_entry_safe_rev(struct midgard_instruction, v, \ + &block->base.instructions, link) -#define mir_foreach_instr_in_block_from(block, v, from) \ - list_for_each_entry_from(struct midgard_instruction, v, from, &block->base.instructions, link) +#define mir_foreach_instr_in_block_from(block, v, from) \ + list_for_each_entry_from(struct midgard_instruction, v, from, \ + &block->base.instructions, link) -#define mir_foreach_instr_in_block_from_rev(block, v, from) \ - list_for_each_entry_from_rev(struct midgard_instruction, v, from, &block->base.instructions, link) +#define mir_foreach_instr_in_block_from_rev(block, v, from) \ + list_for_each_entry_from_rev(struct midgard_instruction, v, from, \ + &block->base.instructions, link) -#define mir_foreach_bundle_in_block(block, v) \ - util_dynarray_foreach(&block->bundles, midgard_bundle, v) +#define mir_foreach_bundle_in_block(block, v) \ + util_dynarray_foreach(&block->bundles, midgard_bundle, v) -#define mir_foreach_bundle_in_block_rev(block, v) \ - util_dynarray_foreach_reverse(&block->bundles, midgard_bundle, v) +#define mir_foreach_bundle_in_block_rev(block, v) \ + util_dynarray_foreach_reverse(&block->bundles, midgard_bundle, v) -#define mir_foreach_instr_in_block_scheduled_rev(block, v) \ - midgard_instruction* v; \ - signed i = 0; \ - 
mir_foreach_bundle_in_block_rev(block, _bundle) \ - for (i = (_bundle->instruction_count - 1), v = _bundle->instructions[i]; \ - i >= 0; \ - --i, v = (i >= 0) ? _bundle->instructions[i] : NULL) \ +#define mir_foreach_instr_in_block_scheduled_rev(block, v) \ + midgard_instruction *v; \ + signed i = 0; \ + mir_foreach_bundle_in_block_rev(block, _bundle) \ + for (i = (_bundle->instruction_count - 1), v = _bundle->instructions[i]; \ + i >= 0; --i, v = (i >= 0) ? _bundle->instructions[i] : NULL) -#define mir_foreach_instr_global(ctx, v) \ - mir_foreach_block(ctx, v_block) \ - mir_foreach_instr_in_block(((midgard_block *) v_block), v) +#define mir_foreach_instr_global(ctx, v) \ + mir_foreach_block(ctx, v_block) \ + mir_foreach_instr_in_block(((midgard_block *)v_block), v) -#define mir_foreach_instr_global_safe(ctx, v) \ - mir_foreach_block(ctx, v_block) \ - mir_foreach_instr_in_block_safe(((midgard_block *) v_block), v) +#define mir_foreach_instr_global_safe(ctx, v) \ + mir_foreach_block(ctx, v_block) \ + mir_foreach_instr_in_block_safe(((midgard_block *)v_block), v) /* Based on set_foreach, expanded with automatic type casts */ -#define mir_foreach_predecessor(blk, v) \ - struct set_entry *_entry_##v; \ - struct midgard_block *v; \ - for (_entry_##v = _mesa_set_next_entry(blk->base.predecessors, NULL), \ - v = (struct midgard_block *) (_entry_##v ? _entry_##v->key : NULL); \ - _entry_##v != NULL; \ - _entry_##v = _mesa_set_next_entry(blk->base.predecessors, _entry_##v), \ - v = (struct midgard_block *) (_entry_##v ? _entry_##v->key : NULL)) +#define mir_foreach_predecessor(blk, v) \ + struct set_entry *_entry_##v; \ + struct midgard_block *v; \ + for (_entry_##v = _mesa_set_next_entry(blk->base.predecessors, NULL), \ + v = (struct midgard_block *)(_entry_##v ? _entry_##v->key : NULL); \ + _entry_##v != NULL; \ + _entry_##v = _mesa_set_next_entry(blk->base.predecessors, _entry_##v), \ + v = (struct midgard_block *)(_entry_##v ? 
_entry_##v->key : NULL)) -#define mir_foreach_src(ins, v) \ - for (unsigned v = 0; v < ARRAY_SIZE(ins->src); ++v) +#define mir_foreach_src(ins, v) \ + for (unsigned v = 0; v < ARRAY_SIZE(ins->src); ++v) static inline midgard_instruction * mir_last_in_block(struct midgard_block *block) { - return list_last_entry(&block->base.instructions, struct midgard_instruction, link); + return list_last_entry(&block->base.instructions, struct midgard_instruction, + link); } static inline midgard_block * mir_get_block(compiler_context *ctx, int idx) { - struct list_head *lst = &ctx->blocks; + struct list_head *lst = &ctx->blocks; - while ((idx--) + 1) - lst = lst->next; + while ((idx--) + 1) + lst = lst->next; - return (struct midgard_block *) lst; + return (struct midgard_block *)lst; } static inline bool mir_is_alu_bundle(midgard_bundle *bundle) { - return IS_ALU(bundle->tag); + return IS_ALU(bundle->tag); } static inline unsigned make_compiler_temp(compiler_context *ctx) { - return (ctx->func->impl->ssa_alloc + ctx->temp_alloc++) << 1; + return (ctx->func->impl->ssa_alloc + ctx->temp_alloc++) << 1; } static inline unsigned make_compiler_temp_reg(compiler_context *ctx) { - return ((ctx->func->impl->reg_alloc + ctx->temp_alloc++) << 1) | PAN_IS_REG; + return ((ctx->func->impl->reg_alloc + ctx->temp_alloc++) << 1) | PAN_IS_REG; } static inline unsigned nir_ssa_index(nir_ssa_def *ssa) { - return (ssa->index << 1) | 0; + return (ssa->index << 1) | 0; } static inline unsigned nir_src_index(compiler_context *ctx, nir_src *src) { - if (src->is_ssa) - return nir_ssa_index(src->ssa); - else { - assert(!src->reg.indirect); - return (src->reg.reg->index << 1) | PAN_IS_REG; - } + if (src->is_ssa) + return nir_ssa_index(src->ssa); + else { + assert(!src->reg.indirect); + return (src->reg.reg->index << 1) | PAN_IS_REG; + } } static inline unsigned nir_dest_index(nir_dest *dst) { - if (dst->is_ssa) - return (dst->ssa.index << 1) | 0; - else { - assert(!dst->reg.indirect); - return (dst->reg.reg->index << 1) | PAN_IS_REG; - } + if (dst->is_ssa) + return (dst->ssa.index << 1) | 0; + else { + assert(!dst->reg.indirect); + return (dst->reg.reg->index << 1) | PAN_IS_REG; + } } - - /* MIR manipulation */ void mir_rewrite_index(compiler_context *ctx, unsigned old, unsigned new); void mir_rewrite_index_src(compiler_context *ctx, unsigned old, unsigned new); void mir_rewrite_index_dst(compiler_context *ctx, unsigned old, unsigned new); -void mir_rewrite_index_dst_single(midgard_instruction *ins, unsigned old, unsigned new); -void mir_rewrite_index_src_single(midgard_instruction *ins, unsigned old, unsigned new); -void mir_rewrite_index_src_swizzle(compiler_context *ctx, unsigned old, unsigned new, unsigned *swizzle); +void mir_rewrite_index_dst_single(midgard_instruction *ins, unsigned old, + unsigned new); +void mir_rewrite_index_src_single(midgard_instruction *ins, unsigned old, + unsigned new); +void mir_rewrite_index_src_swizzle(compiler_context *ctx, unsigned old, + unsigned new, unsigned *swizzle); bool mir_single_use(compiler_context *ctx, unsigned value); unsigned mir_use_count(compiler_context *ctx, unsigned value); -uint16_t mir_bytemask_of_read_components(midgard_instruction *ins, unsigned node); -uint16_t mir_bytemask_of_read_components_index(midgard_instruction *ins, unsigned i); +uint16_t mir_bytemask_of_read_components(midgard_instruction *ins, + unsigned node); +uint16_t mir_bytemask_of_read_components_index(midgard_instruction *ins, + unsigned i); uint16_t mir_from_bytemask(uint16_t bytemask, unsigned bits); 
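/* [Editor's sketch, not part of the patch] Illustration of the mask/bytemask
 * relationship the surrounding helpers (mir_bytemask, mir_from_bytemask,
 * mir_bytemask_of_read_components) traffic in: ins->mask keeps one bit per
 * channel ("not packed fancy"), while a bytemask keeps one bit per byte of a
 * 128-bit vector, so an N-bit channel occupies N/8 consecutive bytemask bits.
 * The function below is illustrative only, not the driver's implementation. */
#include <assert.h>
#include <stdint.h>

static uint16_t
example_bytemask_from_channels(uint16_t channel_mask, unsigned bits)
{
   unsigned bytes_per_channel = bits / 8;
   uint16_t bytemask = 0;

   for (unsigned c = 0; c < 16 / bytes_per_channel; ++c) {
      if (channel_mask & (1u << c)) {
         /* Mark every byte the channel occupies */
         uint16_t chan_bytes = (1u << bytes_per_channel) - 1;
         bytemask |= chan_bytes << (c * bytes_per_channel);
      }
   }

   return bytemask;
}

int
main(void)
{
   /* An .xz write of a 32-bit vec4: channels 0 and 2 -> bytes 0-3 and 8-11 */
   assert(example_bytemask_from_channels(0x5, 32) == 0x0F0F);

   /* An .x write at 16-bit: channel 0 -> bytes 0-1 */
   assert(example_bytemask_from_channels(0x1, 16) == 0x0003);

   return 0;
}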
uint16_t mir_bytemask(midgard_instruction *ins); uint16_t mir_round_bytemask_up(uint16_t mask, unsigned bits); @@ -513,19 +527,25 @@ void mir_print_instruction(midgard_instruction *ins); void mir_print_bundle(midgard_bundle *ctx); void mir_print_block(midgard_block *block); void mir_print_shader(compiler_context *ctx); -bool mir_nontrivial_mod(midgard_instruction *ins, unsigned i, bool check_swizzle); +bool mir_nontrivial_mod(midgard_instruction *ins, unsigned i, + bool check_swizzle); bool mir_nontrivial_outmod(midgard_instruction *ins); -midgard_instruction *mir_insert_instruction_before_scheduled(compiler_context *ctx, midgard_block *block, midgard_instruction *tag, midgard_instruction ins); -midgard_instruction *mir_insert_instruction_after_scheduled(compiler_context *ctx, midgard_block *block, midgard_instruction *tag, midgard_instruction ins); +midgard_instruction *mir_insert_instruction_before_scheduled( + compiler_context *ctx, midgard_block *block, midgard_instruction *tag, + midgard_instruction ins); +midgard_instruction *mir_insert_instruction_after_scheduled( + compiler_context *ctx, midgard_block *block, midgard_instruction *tag, + midgard_instruction ins); void mir_flip(midgard_instruction *ins); void mir_compute_temp_count(compiler_context *ctx); -#define LDST_GLOBAL (REGISTER_LDST_ZERO << 2) -#define LDST_SHARED ((REGISTER_LDST_LOCAL_STORAGE_PTR << 2) | COMPONENT_Z) +#define LDST_GLOBAL (REGISTER_LDST_ZERO << 2) +#define LDST_SHARED ((REGISTER_LDST_LOCAL_STORAGE_PTR << 2) | COMPONENT_Z) #define LDST_SCRATCH ((REGISTER_LDST_PC_SP << 2) | COMPONENT_Z) -void mir_set_offset(compiler_context *ctx, midgard_instruction *ins, nir_src *offset, unsigned seg); +void mir_set_offset(compiler_context *ctx, midgard_instruction *ins, + nir_src *offset, unsigned seg); void mir_set_ubo_offset(midgard_instruction *ins, nir_src *src, unsigned bias); /* 'Intrinsic' move for aliasing */ @@ -533,93 +553,91 @@ void mir_set_ubo_offset(midgard_instruction *ins, nir_src *src, unsigned bias); static inline midgard_instruction v_mov(unsigned src, unsigned dest) { - midgard_instruction ins = { - .type = TAG_ALU_4, - .mask = 0xF, - .src = { ~0, src, ~0, ~0 }, - .src_types = { 0, nir_type_uint32 }, - .swizzle = SWIZZLE_IDENTITY, - .dest = dest, - .dest_type = nir_type_uint32, - .op = midgard_alu_op_imov, - .outmod = midgard_outmod_keeplo, - }; + midgard_instruction ins = { + .type = TAG_ALU_4, + .mask = 0xF, + .src = {~0, src, ~0, ~0}, + .src_types = {0, nir_type_uint32}, + .swizzle = SWIZZLE_IDENTITY, + .dest = dest, + .dest_type = nir_type_uint32, + .op = midgard_alu_op_imov, + .outmod = midgard_outmod_keeplo, + }; - return ins; + return ins; } /* Broad types of register classes so we can handle special * registers */ -#define REG_CLASS_WORK 0 -#define REG_CLASS_LDST 1 -#define REG_CLASS_TEXR 3 -#define REG_CLASS_TEXW 4 +#define REG_CLASS_WORK 0 +#define REG_CLASS_LDST 1 +#define REG_CLASS_TEXR 3 +#define REG_CLASS_TEXW 4 /* Like a move, but to thread local storage! 
*/ static inline midgard_instruction -v_load_store_scratch( - unsigned srcdest, - unsigned index, - bool is_store, - unsigned mask) +v_load_store_scratch(unsigned srcdest, unsigned index, bool is_store, + unsigned mask) { - /* We index by 32-bit vec4s */ - unsigned byte = (index * 4 * 4); + /* We index by 32-bit vec4s */ + unsigned byte = (index * 4 * 4); - midgard_instruction ins = { - .type = TAG_LOAD_STORE_4, - .mask = mask, - .dest_type = nir_type_uint32, - .dest = ~0, - .src = { ~0, ~0, ~0, ~0 }, - .swizzle = SWIZZLE_IDENTITY_4, - .op = is_store ? midgard_op_st_128 : midgard_op_ld_128, - .load_store = { - /* For register spilling - to thread local storage */ - .arg_reg = REGISTER_LDST_LOCAL_STORAGE_PTR, - .arg_comp = COMPONENT_X, - .bitsize_toggle = true, - .index_format = midgard_index_address_u32, - .index_reg = REGISTER_LDST_ZERO, - }, + midgard_instruction ins = { + .type = TAG_LOAD_STORE_4, + .mask = mask, + .dest_type = nir_type_uint32, + .dest = ~0, + .src = {~0, ~0, ~0, ~0}, + .swizzle = SWIZZLE_IDENTITY_4, + .op = is_store ? midgard_op_st_128 : midgard_op_ld_128, + .load_store = + { + /* For register spilling - to thread local storage */ + .arg_reg = REGISTER_LDST_LOCAL_STORAGE_PTR, + .arg_comp = COMPONENT_X, + .bitsize_toggle = true, + .index_format = midgard_index_address_u32, + .index_reg = REGISTER_LDST_ZERO, + }, - /* If we spill an unspill, RA goes into an infinite loop */ - .no_spill = (1 << REG_CLASS_WORK), - }; + /* If we spill an unspill, RA goes into an infinite loop */ + .no_spill = (1 << REG_CLASS_WORK), + }; - ins.constants.u32[0] = byte; + ins.constants.u32[0] = byte; - if (is_store) { - ins.src[0] = srcdest; - ins.src_types[0] = nir_type_uint32; + if (is_store) { + ins.src[0] = srcdest; + ins.src_types[0] = nir_type_uint32; - /* Ensure we are tightly swizzled so liveness analysis is - * correct */ + /* Ensure we are tightly swizzled so liveness analysis is + * correct */ - for (unsigned i = 0; i < 4; ++i) { - if (!(mask & (1 << i))) - ins.swizzle[0][i] = COMPONENT_X; - } - } else - ins.dest = srcdest; + for (unsigned i = 0; i < 4; ++i) { + if (!(mask & (1 << i))) + ins.swizzle[0][i] = COMPONENT_X; + } + } else + ins.dest = srcdest; - return ins; + return ins; } static inline bool mir_has_arg(midgard_instruction *ins, unsigned arg) { - if (!ins) - return false; + if (!ins) + return false; - mir_foreach_src(ins, i) { - if (ins->src[i] == arg) - return true; - } + mir_foreach_src(ins, i) { + if (ins->src[i] == arg) + return true; + } - return false; + return false; } /* Scheduling */ @@ -629,19 +647,19 @@ void midgard_schedule_program(compiler_context *ctx); void mir_ra(compiler_context *ctx); void mir_squeeze_index(compiler_context *ctx); void mir_lower_special_reads(compiler_context *ctx); -void mir_liveness_ins_update(uint16_t *live, midgard_instruction *ins, unsigned max); +void mir_liveness_ins_update(uint16_t *live, midgard_instruction *ins, + unsigned max); void mir_compute_liveness(compiler_context *ctx); void mir_invalidate_liveness(compiler_context *ctx); -bool mir_is_live_after(compiler_context *ctx, midgard_block *block, midgard_instruction *start, int src); +bool mir_is_live_after(compiler_context *ctx, midgard_block *block, + midgard_instruction *start, int src); void mir_create_pipeline_registers(compiler_context *ctx); void midgard_promote_uniforms(compiler_context *ctx); -void -midgard_emit_derivatives(compiler_context *ctx, nir_alu_instr *instr); +void midgard_emit_derivatives(compiler_context *ctx, nir_alu_instr *instr); -void 
-midgard_lower_derivatives(compiler_context *ctx, midgard_block *block); +void midgard_lower_derivatives(compiler_context *ctx, midgard_block *block); bool mir_op_computes_derivatives(gl_shader_stage stage, unsigned op); @@ -650,25 +668,26 @@ void mir_analyze_helper_requirements(compiler_context *ctx); /* Final emission */ -void emit_binary_bundle( - compiler_context *ctx, - midgard_block *block, - midgard_bundle *bundle, - struct util_dynarray *emission, - int next_tag); +void emit_binary_bundle(compiler_context *ctx, midgard_block *block, + midgard_bundle *bundle, struct util_dynarray *emission, + int next_tag); bool nir_fuse_io_16(nir_shader *shader); bool midgard_nir_lod_errata(nir_shader *shader); -unsigned midgard_get_first_tag_from_block(compiler_context *ctx, unsigned block_idx); +unsigned midgard_get_first_tag_from_block(compiler_context *ctx, + unsigned block_idx); /* Optimizations */ bool midgard_opt_copy_prop(compiler_context *ctx, midgard_block *block); -bool midgard_opt_combine_projection(compiler_context *ctx, midgard_block *block); -bool midgard_opt_varying_projection(compiler_context *ctx, midgard_block *block); +bool midgard_opt_combine_projection(compiler_context *ctx, + midgard_block *block); +bool midgard_opt_varying_projection(compiler_context *ctx, + midgard_block *block); bool midgard_opt_dead_code_eliminate(compiler_context *ctx); -bool midgard_opt_dead_move_eliminate(compiler_context *ctx, midgard_block *block); +bool midgard_opt_dead_move_eliminate(compiler_context *ctx, + midgard_block *block); #endif diff --git a/src/panfrost/midgard/disassemble.c b/src/panfrost/midgard/disassemble.c index 490834d0c5c..4a2cab60d92 100644 --- a/src/panfrost/midgard/disassemble.c +++ b/src/panfrost/midgard/disassemble.c @@ -25,319 +25,317 @@ * THE SOFTWARE. */ -#include -#include -#include -#include -#include -#include -#include -#include "midgard.h" -#include "midgard_ops.h" -#include "midgard_quirks.h" #include "disassemble.h" -#include "helpers.h" +#include +#include +#include +#include +#include +#include +#include #include "util/bitscan.h" #include "util/half_float.h" #include "util/u_math.h" +#include "helpers.h" +#include "midgard.h" +#include "midgard_ops.h" +#include "midgard_quirks.h" -#define DEFINE_CASE(define, str) case define: { fprintf(fp, str); break; } +#define DEFINE_CASE(define, str) \ + case define: { \ + fprintf(fp, str); \ + break; \ + } /* These are not mapped to hardware values, they just represent the possible * implicit arg modifiers that some midgard opcodes have, which can be decoded * from the opcodes via midgard_{alu,ldst,tex}_special_arg_mod() */ typedef enum { - midgard_arg_mod_none = 0, - midgard_arg_mod_inv, - midgard_arg_mod_x2, + midgard_arg_mod_none = 0, + midgard_arg_mod_inv, + midgard_arg_mod_x2, } midgard_special_arg_mod; typedef struct { - unsigned *midg_tags; + unsigned *midg_tags; - /* For static analysis to ensure all registers are written at least once before - * use along the source code path (TODO: does this break done for complex CF?) - */ + /* For static analysis to ensure all registers are written at least once + * before use along the source code path (TODO: does this break done for + * complex CF?) 
+ */ - uint16_t midg_ever_written; + uint16_t midg_ever_written; } disassemble_context; /* Transform an expanded writemask (duplicated 8-bit format) into its condensed * form (one bit per component) */ static inline unsigned -condense_writemask(unsigned expanded_mask, - unsigned bits_per_component) +condense_writemask(unsigned expanded_mask, unsigned bits_per_component) { - if (bits_per_component == 8) { - /* Duplicate every bit to go from 8 to 16-channel wrmask */ - unsigned omask = 0; + if (bits_per_component == 8) { + /* Duplicate every bit to go from 8 to 16-channel wrmask */ + unsigned omask = 0; - for (unsigned i = 0; i < 8; ++i) { - if (expanded_mask & (1 << i)) - omask |= (3 << (2 * i)); - } + for (unsigned i = 0; i < 8; ++i) { + if (expanded_mask & (1 << i)) + omask |= (3 << (2 * i)); + } - return omask; - } + return omask; + } - unsigned slots_per_component = bits_per_component / 16; - unsigned max_comp = (16 * 8) / bits_per_component; - unsigned condensed_mask = 0; + unsigned slots_per_component = bits_per_component / 16; + unsigned max_comp = (16 * 8) / bits_per_component; + unsigned condensed_mask = 0; - for (unsigned i = 0; i < max_comp; i++) { - if (expanded_mask & (1 << (i * slots_per_component))) - condensed_mask |= (1 << i); - } + for (unsigned i = 0; i < max_comp; i++) { + if (expanded_mask & (1 << (i * slots_per_component))) + condensed_mask |= (1 << i); + } - return condensed_mask; + return condensed_mask; } static bool print_alu_opcode(FILE *fp, midgard_alu_op op) { - if (alu_opcode_props[op].name) - fprintf(fp, "%s", alu_opcode_props[op].name); - else - fprintf(fp, "alu_op_%02X", op); + if (alu_opcode_props[op].name) + fprintf(fp, "%s", alu_opcode_props[op].name); + else + fprintf(fp, "alu_op_%02X", op); - /* For constant analysis */ - return midgard_is_integer_op(op); + /* For constant analysis */ + return midgard_is_integer_op(op); } static void print_ld_st_opcode(FILE *fp, midgard_load_store_op op) { - if (load_store_opcode_props[op].name) - fprintf(fp, "%s", load_store_opcode_props[op].name); - else - fprintf(fp, "ldst_op_%02X", op); + if (load_store_opcode_props[op].name) + fprintf(fp, "%s", load_store_opcode_props[op].name); + else + fprintf(fp, "ldst_op_%02X", op); } static void -validate_sampler_type(enum mali_texture_op op, enum mali_sampler_type sampler_type) +validate_sampler_type(enum mali_texture_op op, + enum mali_sampler_type sampler_type) { - if (op == midgard_tex_op_mov || op == midgard_tex_op_barrier) - assert(sampler_type == 0); - else - assert(sampler_type > 0); + if (op == midgard_tex_op_mov || op == midgard_tex_op_barrier) + assert(sampler_type == 0); + else + assert(sampler_type > 0); } static void validate_expand_mode(midgard_src_expand_mode expand_mode, midgard_reg_mode reg_mode) { - switch (expand_mode) { - case midgard_src_passthrough: - break; + switch (expand_mode) { + case midgard_src_passthrough: + break; - case midgard_src_rep_low: - assert(reg_mode == midgard_reg_mode_8 || - reg_mode == midgard_reg_mode_16); - break; + case midgard_src_rep_low: + assert(reg_mode == midgard_reg_mode_8 || reg_mode == midgard_reg_mode_16); + break; - case midgard_src_rep_high: - assert(reg_mode == midgard_reg_mode_8 || - reg_mode == midgard_reg_mode_16); - break; + case midgard_src_rep_high: + assert(reg_mode == midgard_reg_mode_8 || reg_mode == midgard_reg_mode_16); + break; - case midgard_src_swap: - assert(reg_mode == midgard_reg_mode_8 || - reg_mode == midgard_reg_mode_16); - break; + case midgard_src_swap: + assert(reg_mode == midgard_reg_mode_8 || 
reg_mode == midgard_reg_mode_16); + break; - case midgard_src_expand_low: - assert(reg_mode != midgard_reg_mode_8); - break; + case midgard_src_expand_low: + assert(reg_mode != midgard_reg_mode_8); + break; - case midgard_src_expand_high: - assert(reg_mode != midgard_reg_mode_8); - break; + case midgard_src_expand_high: + assert(reg_mode != midgard_reg_mode_8); + break; - case midgard_src_expand_low_swap: - assert(reg_mode == midgard_reg_mode_16); - break; + case midgard_src_expand_low_swap: + assert(reg_mode == midgard_reg_mode_16); + break; - case midgard_src_expand_high_swap: - assert(reg_mode == midgard_reg_mode_16); - break; + case midgard_src_expand_high_swap: + assert(reg_mode == midgard_reg_mode_16); + break; - default: - unreachable("Invalid expand mode"); - break; - } + default: + unreachable("Invalid expand mode"); + break; + } } static void print_alu_reg(disassemble_context *ctx, FILE *fp, unsigned reg, bool is_write) { - unsigned uniform_reg = 23 - reg; - bool is_uniform = false; + unsigned uniform_reg = 23 - reg; + bool is_uniform = false; - /* For r8-r15, it could be a work or uniform. We distinguish based on - * the fact work registers are ALWAYS written before use, but uniform - * registers are NEVER written before use. */ + /* For r8-r15, it could be a work or uniform. We distinguish based on + * the fact work registers are ALWAYS written before use, but uniform + * registers are NEVER written before use. */ - if ((reg >= 8 && reg < 16) && !(ctx->midg_ever_written & (1 << reg))) - is_uniform = true; + if ((reg >= 8 && reg < 16) && !(ctx->midg_ever_written & (1 << reg))) + is_uniform = true; - /* r16-r23 are always uniform */ + /* r16-r23 are always uniform */ - if (reg >= 16 && reg <= 23) - is_uniform = true; + if (reg >= 16 && reg <= 23) + is_uniform = true; - if (reg == REGISTER_UNUSED || reg == REGISTER_UNUSED + 1) - fprintf(fp, "TMP%u", reg - REGISTER_UNUSED); - else if (reg == REGISTER_TEXTURE_BASE || reg == REGISTER_TEXTURE_BASE + 1) - fprintf(fp, "%s%u", is_write ? "AT" : "TA", reg - REGISTER_TEXTURE_BASE); - else if (reg == REGISTER_LDST_BASE || reg == REGISTER_LDST_BASE + 1) - fprintf(fp, "AL%u", reg - REGISTER_LDST_BASE); - else if (is_uniform) - fprintf(fp, "U%u", uniform_reg); - else if (reg == 31 && !is_write) - fprintf(fp, "PC_SP"); - else - fprintf(fp, "R%u", reg); + if (reg == REGISTER_UNUSED || reg == REGISTER_UNUSED + 1) + fprintf(fp, "TMP%u", reg - REGISTER_UNUSED); + else if (reg == REGISTER_TEXTURE_BASE || reg == REGISTER_TEXTURE_BASE + 1) + fprintf(fp, "%s%u", is_write ? 
"AT" : "TA", reg - REGISTER_TEXTURE_BASE); + else if (reg == REGISTER_LDST_BASE || reg == REGISTER_LDST_BASE + 1) + fprintf(fp, "AL%u", reg - REGISTER_LDST_BASE); + else if (is_uniform) + fprintf(fp, "U%u", uniform_reg); + else if (reg == 31 && !is_write) + fprintf(fp, "PC_SP"); + else + fprintf(fp, "R%u", reg); } static void print_ldst_write_reg(FILE *fp, unsigned reg) { - switch (reg) { - case 26: - case 27: - fprintf(fp, "AL%u", reg - REGISTER_LDST_BASE); - break; - case 28: - case 29: - fprintf(fp, "AT%u", reg - REGISTER_TEXTURE_BASE); - break; - case 31: - fprintf(fp, "PC_SP"); - break; - default: - fprintf(fp, "R%d", reg); - break; - } + switch (reg) { + case 26: + case 27: + fprintf(fp, "AL%u", reg - REGISTER_LDST_BASE); + break; + case 28: + case 29: + fprintf(fp, "AT%u", reg - REGISTER_TEXTURE_BASE); + break; + case 31: + fprintf(fp, "PC_SP"); + break; + default: + fprintf(fp, "R%d", reg); + break; + } } static void print_ldst_read_reg(FILE *fp, unsigned reg) { - switch (reg) { - case 0: - case 1: - fprintf(fp, "AL%u", reg); - break; - case 2: - fprintf(fp, "PC_SP"); - break; - case 3: - fprintf(fp, "LOCAL_STORAGE_PTR"); - break; - case 4: - fprintf(fp, "LOCAL_THREAD_ID"); - break; - case 5: - fprintf(fp, "GROUP_ID"); - break; - case 6: - fprintf(fp, "GLOBAL_THREAD_ID"); - break; - case 7: - fprintf(fp, "0"); - break; - default: - unreachable("Invalid load/store register read"); - } + switch (reg) { + case 0: + case 1: + fprintf(fp, "AL%u", reg); + break; + case 2: + fprintf(fp, "PC_SP"); + break; + case 3: + fprintf(fp, "LOCAL_STORAGE_PTR"); + break; + case 4: + fprintf(fp, "LOCAL_THREAD_ID"); + break; + case 5: + fprintf(fp, "GROUP_ID"); + break; + case 6: + fprintf(fp, "GLOBAL_THREAD_ID"); + break; + case 7: + fprintf(fp, "0"); + break; + default: + unreachable("Invalid load/store register read"); + } } static void print_tex_reg(FILE *fp, unsigned reg, bool is_write) { - char *str = is_write ? "TA" : "AT"; - int select = reg & 1; + char *str = is_write ? 
"TA" : "AT"; + int select = reg & 1; - switch (reg) { - case 0: - case 1: - fprintf(fp, "R%d", select); - break; - case 26: - case 27: - fprintf(fp, "AL%d", select); - break; - case 28: - case 29: - fprintf(fp, "%s%d", str, select); - break; - default: - unreachable("Invalid texture register"); - } + switch (reg) { + case 0: + case 1: + fprintf(fp, "R%d", select); + break; + case 26: + case 27: + fprintf(fp, "AL%d", select); + break; + case 28: + case 29: + fprintf(fp, "%s%d", str, select); + break; + default: + unreachable("Invalid texture register"); + } } - static char *srcmod_names_int[4] = { - ".sext", - ".zext", - ".replicate", - ".lshift", + ".sext", + ".zext", + ".replicate", + ".lshift", }; static char *argmod_names[3] = { - "", - ".inv", - ".x2", + "", + ".inv", + ".x2", }; -static char *index_format_names[4] = { - "", - ".u64", - ".u32", - ".s32" -}; +static char *index_format_names[4] = {"", ".u64", ".u32", ".s32"}; static void print_alu_outmod(FILE *fp, unsigned outmod, bool is_int, bool half) { - if (is_int && !half) { - assert(outmod == midgard_outmod_keeplo); - return; - } + if (is_int && !half) { + assert(outmod == midgard_outmod_keeplo); + return; + } - if (!is_int && half) - fprintf(fp, ".shrink"); + if (!is_int && half) + fprintf(fp, ".shrink"); - mir_print_outmod(fp, outmod, is_int); + mir_print_outmod(fp, outmod, is_int); } /* arg == 0 (dest), arg == 1 (src1), arg == 2 (src2) */ static midgard_special_arg_mod -midgard_alu_special_arg_mod(midgard_alu_op op, unsigned arg) { - midgard_special_arg_mod mod = midgard_arg_mod_none; +midgard_alu_special_arg_mod(midgard_alu_op op, unsigned arg) +{ + midgard_special_arg_mod mod = midgard_arg_mod_none; - switch (op) { - case midgard_alu_op_ishladd: - case midgard_alu_op_ishlsub: - if (arg == 1) mod = midgard_arg_mod_x2; - break; + switch (op) { + case midgard_alu_op_ishladd: + case midgard_alu_op_ishlsub: + if (arg == 1) + mod = midgard_arg_mod_x2; + break; - default: - break; - } + default: + break; + } - return mod; + return mod; } static void print_quad_word(FILE *fp, uint32_t *words, unsigned tabs) { - unsigned i; + unsigned i; - for (i = 0; i < 4; i++) - fprintf(fp, "0x%08X%s ", words[i], i == 3 ? "" : ","); + for (i = 0; i < 4; i++) + fprintf(fp, "0x%08X%s ", words[i], i == 3 ? 
"" : ","); - fprintf(fp, "\n"); + fprintf(fp, "\n"); } static const char components[16] = "xyzwefghijklmnop"; @@ -345,348 +343,346 @@ static const char components[16] = "xyzwefghijklmnop"; static int bits_for_mode(midgard_reg_mode mode) { - switch (mode) { - case midgard_reg_mode_8: - return 8; - case midgard_reg_mode_16: - return 16; - case midgard_reg_mode_32: - return 32; - case midgard_reg_mode_64: - return 64; - default: - unreachable("Invalid reg mode"); - return 0; - } + switch (mode) { + case midgard_reg_mode_8: + return 8; + case midgard_reg_mode_16: + return 16; + case midgard_reg_mode_32: + return 32; + case midgard_reg_mode_64: + return 64; + default: + unreachable("Invalid reg mode"); + return 0; + } } static int bits_for_mode_halved(midgard_reg_mode mode, bool half) { - unsigned bits = bits_for_mode(mode); + unsigned bits = bits_for_mode(mode); - if (half) - bits >>= 1; + if (half) + bits >>= 1; - return bits; + return bits; } static void -print_vec_selectors_64(FILE *fp, unsigned swizzle, - midgard_reg_mode reg_mode, +print_vec_selectors_64(FILE *fp, unsigned swizzle, midgard_reg_mode reg_mode, midgard_src_expand_mode expand_mode, unsigned selector_offset, uint8_t mask) { - bool expands = INPUT_EXPANDS(expand_mode); + bool expands = INPUT_EXPANDS(expand_mode); - unsigned comp_skip = expands ? 1 : 2; - unsigned mask_bit = 0; - for (unsigned i = selector_offset; i < 4; i += comp_skip, mask_bit += 4) { - if (!(mask & (1 << mask_bit))) continue; + unsigned comp_skip = expands ? 1 : 2; + unsigned mask_bit = 0; + for (unsigned i = selector_offset; i < 4; i += comp_skip, mask_bit += 4) { + if (!(mask & (1 << mask_bit))) + continue; - unsigned a = (swizzle >> (i * 2)) & 3; + unsigned a = (swizzle >> (i * 2)) & 3; - if (INPUT_EXPANDS(expand_mode)) { - if (expand_mode == midgard_src_expand_high) - a += 2; + if (INPUT_EXPANDS(expand_mode)) { + if (expand_mode == midgard_src_expand_high) + a += 2; - fprintf(fp, "%c", components[a / 2]); - continue; - } + fprintf(fp, "%c", components[a / 2]); + continue; + } - unsigned b = (swizzle >> ((i+1) * 2)) & 3; + unsigned b = (swizzle >> ((i + 1) * 2)) & 3; - /* Normally we're adjacent, but if there's an issue, - * don't make it ambiguous */ + /* Normally we're adjacent, but if there's an issue, + * don't make it ambiguous */ - if (b == a + 1) - fprintf(fp, "%c", a >> 1 ? 'Y' : 'X'); - else - fprintf(fp, "[%c%c]", components[a], components[b]); - } + if (b == a + 1) + fprintf(fp, "%c", a >> 1 ? 'Y' : 'X'); + else + fprintf(fp, "[%c%c]", components[a], components[b]); + } } static void -print_vec_selectors(FILE *fp, unsigned swizzle, - midgard_reg_mode reg_mode, +print_vec_selectors(FILE *fp, unsigned swizzle, midgard_reg_mode reg_mode, unsigned selector_offset, uint8_t mask, unsigned *mask_offset) { - assert(reg_mode != midgard_reg_mode_64); + assert(reg_mode != midgard_reg_mode_64); - unsigned mask_skip = MAX2(bits_for_mode(reg_mode) / 16, 1); + unsigned mask_skip = MAX2(bits_for_mode(reg_mode) / 16, 1); - bool is_vec16 = reg_mode == midgard_reg_mode_8; + bool is_vec16 = reg_mode == midgard_reg_mode_8; - for (unsigned i = 0; i < 4; i++, *mask_offset += mask_skip) { - if (!(mask & (1 << *mask_offset))) continue; + for (unsigned i = 0; i < 4; i++, *mask_offset += mask_skip) { + if (!(mask & (1 << *mask_offset))) + continue; - unsigned c = (swizzle >> (i * 2)) & 3; + unsigned c = (swizzle >> (i * 2)) & 3; - /* Vec16 has two components per swizzle selector. */ - if (is_vec16) - c *= 2; + /* Vec16 has two components per swizzle selector. 
*/ + if (is_vec16) + c *= 2; - c += selector_offset; + c += selector_offset; - fprintf(fp, "%c", components[c]); - if (is_vec16) - fprintf(fp, "%c", components[c+1]); - } + fprintf(fp, "%c", components[c]); + if (is_vec16) + fprintf(fp, "%c", components[c + 1]); + } } static void -print_vec_swizzle(FILE *fp, unsigned swizzle, - midgard_src_expand_mode expand, - midgard_reg_mode mode, - uint8_t mask) +print_vec_swizzle(FILE *fp, unsigned swizzle, midgard_src_expand_mode expand, + midgard_reg_mode mode, uint8_t mask) { - unsigned bits = bits_for_mode_halved(mode, INPUT_EXPANDS(expand)); + unsigned bits = bits_for_mode_halved(mode, INPUT_EXPANDS(expand)); - /* Swizzle selectors are divided in two halves that are always - * mirrored, the only difference is the starting component offset. - * The number represents an offset into the components[] array. */ - unsigned first_half = 0; - unsigned second_half = (128 / bits) / 2; /* only used for 8 and 16-bit */ + /* Swizzle selectors are divided in two halves that are always + * mirrored, the only difference is the starting component offset. + * The number represents an offset into the components[] array. */ + unsigned first_half = 0; + unsigned second_half = (128 / bits) / 2; /* only used for 8 and 16-bit */ - switch (expand) { - case midgard_src_passthrough: - if (swizzle == 0xE4) return; /* identity swizzle */ - break; + switch (expand) { + case midgard_src_passthrough: + if (swizzle == 0xE4) + return; /* identity swizzle */ + break; - case midgard_src_expand_low: - second_half /= 2; - break; + case midgard_src_expand_low: + second_half /= 2; + break; - case midgard_src_expand_high: - first_half = second_half; - second_half += second_half / 2; - break; + case midgard_src_expand_high: + first_half = second_half; + second_half += second_half / 2; + break; - /* The rest of the cases are only used for 8 and 16-bit */ + /* The rest of the cases are only used for 8 and 16-bit */ - case midgard_src_rep_low: - second_half = 0; - break; + case midgard_src_rep_low: + second_half = 0; + break; - case midgard_src_rep_high: - first_half = second_half; - break; + case midgard_src_rep_high: + first_half = second_half; + break; - case midgard_src_swap: - first_half = second_half; - second_half = 0; - break; + case midgard_src_swap: + first_half = second_half; + second_half = 0; + break; - case midgard_src_expand_low_swap: - first_half = second_half / 2; - second_half = 0; - break; + case midgard_src_expand_low_swap: + first_half = second_half / 2; + second_half = 0; + break; - case midgard_src_expand_high_swap: - first_half = second_half + second_half / 2; - break; + case midgard_src_expand_high_swap: + first_half = second_half + second_half / 2; + break; - default: - unreachable("Invalid expand mode"); - break; - } + default: + unreachable("Invalid expand mode"); + break; + } - fprintf(fp, "."); + fprintf(fp, "."); - /* Vec2 are weird so we use a separate function to simplify things. */ - if (mode == midgard_reg_mode_64) { - print_vec_selectors_64(fp, swizzle, mode, expand, first_half, mask); - return; - } + /* Vec2 are weird so we use a separate function to simplify things. 
*/ + if (mode == midgard_reg_mode_64) { + print_vec_selectors_64(fp, swizzle, mode, expand, first_half, mask); + return; + } - unsigned mask_offs = 0; - print_vec_selectors(fp, swizzle, mode, first_half, mask, &mask_offs); - if (mode == midgard_reg_mode_8 || mode == midgard_reg_mode_16) - print_vec_selectors(fp, swizzle, mode, second_half, mask, &mask_offs); + unsigned mask_offs = 0; + print_vec_selectors(fp, swizzle, mode, first_half, mask, &mask_offs); + if (mode == midgard_reg_mode_8 || mode == midgard_reg_mode_16) + print_vec_selectors(fp, swizzle, mode, second_half, mask, &mask_offs); } static void print_scalar_constant(FILE *fp, unsigned src_binary, - const midgard_constants *consts, - midgard_scalar_alu *alu) + const midgard_constants *consts, midgard_scalar_alu *alu) { - midgard_scalar_alu_src *src = (midgard_scalar_alu_src *)&src_binary; - assert(consts != NULL); + midgard_scalar_alu_src *src = (midgard_scalar_alu_src *)&src_binary; + assert(consts != NULL); - fprintf(fp, "#"); - mir_print_constant_component(fp, consts, src->component, - src->full ? - midgard_reg_mode_32 : midgard_reg_mode_16, - false, src->mod, alu->op); + fprintf(fp, "#"); + mir_print_constant_component( + fp, consts, src->component, + src->full ? midgard_reg_mode_32 : midgard_reg_mode_16, false, src->mod, + alu->op); } static void print_vector_constants(FILE *fp, unsigned src_binary, - const midgard_constants *consts, - midgard_vector_alu *alu) + const midgard_constants *consts, midgard_vector_alu *alu) { - midgard_vector_alu_src *src = (midgard_vector_alu_src *)&src_binary; - bool expands = INPUT_EXPANDS(src->expand_mode); - unsigned bits = bits_for_mode_halved(alu->reg_mode, expands); - unsigned max_comp = (sizeof(*consts) * 8) / bits; - unsigned comp_mask, num_comp = 0; + midgard_vector_alu_src *src = (midgard_vector_alu_src *)&src_binary; + bool expands = INPUT_EXPANDS(src->expand_mode); + unsigned bits = bits_for_mode_halved(alu->reg_mode, expands); + unsigned max_comp = (sizeof(*consts) * 8) / bits; + unsigned comp_mask, num_comp = 0; - assert(consts); - assert(max_comp <= 16); + assert(consts); + assert(max_comp <= 16); - comp_mask = effective_writemask(alu->op, condense_writemask(alu->mask, bits)); - num_comp = util_bitcount(comp_mask); + comp_mask = + effective_writemask(alu->op, condense_writemask(alu->mask, bits)); + num_comp = util_bitcount(comp_mask); - if (num_comp > 1) - fprintf(fp, "<"); - else - fprintf(fp, "#"); + if (num_comp > 1) + fprintf(fp, "<"); + else + fprintf(fp, "#"); - bool first = true; + bool first = true; - for (unsigned i = 0; i < max_comp; ++i) { - if (!(comp_mask & (1 << i))) continue; + for (unsigned i = 0; i < max_comp; ++i) { + if (!(comp_mask & (1 << i))) + continue; - unsigned c = (src->swizzle >> (i * 2)) & 3; + unsigned c = (src->swizzle >> (i * 2)) & 3; - if (bits == 16 && !expands) { - bool upper = i >= 4; + if (bits == 16 && !expands) { + bool upper = i >= 4; - switch (src->expand_mode) { - case midgard_src_passthrough: - c += upper * 4; - break; - case midgard_src_rep_low: - break; - case midgard_src_rep_high: - c += 4; - break; - case midgard_src_swap: - c += !upper * 4; - break; - default: - unreachable("invalid expand mode"); - break; - } - } else if (bits == 32 && !expands) { - /* Implicitly ok */ - } else if (bits == 64 && !expands) { - /* Implicitly ok */ - } else if (bits == 8 && !expands) { - bool upper = i >= 8; + switch (src->expand_mode) { + case midgard_src_passthrough: + c += upper * 4; + break; + case midgard_src_rep_low: + break; + case 
midgard_src_rep_high: + c += 4; + break; + case midgard_src_swap: + c += !upper * 4; + break; + default: + unreachable("invalid expand mode"); + break; + } + } else if (bits == 32 && !expands) { + /* Implicitly ok */ + } else if (bits == 64 && !expands) { + /* Implicitly ok */ + } else if (bits == 8 && !expands) { + bool upper = i >= 8; - unsigned index = (i >> 1) & 3; - unsigned base = (src->swizzle >> (index * 2)) & 3; - c = base * 2; + unsigned index = (i >> 1) & 3; + unsigned base = (src->swizzle >> (index * 2)) & 3; + c = base * 2; - switch (src->expand_mode) { - case midgard_src_passthrough: - c += upper * 8; - break; - case midgard_src_rep_low: - break; - case midgard_src_rep_high: - c += 8; - break; - case midgard_src_swap: - c += !upper * 8; - break; - default: - unreachable("invalid expand mode"); - break; - } + switch (src->expand_mode) { + case midgard_src_passthrough: + c += upper * 8; + break; + case midgard_src_rep_low: + break; + case midgard_src_rep_high: + c += 8; + break; + case midgard_src_swap: + c += !upper * 8; + break; + default: + unreachable("invalid expand mode"); + break; + } - /* We work on twos, actually */ - if (i & 1) - c++; - } + /* We work on twos, actually */ + if (i & 1) + c++; + } - if (first) - first = false; - else - fprintf(fp, ", "); + if (first) + first = false; + else + fprintf(fp, ", "); - mir_print_constant_component(fp, consts, c, alu->reg_mode, - expands, src->mod, alu->op); - } + mir_print_constant_component(fp, consts, c, alu->reg_mode, expands, + src->mod, alu->op); + } - if (num_comp > 1) - fprintf(fp, ">"); + if (num_comp > 1) + fprintf(fp, ">"); } static void print_srcmod(FILE *fp, bool is_int, bool expands, unsigned mod, bool scalar) { - /* Modifiers change meaning depending on the op's context */ + /* Modifiers change meaning depending on the op's context */ - if (is_int) { - if (expands) - fprintf(fp, "%s", srcmod_names_int[mod]); - } else { - if (mod & MIDGARD_FLOAT_MOD_ABS) - fprintf(fp, ".abs"); - if (mod & MIDGARD_FLOAT_MOD_NEG) - fprintf(fp, ".neg"); - if (expands) - fprintf(fp, ".widen"); - } + if (is_int) { + if (expands) + fprintf(fp, "%s", srcmod_names_int[mod]); + } else { + if (mod & MIDGARD_FLOAT_MOD_ABS) + fprintf(fp, ".abs"); + if (mod & MIDGARD_FLOAT_MOD_NEG) + fprintf(fp, ".neg"); + if (expands) + fprintf(fp, ".widen"); + } } static void print_vector_src(disassemble_context *ctx, FILE *fp, unsigned src_binary, midgard_reg_mode mode, unsigned reg, - midgard_shrink_mode shrink_mode, - uint8_t src_mask, bool is_int, + midgard_shrink_mode shrink_mode, uint8_t src_mask, bool is_int, midgard_special_arg_mod arg_mod) { - midgard_vector_alu_src *src = (midgard_vector_alu_src *)&src_binary; + midgard_vector_alu_src *src = (midgard_vector_alu_src *)&src_binary; - validate_expand_mode(src->expand_mode, mode); + validate_expand_mode(src->expand_mode, mode); - print_alu_reg(ctx, fp, reg, false); + print_alu_reg(ctx, fp, reg, false); - print_vec_swizzle(fp, src->swizzle, src->expand_mode, mode, src_mask); + print_vec_swizzle(fp, src->swizzle, src->expand_mode, mode, src_mask); - fprintf(fp, "%s", argmod_names[arg_mod]); + fprintf(fp, "%s", argmod_names[arg_mod]); - print_srcmod(fp, is_int, INPUT_EXPANDS(src->expand_mode), src->mod, false); + print_srcmod(fp, is_int, INPUT_EXPANDS(src->expand_mode), src->mod, false); } static uint16_t decode_vector_imm(unsigned src2_reg, unsigned imm) { - uint16_t ret; - ret = src2_reg << 11; - ret |= (imm & 0x7) << 8; - ret |= (imm >> 3) & 0xFF; - return ret; + uint16_t ret; + ret = src2_reg << 11; 
+ ret |= (imm & 0x7) << 8; + ret |= (imm >> 3) & 0xFF; + return ret; } static void print_immediate(FILE *fp, uint16_t imm, bool is_instruction_int) { - if (is_instruction_int) - fprintf(fp, "#%u", imm); - else - fprintf(fp, "#%g", _mesa_half_to_float(imm)); + if (is_instruction_int) + fprintf(fp, "#%u", imm); + else + fprintf(fp, "#%g", _mesa_half_to_float(imm)); } static void update_dest(disassemble_context *ctx, unsigned reg) { - /* We should record writes as marking this as a work register. Store - * the max register in work_count; we'll add one at the end */ + /* We should record writes as marking this as a work register. Store + * the max register in work_count; we'll add one at the end */ - if (reg < 16) - ctx->midg_ever_written |= (1 << reg); + if (reg < 16) + ctx->midg_ever_written |= (1 << reg); } static void print_dest(disassemble_context *ctx, FILE *fp, unsigned reg) { - update_dest(ctx, reg); - print_alu_reg(ctx, fp, reg, true); + update_dest(ctx, reg); + print_alu_reg(ctx, fp, reg, true); } /* For 16-bit+ masks, we read off from the 8-bit mask field. For 16-bit (vec8), @@ -697,84 +693,86 @@ print_dest(disassemble_context *ctx, FILE *fp, unsigned reg) * the mask to make it obvious what happened */ static void -print_alu_mask(FILE *fp, uint8_t mask, unsigned bits, midgard_shrink_mode shrink_mode) +print_alu_mask(FILE *fp, uint8_t mask, unsigned bits, + midgard_shrink_mode shrink_mode) { - /* Skip 'complete' masks */ + /* Skip 'complete' masks */ - if (shrink_mode == midgard_shrink_mode_none && mask == 0xFF) - return; + if (shrink_mode == midgard_shrink_mode_none && mask == 0xFF) + return; - fprintf(fp, "."); + fprintf(fp, "."); - unsigned skip = MAX2(bits / 16, 1); - bool tripped = false; + unsigned skip = MAX2(bits / 16, 1); + bool tripped = false; - /* To apply an upper destination shrink_mode, we "shift" the alphabet. - * E.g. with an upper shrink_mode on 32-bit, instead of xyzw, print efgh. - * For upper 16-bit, instead of xyzwefgh, print ijklmnop */ + /* To apply an upper destination shrink_mode, we "shift" the alphabet. + * E.g. with an upper shrink_mode on 32-bit, instead of xyzw, print efgh. + * For upper 16-bit, instead of xyzwefgh, print ijklmnop */ - const char *alphabet = components; + const char *alphabet = components; - if (shrink_mode == midgard_shrink_mode_upper) { - assert(bits != 8); - alphabet += (128 / bits); - } + if (shrink_mode == midgard_shrink_mode_upper) { + assert(bits != 8); + alphabet += (128 / bits); + } - for (unsigned i = 0; i < 8; i += skip) { - bool a = (mask & (1 << i)) != 0; + for (unsigned i = 0; i < 8; i += skip) { + bool a = (mask & (1 << i)) != 0; - for (unsigned j = 1; j < skip; ++j) { - bool dupe = (mask & (1 << (i + j))) != 0; - tripped |= (dupe != a); - } + for (unsigned j = 1; j < skip; ++j) { + bool dupe = (mask & (1 << (i + j))) != 0; + tripped |= (dupe != a); + } - if (a) { - /* TODO: handle shrinking from 16-bit */ - unsigned comp_idx = bits == 8 ? i * 2 : i; - char c = alphabet[comp_idx / skip]; + if (a) { + /* TODO: handle shrinking from 16-bit */ + unsigned comp_idx = bits == 8 ? 
i * 2 : i; + char c = alphabet[comp_idx / skip]; - fprintf(fp, "%c", c); - if (bits == 8) - fprintf(fp, "%c", alphabet[comp_idx+1]); - } - } + fprintf(fp, "%c", c); + if (bits == 8) + fprintf(fp, "%c", alphabet[comp_idx + 1]); + } + } - if (tripped) - fprintf(fp, " /* %X */", mask); + if (tripped) + fprintf(fp, " /* %X */", mask); } /* TODO: 16-bit mode */ static void -print_ldst_mask(FILE *fp, unsigned mask, unsigned swizzle) { - fprintf(fp, "."); +print_ldst_mask(FILE *fp, unsigned mask, unsigned swizzle) +{ + fprintf(fp, "."); - for (unsigned i = 0; i < 4; ++i) { - bool write = (mask & (1 << i)) != 0; - unsigned c = (swizzle >> (i * 2)) & 3; - /* We can't omit the swizzle here since many ldst ops have a - * combined swizzle/writemask, and it would be ambiguous to not - * print the masked-out components. */ - fprintf(fp, "%c", write ? components[c] : '~'); - } + for (unsigned i = 0; i < 4; ++i) { + bool write = (mask & (1 << i)) != 0; + unsigned c = (swizzle >> (i * 2)) & 3; + /* We can't omit the swizzle here since many ldst ops have a + * combined swizzle/writemask, and it would be ambiguous to not + * print the masked-out components. */ + fprintf(fp, "%c", write ? components[c] : '~'); + } } static void print_tex_mask(FILE *fp, unsigned mask, bool upper) { - if (mask == 0xF) { - if (upper) - fprintf(fp, "'"); + if (mask == 0xF) { + if (upper) + fprintf(fp, "'"); - return; - } + return; + } - fprintf(fp, "."); + fprintf(fp, "."); - for (unsigned i = 0; i < 4; ++i) { - bool a = (mask & (1 << i)) != 0; - if (a) - fprintf(fp, "%c", components[i + (upper ? 4 : 0)]); - } + for (unsigned i = 0; i < 4; ++i) { + bool a = (mask & (1 << i)) != 0; + if (a) + fprintf(fp, "%c", components[i + (upper ? 4 : 0)]); + } } static void @@ -782,115 +780,120 @@ print_vector_field(disassemble_context *ctx, FILE *fp, const char *name, uint16_t *words, uint16_t reg_word, const midgard_constants *consts, unsigned tabs, bool verbose) { - midgard_reg_info *reg_info = (midgard_reg_info *)®_word; - midgard_vector_alu *alu_field = (midgard_vector_alu *) words; - midgard_reg_mode mode = alu_field->reg_mode; - midgard_alu_op op = alu_field->op; - unsigned shrink_mode = alu_field->shrink_mode; - bool is_int = midgard_is_integer_op(op); - bool is_int_out = midgard_is_integer_out_op(op); + midgard_reg_info *reg_info = (midgard_reg_info *)®_word; + midgard_vector_alu *alu_field = (midgard_vector_alu *)words; + midgard_reg_mode mode = alu_field->reg_mode; + midgard_alu_op op = alu_field->op; + unsigned shrink_mode = alu_field->shrink_mode; + bool is_int = midgard_is_integer_op(op); + bool is_int_out = midgard_is_integer_out_op(op); - if (verbose) - fprintf(fp, "%s.", name); + if (verbose) + fprintf(fp, "%s.", name); - bool is_instruction_int = print_alu_opcode(fp, alu_field->op); + bool is_instruction_int = print_alu_opcode(fp, alu_field->op); - /* Print lane width */ - fprintf(fp, ".%c%d", is_int_out ? 'i' : 'f', bits_for_mode(mode)); + /* Print lane width */ + fprintf(fp, ".%c%d", is_int_out ? 
'i' : 'f', bits_for_mode(mode)); - fprintf(fp, " "); + fprintf(fp, " "); - /* Mask denoting status of 8-lanes */ - uint8_t mask = alu_field->mask; + /* Mask denoting status of 8-lanes */ + uint8_t mask = alu_field->mask; - /* First, print the destination */ - print_dest(ctx, fp, reg_info->out_reg); + /* First, print the destination */ + print_dest(ctx, fp, reg_info->out_reg); - if (shrink_mode != midgard_shrink_mode_none) { - bool shrinkable = (mode != midgard_reg_mode_8); - bool known = shrink_mode != 0x3; /* Unused value */ + if (shrink_mode != midgard_shrink_mode_none) { + bool shrinkable = (mode != midgard_reg_mode_8); + bool known = shrink_mode != 0x3; /* Unused value */ - if (!(shrinkable && known)) - fprintf(fp, "/* do%u */ ", shrink_mode); - } + if (!(shrinkable && known)) + fprintf(fp, "/* do%u */ ", shrink_mode); + } - /* Instructions like fdot4 do *not* replicate, ensure the - * mask is of only a single component */ + /* Instructions like fdot4 do *not* replicate, ensure the + * mask is of only a single component */ - unsigned rep = GET_CHANNEL_COUNT(alu_opcode_props[op].props); + unsigned rep = GET_CHANNEL_COUNT(alu_opcode_props[op].props); - if (rep) { - unsigned comp_mask = condense_writemask(mask, bits_for_mode(mode)); - unsigned num_comp = util_bitcount(comp_mask); - if (num_comp != 1) - fprintf(fp, "/* err too many components */"); - } - print_alu_mask(fp, mask, bits_for_mode(mode), shrink_mode); + if (rep) { + unsigned comp_mask = condense_writemask(mask, bits_for_mode(mode)); + unsigned num_comp = util_bitcount(comp_mask); + if (num_comp != 1) + fprintf(fp, "/* err too many components */"); + } + print_alu_mask(fp, mask, bits_for_mode(mode), shrink_mode); - /* Print output modifiers */ + /* Print output modifiers */ - print_alu_outmod(fp, alu_field->outmod, is_int_out, shrink_mode != midgard_shrink_mode_none); + print_alu_outmod(fp, alu_field->outmod, is_int_out, + shrink_mode != midgard_shrink_mode_none); - /* Mask out unused components based on the writemask, but don't mask out - * components that are used for interlane instructions like fdot3. */ - uint8_t src_mask = - rep ? expand_writemask(mask_of(rep), util_logbase2(128 / bits_for_mode(mode))) : mask; + /* Mask out unused components based on the writemask, but don't mask out + * components that are used for interlane instructions like fdot3. */ + uint8_t src_mask = + rep ? 
expand_writemask(mask_of(rep), + util_logbase2(128 / bits_for_mode(mode))) + : mask; - fprintf(fp, ", "); + fprintf(fp, ", "); - if (reg_info->src1_reg == REGISTER_CONSTANT) - print_vector_constants(fp, alu_field->src1, consts, alu_field); - else { - midgard_special_arg_mod argmod = midgard_alu_special_arg_mod(op, 1); - print_vector_src(ctx, fp, alu_field->src1, mode, reg_info->src1_reg, - shrink_mode, src_mask, is_int, argmod); - } + if (reg_info->src1_reg == REGISTER_CONSTANT) + print_vector_constants(fp, alu_field->src1, consts, alu_field); + else { + midgard_special_arg_mod argmod = midgard_alu_special_arg_mod(op, 1); + print_vector_src(ctx, fp, alu_field->src1, mode, reg_info->src1_reg, + shrink_mode, src_mask, is_int, argmod); + } - fprintf(fp, ", "); + fprintf(fp, ", "); - if (reg_info->src2_imm) { - uint16_t imm = decode_vector_imm(reg_info->src2_reg, alu_field->src2 >> 2); - print_immediate(fp, imm, is_instruction_int); - } else if (reg_info->src2_reg == REGISTER_CONSTANT) { - print_vector_constants(fp, alu_field->src2, consts, alu_field); - } else { - midgard_special_arg_mod argmod = midgard_alu_special_arg_mod(op, 2); - print_vector_src(ctx, fp, alu_field->src2, mode, reg_info->src2_reg, - shrink_mode, src_mask, is_int, argmod); - } + if (reg_info->src2_imm) { + uint16_t imm = + decode_vector_imm(reg_info->src2_reg, alu_field->src2 >> 2); + print_immediate(fp, imm, is_instruction_int); + } else if (reg_info->src2_reg == REGISTER_CONSTANT) { + print_vector_constants(fp, alu_field->src2, consts, alu_field); + } else { + midgard_special_arg_mod argmod = midgard_alu_special_arg_mod(op, 2); + print_vector_src(ctx, fp, alu_field->src2, mode, reg_info->src2_reg, + shrink_mode, src_mask, is_int, argmod); + } - fprintf(fp, "\n"); + fprintf(fp, "\n"); } static void -print_scalar_src(disassemble_context *ctx, FILE *fp, bool is_int, unsigned src_binary, unsigned reg) +print_scalar_src(disassemble_context *ctx, FILE *fp, bool is_int, + unsigned src_binary, unsigned reg) { - midgard_scalar_alu_src *src = (midgard_scalar_alu_src *)&src_binary; + midgard_scalar_alu_src *src = (midgard_scalar_alu_src *)&src_binary; - print_alu_reg(ctx, fp, reg, false); + print_alu_reg(ctx, fp, reg, false); - unsigned c = src->component; + unsigned c = src->component; - if (src->full) { - assert((c & 1) == 0); - c >>= 1; - } + if (src->full) { + assert((c & 1) == 0); + c >>= 1; + } - fprintf(fp, ".%c", components[c]); + fprintf(fp, ".%c", components[c]); - print_srcmod(fp, is_int, !src->full, src->mod, true); + print_srcmod(fp, is_int, !src->full, src->mod, true); } static uint16_t decode_scalar_imm(unsigned src2_reg, unsigned imm) { - uint16_t ret; - ret = src2_reg << 11; - ret |= (imm & 3) << 9; - ret |= (imm & 4) << 6; - ret |= (imm & 0x38) << 2; - ret |= imm >> 6; - return ret; + uint16_t ret; + ret = src2_reg << 11; + ret |= (imm & 3) << 9; + ret |= (imm & 4) << 6; + ret |= (imm & 0x38) << 2; + ret |= imm >> 6; + return ret; } static void @@ -898,241 +901,245 @@ print_scalar_field(disassemble_context *ctx, FILE *fp, const char *name, uint16_t *words, uint16_t reg_word, const midgard_constants *consts, unsigned tabs, bool verbose) { - midgard_reg_info *reg_info = (midgard_reg_info *)®_word; - midgard_scalar_alu *alu_field = (midgard_scalar_alu *) words; - bool is_int = midgard_is_integer_op(alu_field->op); - bool is_int_out = midgard_is_integer_out_op(alu_field->op); - bool full = alu_field->output_full; + midgard_reg_info *reg_info = (midgard_reg_info *)®_word; + midgard_scalar_alu *alu_field = 
(midgard_scalar_alu *)words; + bool is_int = midgard_is_integer_op(alu_field->op); + bool is_int_out = midgard_is_integer_out_op(alu_field->op); + bool full = alu_field->output_full; - if (alu_field->reserved) - fprintf(fp, "scalar ALU reserved bit set\n"); + if (alu_field->reserved) + fprintf(fp, "scalar ALU reserved bit set\n"); - if (verbose) - fprintf(fp, "%s.", name); + if (verbose) + fprintf(fp, "%s.", name); - bool is_instruction_int = print_alu_opcode(fp, alu_field->op); + bool is_instruction_int = print_alu_opcode(fp, alu_field->op); - /* Print lane width, in this case the lane width is always 32-bit, but - * we print it anyway to make it consistent with the other instructions. */ - fprintf(fp, ".%c32", is_int_out ? 'i' : 'f'); + /* Print lane width, in this case the lane width is always 32-bit, but + * we print it anyway to make it consistent with the other instructions. */ + fprintf(fp, ".%c32", is_int_out ? 'i' : 'f'); - fprintf(fp, " "); + fprintf(fp, " "); - print_dest(ctx, fp, reg_info->out_reg); - unsigned c = alu_field->output_component; + print_dest(ctx, fp, reg_info->out_reg); + unsigned c = alu_field->output_component; - if (full) { - assert((c & 1) == 0); - c >>= 1; - } + if (full) { + assert((c & 1) == 0); + c >>= 1; + } - fprintf(fp, ".%c", components[c]); + fprintf(fp, ".%c", components[c]); - print_alu_outmod(fp, alu_field->outmod, is_int_out, !full); + print_alu_outmod(fp, alu_field->outmod, is_int_out, !full); - fprintf(fp, ", "); + fprintf(fp, ", "); - if (reg_info->src1_reg == REGISTER_CONSTANT) - print_scalar_constant(fp, alu_field->src1, consts, alu_field); - else - print_scalar_src(ctx, fp, is_int, alu_field->src1, reg_info->src1_reg); + if (reg_info->src1_reg == REGISTER_CONSTANT) + print_scalar_constant(fp, alu_field->src1, consts, alu_field); + else + print_scalar_src(ctx, fp, is_int, alu_field->src1, reg_info->src1_reg); - fprintf(fp, ", "); + fprintf(fp, ", "); - if (reg_info->src2_imm) { - uint16_t imm = decode_scalar_imm(reg_info->src2_reg, - alu_field->src2); - print_immediate(fp, imm, is_instruction_int); - } else if (reg_info->src2_reg == REGISTER_CONSTANT) { - print_scalar_constant(fp, alu_field->src2, consts, alu_field); - } else - print_scalar_src(ctx, fp, is_int, alu_field->src2, reg_info->src2_reg); + if (reg_info->src2_imm) { + uint16_t imm = decode_scalar_imm(reg_info->src2_reg, alu_field->src2); + print_immediate(fp, imm, is_instruction_int); + } else if (reg_info->src2_reg == REGISTER_CONSTANT) { + print_scalar_constant(fp, alu_field->src2, consts, alu_field); + } else + print_scalar_src(ctx, fp, is_int, alu_field->src2, reg_info->src2_reg); - fprintf(fp, "\n"); + fprintf(fp, "\n"); } static void print_branch_op(FILE *fp, unsigned op) { - switch (op) { - case midgard_jmp_writeout_op_branch_uncond: - fprintf(fp, "uncond."); - break; + switch (op) { + case midgard_jmp_writeout_op_branch_uncond: + fprintf(fp, "uncond."); + break; - case midgard_jmp_writeout_op_branch_cond: - fprintf(fp, "cond."); - break; + case midgard_jmp_writeout_op_branch_cond: + fprintf(fp, "cond."); + break; - case midgard_jmp_writeout_op_writeout: - fprintf(fp, "write."); - break; + case midgard_jmp_writeout_op_writeout: + fprintf(fp, "write."); + break; - case midgard_jmp_writeout_op_tilebuffer_pending: - fprintf(fp, "tilebuffer."); - break; + case midgard_jmp_writeout_op_tilebuffer_pending: + fprintf(fp, "tilebuffer."); + break; - case midgard_jmp_writeout_op_discard: - fprintf(fp, "discard."); - break; + case midgard_jmp_writeout_op_discard: + fprintf(fp, 
"discard."); + break; - default: - fprintf(fp, "unk%u.", op); - break; - } + default: + fprintf(fp, "unk%u.", op); + break; + } } static void print_branch_cond(FILE *fp, int cond) { - switch (cond) { - case midgard_condition_write0: - fprintf(fp, "write0"); - break; + switch (cond) { + case midgard_condition_write0: + fprintf(fp, "write0"); + break; - case midgard_condition_false: - fprintf(fp, "false"); - break; + case midgard_condition_false: + fprintf(fp, "false"); + break; - case midgard_condition_true: - fprintf(fp, "true"); - break; + case midgard_condition_true: + fprintf(fp, "true"); + break; - case midgard_condition_always: - fprintf(fp, "always"); - break; + case midgard_condition_always: + fprintf(fp, "always"); + break; - default: - fprintf(fp, "unk%X", cond); - break; - } + default: + fprintf(fp, "unk%X", cond); + break; + } } static const char * function_call_mode(enum midgard_call_mode mode) { - switch (mode) { - case midgard_call_mode_default: return ""; - case midgard_call_mode_call: return ".call"; - case midgard_call_mode_return: return ".return"; - default: return ".reserved"; - } + switch (mode) { + case midgard_call_mode_default: + return ""; + case midgard_call_mode_call: + return ".call"; + case midgard_call_mode_return: + return ".return"; + default: + return ".reserved"; + } } static bool -print_compact_branch_writeout_field(disassemble_context *ctx, FILE *fp, uint16_t word) +print_compact_branch_writeout_field(disassemble_context *ctx, FILE *fp, + uint16_t word) { - midgard_jmp_writeout_op op = word & 0x7; + midgard_jmp_writeout_op op = word & 0x7; - switch (op) { - case midgard_jmp_writeout_op_branch_uncond: { - midgard_branch_uncond br_uncond; - memcpy((char *) &br_uncond, (char *) &word, sizeof(br_uncond)); - fprintf(fp, "br.uncond%s ", function_call_mode(br_uncond.call_mode)); + switch (op) { + case midgard_jmp_writeout_op_branch_uncond: { + midgard_branch_uncond br_uncond; + memcpy((char *)&br_uncond, (char *)&word, sizeof(br_uncond)); + fprintf(fp, "br.uncond%s ", function_call_mode(br_uncond.call_mode)); - if (br_uncond.offset >= 0) - fprintf(fp, "+"); + if (br_uncond.offset >= 0) + fprintf(fp, "+"); - fprintf(fp, "%d -> %s", br_uncond.offset, - midgard_tag_props[br_uncond.dest_tag].name); - fprintf(fp, "\n"); + fprintf(fp, "%d -> %s", br_uncond.offset, + midgard_tag_props[br_uncond.dest_tag].name); + fprintf(fp, "\n"); - return br_uncond.offset >= 0; - } + return br_uncond.offset >= 0; + } - case midgard_jmp_writeout_op_branch_cond: - case midgard_jmp_writeout_op_writeout: - case midgard_jmp_writeout_op_discard: - default: { - midgard_branch_cond br_cond; - memcpy((char *) &br_cond, (char *) &word, sizeof(br_cond)); + case midgard_jmp_writeout_op_branch_cond: + case midgard_jmp_writeout_op_writeout: + case midgard_jmp_writeout_op_discard: + default: { + midgard_branch_cond br_cond; + memcpy((char *)&br_cond, (char *)&word, sizeof(br_cond)); - fprintf(fp, "br."); + fprintf(fp, "br."); - print_branch_op(fp, br_cond.op); - print_branch_cond(fp, br_cond.cond); + print_branch_op(fp, br_cond.op); + print_branch_cond(fp, br_cond.cond); - fprintf(fp, " "); + fprintf(fp, " "); - if (br_cond.offset >= 0) - fprintf(fp, "+"); + if (br_cond.offset >= 0) + fprintf(fp, "+"); - fprintf(fp, "%d -> %s", br_cond.offset, - midgard_tag_props[br_cond.dest_tag].name); - fprintf(fp, "\n"); + fprintf(fp, "%d -> %s", br_cond.offset, + midgard_tag_props[br_cond.dest_tag].name); + fprintf(fp, "\n"); - return br_cond.offset >= 0; - } - } + return br_cond.offset >= 0; + } + } - return 
false; + return false; } static bool -print_extended_branch_writeout_field(disassemble_context *ctx, FILE *fp, uint8_t *words, - unsigned next) +print_extended_branch_writeout_field(disassemble_context *ctx, FILE *fp, + uint8_t *words, unsigned next) { - midgard_branch_extended br; - memcpy((char *) &br, (char *) words, sizeof(br)); + midgard_branch_extended br; + memcpy((char *)&br, (char *)words, sizeof(br)); - fprintf(fp, "brx%s.", function_call_mode(br.call_mode)); + fprintf(fp, "brx%s.", function_call_mode(br.call_mode)); - print_branch_op(fp, br.op); + print_branch_op(fp, br.op); - /* Condition codes are a LUT in the general case, but simply repeated 8 times for single-channel conditions.. Check this. */ + /* Condition codes are a LUT in the general case, but simply repeated 8 times + * for single-channel conditions.. Check this. */ - bool single_channel = true; + bool single_channel = true; - for (unsigned i = 0; i < 16; i += 2) { - single_channel &= (((br.cond >> i) & 0x3) == (br.cond & 0x3)); - } + for (unsigned i = 0; i < 16; i += 2) { + single_channel &= (((br.cond >> i) & 0x3) == (br.cond & 0x3)); + } - if (single_channel) - print_branch_cond(fp, br.cond & 0x3); - else - fprintf(fp, "lut%X", br.cond); + if (single_channel) + print_branch_cond(fp, br.cond & 0x3); + else + fprintf(fp, "lut%X", br.cond); - fprintf(fp, " "); + fprintf(fp, " "); - if (br.offset >= 0) - fprintf(fp, "+"); + if (br.offset >= 0) + fprintf(fp, "+"); - fprintf(fp, "%d -> %s\n", br.offset, - midgard_tag_props[br.dest_tag].name); + fprintf(fp, "%d -> %s\n", br.offset, midgard_tag_props[br.dest_tag].name); - unsigned I = next + br.offset * 4; + unsigned I = next + br.offset * 4; - if (ctx->midg_tags[I] && ctx->midg_tags[I] != br.dest_tag) { - fprintf(fp, "\t/* XXX TAG ERROR: jumping to %s but tagged %s \n", - midgard_tag_props[br.dest_tag].name, - midgard_tag_props[ctx->midg_tags[I]].name); - } + if (ctx->midg_tags[I] && ctx->midg_tags[I] != br.dest_tag) { + fprintf(fp, "\t/* XXX TAG ERROR: jumping to %s but tagged %s \n", + midgard_tag_props[br.dest_tag].name, + midgard_tag_props[ctx->midg_tags[I]].name); + } - ctx->midg_tags[I] = br.dest_tag; + ctx->midg_tags[I] = br.dest_tag; - return br.offset >= 0; + return br.offset >= 0; } static unsigned num_alu_fields_enabled(uint32_t control_word) { - unsigned ret = 0; + unsigned ret = 0; - if ((control_word >> 17) & 1) - ret++; + if ((control_word >> 17) & 1) + ret++; - if ((control_word >> 19) & 1) - ret++; + if ((control_word >> 19) & 1) + ret++; - if ((control_word >> 21) & 1) - ret++; + if ((control_word >> 21) & 1) + ret++; - if ((control_word >> 23) & 1) - ret++; + if ((control_word >> 23) & 1) + ret++; - if ((control_word >> 25) & 1) - ret++; + if ((control_word >> 25) & 1) + ret++; - return ret; + return ret; } static bool @@ -1140,101 +1147,106 @@ print_alu_word(disassemble_context *ctx, FILE *fp, uint32_t *words, unsigned num_quad_words, unsigned tabs, unsigned next, bool verbose) { - uint32_t control_word = words[0]; - uint16_t *beginning_ptr = (uint16_t *)(words + 1); - unsigned num_fields = num_alu_fields_enabled(control_word); - uint16_t *word_ptr = beginning_ptr + num_fields; - unsigned num_words = 2 + num_fields; - const midgard_constants *consts = NULL; - bool branch_forward = false; + uint32_t control_word = words[0]; + uint16_t *beginning_ptr = (uint16_t *)(words + 1); + unsigned num_fields = num_alu_fields_enabled(control_word); + uint16_t *word_ptr = beginning_ptr + num_fields; + unsigned num_words = 2 + num_fields; + const midgard_constants 
*consts = NULL; + bool branch_forward = false; - if ((control_word >> 17) & 1) - num_words += 3; + if ((control_word >> 17) & 1) + num_words += 3; - if ((control_word >> 19) & 1) - num_words += 2; + if ((control_word >> 19) & 1) + num_words += 2; - if ((control_word >> 21) & 1) - num_words += 3; + if ((control_word >> 21) & 1) + num_words += 3; - if ((control_word >> 23) & 1) - num_words += 2; + if ((control_word >> 23) & 1) + num_words += 2; - if ((control_word >> 25) & 1) - num_words += 3; + if ((control_word >> 25) & 1) + num_words += 3; - if ((control_word >> 26) & 1) - num_words += 1; + if ((control_word >> 26) & 1) + num_words += 1; - if ((control_word >> 27) & 1) - num_words += 3; + if ((control_word >> 27) & 1) + num_words += 3; - if (num_quad_words > (num_words + 7) / 8) { - assert(num_quad_words == (num_words + 15) / 8); - //Assume that the extra quadword is constants - consts = (midgard_constants *)(words + (4 * num_quad_words - 4)); - } + if (num_quad_words > (num_words + 7) / 8) { + assert(num_quad_words == (num_words + 15) / 8); + // Assume that the extra quadword is constants + consts = (midgard_constants *)(words + (4 * num_quad_words - 4)); + } - if ((control_word >> 16) & 1) - fprintf(fp, "unknown bit 16 enabled\n"); + if ((control_word >> 16) & 1) + fprintf(fp, "unknown bit 16 enabled\n"); - if ((control_word >> 17) & 1) { - print_vector_field(ctx, fp, "vmul", word_ptr, *beginning_ptr, consts, tabs, verbose); - beginning_ptr += 1; - word_ptr += 3; - } + if ((control_word >> 17) & 1) { + print_vector_field(ctx, fp, "vmul", word_ptr, *beginning_ptr, consts, + tabs, verbose); + beginning_ptr += 1; + word_ptr += 3; + } - if ((control_word >> 18) & 1) - fprintf(fp, "unknown bit 18 enabled\n"); + if ((control_word >> 18) & 1) + fprintf(fp, "unknown bit 18 enabled\n"); - if ((control_word >> 19) & 1) { - print_scalar_field(ctx, fp, "sadd", word_ptr, *beginning_ptr, consts, tabs, verbose); - beginning_ptr += 1; - word_ptr += 2; - } + if ((control_word >> 19) & 1) { + print_scalar_field(ctx, fp, "sadd", word_ptr, *beginning_ptr, consts, + tabs, verbose); + beginning_ptr += 1; + word_ptr += 2; + } - if ((control_word >> 20) & 1) - fprintf(fp, "unknown bit 20 enabled\n"); + if ((control_word >> 20) & 1) + fprintf(fp, "unknown bit 20 enabled\n"); - if ((control_word >> 21) & 1) { - print_vector_field(ctx, fp, "vadd", word_ptr, *beginning_ptr, consts, tabs, verbose); - beginning_ptr += 1; - word_ptr += 3; - } + if ((control_word >> 21) & 1) { + print_vector_field(ctx, fp, "vadd", word_ptr, *beginning_ptr, consts, + tabs, verbose); + beginning_ptr += 1; + word_ptr += 3; + } - if ((control_word >> 22) & 1) - fprintf(fp, "unknown bit 22 enabled\n"); + if ((control_word >> 22) & 1) + fprintf(fp, "unknown bit 22 enabled\n"); - if ((control_word >> 23) & 1) { - print_scalar_field(ctx, fp, "smul", word_ptr, *beginning_ptr, consts, tabs, verbose); - beginning_ptr += 1; - word_ptr += 2; - } + if ((control_word >> 23) & 1) { + print_scalar_field(ctx, fp, "smul", word_ptr, *beginning_ptr, consts, + tabs, verbose); + beginning_ptr += 1; + word_ptr += 2; + } - if ((control_word >> 24) & 1) - fprintf(fp, "unknown bit 24 enabled\n"); + if ((control_word >> 24) & 1) + fprintf(fp, "unknown bit 24 enabled\n"); - if ((control_word >> 25) & 1) { - print_vector_field(ctx, fp, "lut", word_ptr, *beginning_ptr, consts, tabs, verbose); - word_ptr += 3; - } + if ((control_word >> 25) & 1) { + print_vector_field(ctx, fp, "lut", word_ptr, *beginning_ptr, consts, tabs, + verbose); + word_ptr += 3; + } - if 
((control_word >> 26) & 1) { - branch_forward |= print_compact_branch_writeout_field(ctx, fp, *word_ptr); - word_ptr += 1; - } + if ((control_word >> 26) & 1) { + branch_forward |= print_compact_branch_writeout_field(ctx, fp, *word_ptr); + word_ptr += 1; + } - if ((control_word >> 27) & 1) { - branch_forward |= print_extended_branch_writeout_field(ctx, fp, (uint8_t *) word_ptr, next); - word_ptr += 3; - } + if ((control_word >> 27) & 1) { + branch_forward |= print_extended_branch_writeout_field( + ctx, fp, (uint8_t *)word_ptr, next); + word_ptr += 3; + } - if (consts) - fprintf(fp, "uconstants 0x%X, 0x%X, 0x%X, 0x%X\n", - consts->u32[0], consts->u32[1], - consts->u32[2], consts->u32[3]); + if (consts) + fprintf(fp, "uconstants 0x%X, 0x%X, 0x%X, 0x%X\n", consts->u32[0], + consts->u32[1], consts->u32[2], consts->u32[3]); - return branch_forward; + return branch_forward; } /* TODO: how can we use this now that we know that these params can't be known @@ -1242,349 +1254,346 @@ print_alu_word(disassemble_context *ctx, FILE *fp, uint32_t *words, UNUSED static void print_varying_parameters(FILE *fp, midgard_load_store_word *word) { - midgard_varying_params p = midgard_unpack_varying_params(*word); + midgard_varying_params p = midgard_unpack_varying_params(*word); - /* If a varying, there are qualifiers */ - if (p.flat_shading) - fprintf(fp, ".flat"); + /* If a varying, there are qualifiers */ + if (p.flat_shading) + fprintf(fp, ".flat"); - if (p.perspective_correction) - fprintf(fp, ".correction"); + if (p.perspective_correction) + fprintf(fp, ".correction"); - if (p.centroid_mapping) - fprintf(fp, ".centroid"); + if (p.centroid_mapping) + fprintf(fp, ".centroid"); - if (p.interpolate_sample) - fprintf(fp, ".sample"); + if (p.interpolate_sample) + fprintf(fp, ".sample"); - switch (p.modifier) { - case midgard_varying_mod_perspective_y: - fprintf(fp, ".perspectivey"); - break; - case midgard_varying_mod_perspective_z: - fprintf(fp, ".perspectivez"); - break; - case midgard_varying_mod_perspective_w: - fprintf(fp, ".perspectivew"); - break; - default: - unreachable("invalid varying modifier"); - break; - } + switch (p.modifier) { + case midgard_varying_mod_perspective_y: + fprintf(fp, ".perspectivey"); + break; + case midgard_varying_mod_perspective_z: + fprintf(fp, ".perspectivez"); + break; + case midgard_varying_mod_perspective_w: + fprintf(fp, ".perspectivew"); + break; + default: + unreachable("invalid varying modifier"); + break; + } } /* Helper to print integer well-formatted, but only when non-zero. 
*/ static void midgard_print_sint(FILE *fp, int n) { - if (n > 0) - fprintf(fp, " + 0x%X", n); - else if (n < 0) - fprintf(fp, " - 0x%X", -n); + if (n > 0) + fprintf(fp, " + 0x%X", n); + else if (n < 0) + fprintf(fp, " - 0x%X", -n); } static void -print_load_store_instr(disassemble_context *ctx, FILE *fp, uint64_t data, bool verbose) +print_load_store_instr(disassemble_context *ctx, FILE *fp, uint64_t data, + bool verbose) { - midgard_load_store_word *word = (midgard_load_store_word *) &data; + midgard_load_store_word *word = (midgard_load_store_word *)&data; - print_ld_st_opcode(fp, word->op); + print_ld_st_opcode(fp, word->op); - if (word->op == midgard_op_trap) { - fprintf(fp, " 0x%X\n", word->signed_offset); - return; - } + if (word->op == midgard_op_trap) { + fprintf(fp, " 0x%X\n", word->signed_offset); + return; + } - /* Print opcode modifiers */ + /* Print opcode modifiers */ - if (OP_USES_ATTRIB(word->op)) { - /* Print non-default attribute tables */ - bool default_secondary = - (word->op == midgard_op_st_vary_32) || - (word->op == midgard_op_st_vary_16) || - (word->op == midgard_op_st_vary_32u) || - (word->op == midgard_op_st_vary_32i) || - (word->op == midgard_op_ld_vary_32) || - (word->op == midgard_op_ld_vary_16) || - (word->op == midgard_op_ld_vary_32u) || - (word->op == midgard_op_ld_vary_32i); + if (OP_USES_ATTRIB(word->op)) { + /* Print non-default attribute tables */ + bool default_secondary = (word->op == midgard_op_st_vary_32) || + (word->op == midgard_op_st_vary_16) || + (word->op == midgard_op_st_vary_32u) || + (word->op == midgard_op_st_vary_32i) || + (word->op == midgard_op_ld_vary_32) || + (word->op == midgard_op_ld_vary_16) || + (word->op == midgard_op_ld_vary_32u) || + (word->op == midgard_op_ld_vary_32i); - bool default_primary = - (word->op == midgard_op_ld_attr_32) || - (word->op == midgard_op_ld_attr_16) || - (word->op == midgard_op_ld_attr_32u) || - (word->op == midgard_op_ld_attr_32i); + bool default_primary = (word->op == midgard_op_ld_attr_32) || + (word->op == midgard_op_ld_attr_16) || + (word->op == midgard_op_ld_attr_32u) || + (word->op == midgard_op_ld_attr_32i); - bool has_default = (default_secondary || default_primary); - bool auto32 = (word->index_format >> 0) & 1; - bool is_secondary = (word->index_format >> 1) & 1; + bool has_default = (default_secondary || default_primary); + bool auto32 = (word->index_format >> 0) & 1; + bool is_secondary = (word->index_format >> 1) & 1; - if (auto32) - fprintf(fp, ".a32"); + if (auto32) + fprintf(fp, ".a32"); - if (has_default && (is_secondary != default_secondary)) - fprintf(fp, ".%s", is_secondary ? "secondary" : "primary"); - } else if (word->op == midgard_op_ld_cubemap_coords || OP_IS_PROJECTION(word->op)) - fprintf(fp, ".%s", word->bitsize_toggle ? "f32" : "f16"); + if (has_default && (is_secondary != default_secondary)) + fprintf(fp, ".%s", is_secondary ? "secondary" : "primary"); + } else if (word->op == midgard_op_ld_cubemap_coords || + OP_IS_PROJECTION(word->op)) + fprintf(fp, ".%s", word->bitsize_toggle ? "f32" : "f16"); - fprintf(fp, " "); + fprintf(fp, " "); - /* src/dest register */ + /* src/dest register */ - if (!OP_IS_STORE(word->op)) { - print_ldst_write_reg(fp, word->reg); + if (!OP_IS_STORE(word->op)) { + print_ldst_write_reg(fp, word->reg); - /* Some opcodes don't have a swizzable src register, and - * instead the swizzle is applied before the result is written - * to the dest reg. For these ops, we combine the writemask - * with the swizzle to display them in the disasm compactly. 
*/ - unsigned swizzle = word->swizzle; - if ((OP_IS_REG2REG_LDST(word->op) && - word->op != midgard_op_lea && - word->op != midgard_op_lea_image) || OP_IS_ATOMIC(word->op)) - swizzle = 0xE4; - print_ldst_mask(fp, word->mask, swizzle); - } else { - uint8_t mask = - (word->mask & 0x1) | - ((word->mask & 0x2) << 1) | - ((word->mask & 0x4) << 2) | - ((word->mask & 0x8) << 3); - mask |= mask << 1; - print_ldst_read_reg(fp, word->reg); - print_vec_swizzle(fp, word->swizzle, midgard_src_passthrough, - midgard_reg_mode_32, mask); - } + /* Some opcodes don't have a swizzable src register, and + * instead the swizzle is applied before the result is written + * to the dest reg. For these ops, we combine the writemask + * with the swizzle to display them in the disasm compactly. */ + unsigned swizzle = word->swizzle; + if ((OP_IS_REG2REG_LDST(word->op) && word->op != midgard_op_lea && + word->op != midgard_op_lea_image) || + OP_IS_ATOMIC(word->op)) + swizzle = 0xE4; + print_ldst_mask(fp, word->mask, swizzle); + } else { + uint8_t mask = (word->mask & 0x1) | ((word->mask & 0x2) << 1) | + ((word->mask & 0x4) << 2) | ((word->mask & 0x8) << 3); + mask |= mask << 1; + print_ldst_read_reg(fp, word->reg); + print_vec_swizzle(fp, word->swizzle, midgard_src_passthrough, + midgard_reg_mode_32, mask); + } - /* ld_ubo args */ - if (OP_IS_UBO_READ(word->op)) { - if (word->signed_offset & 1) { /* buffer index imm */ - unsigned imm = midgard_unpack_ubo_index_imm(*word); - fprintf(fp, ", %u", imm); - } else { /* buffer index from reg */ - fprintf(fp, ", "); - print_ldst_read_reg(fp, word->arg_reg); - fprintf(fp, ".%c", components[word->arg_comp]); - } + /* ld_ubo args */ + if (OP_IS_UBO_READ(word->op)) { + if (word->signed_offset & 1) { /* buffer index imm */ + unsigned imm = midgard_unpack_ubo_index_imm(*word); + fprintf(fp, ", %u", imm); + } else { /* buffer index from reg */ + fprintf(fp, ", "); + print_ldst_read_reg(fp, word->arg_reg); + fprintf(fp, ".%c", components[word->arg_comp]); + } - fprintf(fp, ", "); - print_ldst_read_reg(fp, word->index_reg); - fprintf(fp, ".%c", components[word->index_comp]); - if (word->index_shift) - fprintf(fp, " << %u", word->index_shift); - midgard_print_sint(fp, UNPACK_LDST_UBO_OFS(word->signed_offset)); - } + fprintf(fp, ", "); + print_ldst_read_reg(fp, word->index_reg); + fprintf(fp, ".%c", components[word->index_comp]); + if (word->index_shift) + fprintf(fp, " << %u", word->index_shift); + midgard_print_sint(fp, UNPACK_LDST_UBO_OFS(word->signed_offset)); + } - /* mem addr expression */ - if (OP_HAS_ADDRESS(word->op)) { - fprintf(fp, ", "); - bool first = true; + /* mem addr expression */ + if (OP_HAS_ADDRESS(word->op)) { + fprintf(fp, ", "); + bool first = true; - /* Skip printing zero */ - if (word->arg_reg != 7 || verbose) { - print_ldst_read_reg(fp, word->arg_reg); - fprintf(fp, ".u%d.%c", - word->bitsize_toggle ? 64 : 32, components[word->arg_comp]); - first = false; - } + /* Skip printing zero */ + if (word->arg_reg != 7 || verbose) { + print_ldst_read_reg(fp, word->arg_reg); + fprintf(fp, ".u%d.%c", word->bitsize_toggle ? 
64 : 32, + components[word->arg_comp]); + first = false; + } - if ((word->op < midgard_op_atomic_cmpxchg || - word->op > midgard_op_atomic_cmpxchg64_be) && - word->index_reg != 0x7) { - if (!first) - fprintf(fp, " + "); + if ((word->op < midgard_op_atomic_cmpxchg || + word->op > midgard_op_atomic_cmpxchg64_be) && + word->index_reg != 0x7) { + if (!first) + fprintf(fp, " + "); - print_ldst_read_reg(fp, word->index_reg); - fprintf(fp, "%s.%c", - index_format_names[word->index_format], - components[word->index_comp]); - if (word->index_shift) - fprintf(fp, " << %u", word->index_shift); - } + print_ldst_read_reg(fp, word->index_reg); + fprintf(fp, "%s.%c", index_format_names[word->index_format], + components[word->index_comp]); + if (word->index_shift) + fprintf(fp, " << %u", word->index_shift); + } - midgard_print_sint(fp, word->signed_offset); - } + midgard_print_sint(fp, word->signed_offset); + } - /* src reg for reg2reg ldst opcodes */ - if (OP_IS_REG2REG_LDST(word->op)) { - fprintf(fp, ", "); - print_ldst_read_reg(fp, word->arg_reg); - print_vec_swizzle(fp, word->swizzle, midgard_src_passthrough, - midgard_reg_mode_32, 0xFF); - } + /* src reg for reg2reg ldst opcodes */ + if (OP_IS_REG2REG_LDST(word->op)) { + fprintf(fp, ", "); + print_ldst_read_reg(fp, word->arg_reg); + print_vec_swizzle(fp, word->swizzle, midgard_src_passthrough, + midgard_reg_mode_32, 0xFF); + } - /* atomic ops encode the source arg where the ldst swizzle would be. */ - if (OP_IS_ATOMIC(word->op)) { - unsigned src = (word->swizzle >> 2) & 0x7; - unsigned src_comp = word->swizzle & 0x3; - fprintf(fp, ", "); - print_ldst_read_reg(fp, src); - fprintf(fp, ".%c", components[src_comp]); - } + /* atomic ops encode the source arg where the ldst swizzle would be. */ + if (OP_IS_ATOMIC(word->op)) { + unsigned src = (word->swizzle >> 2) & 0x7; + unsigned src_comp = word->swizzle & 0x3; + fprintf(fp, ", "); + print_ldst_read_reg(fp, src); + fprintf(fp, ".%c", components[src_comp]); + } - /* CMPXCHG encodes the extra comparison arg where the index reg would be. */ - if (word->op >= midgard_op_atomic_cmpxchg && - word->op <= midgard_op_atomic_cmpxchg64_be) { - fprintf(fp, ", "); - print_ldst_read_reg(fp, word->index_reg); - fprintf(fp, ".%c", components[word->index_comp]); - } + /* CMPXCHG encodes the extra comparison arg where the index reg would be. 
*/ + if (word->op >= midgard_op_atomic_cmpxchg && + word->op <= midgard_op_atomic_cmpxchg64_be) { + fprintf(fp, ", "); + print_ldst_read_reg(fp, word->index_reg); + fprintf(fp, ".%c", components[word->index_comp]); + } - /* index reg for attr/vary/images, selector for ld/st_special */ - if (OP_IS_SPECIAL(word->op) || OP_USES_ATTRIB(word->op)) { - fprintf(fp, ", "); - print_ldst_read_reg(fp, word->index_reg); - fprintf(fp, ".%c", components[word->index_comp]); - if (word->index_shift) - fprintf(fp, " << %u", word->index_shift); - midgard_print_sint(fp, UNPACK_LDST_ATTRIB_OFS(word->signed_offset)); - } + /* index reg for attr/vary/images, selector for ld/st_special */ + if (OP_IS_SPECIAL(word->op) || OP_USES_ATTRIB(word->op)) { + fprintf(fp, ", "); + print_ldst_read_reg(fp, word->index_reg); + fprintf(fp, ".%c", components[word->index_comp]); + if (word->index_shift) + fprintf(fp, " << %u", word->index_shift); + midgard_print_sint(fp, UNPACK_LDST_ATTRIB_OFS(word->signed_offset)); + } - /* vertex reg for attrib/varying ops, coord reg for image ops */ - if (OP_USES_ATTRIB(word->op)) { - fprintf(fp, ", "); - print_ldst_read_reg(fp, word->arg_reg); + /* vertex reg for attrib/varying ops, coord reg for image ops */ + if (OP_USES_ATTRIB(word->op)) { + fprintf(fp, ", "); + print_ldst_read_reg(fp, word->arg_reg); - if (OP_IS_IMAGE(word->op)) - fprintf(fp, ".u%d", word->bitsize_toggle ? 64 : 32); + if (OP_IS_IMAGE(word->op)) + fprintf(fp, ".u%d", word->bitsize_toggle ? 64 : 32); - fprintf(fp, ".%c", components[word->arg_comp]); + fprintf(fp, ".%c", components[word->arg_comp]); - if (word->bitsize_toggle && !OP_IS_IMAGE(word->op)) - midgard_print_sint(fp, UNPACK_LDST_VERTEX_OFS(word->signed_offset)); - } + if (word->bitsize_toggle && !OP_IS_IMAGE(word->op)) + midgard_print_sint(fp, UNPACK_LDST_VERTEX_OFS(word->signed_offset)); + } - /* TODO: properly decode format specifier for PACK/UNPACK ops */ - if (OP_IS_PACK_COLOUR(word->op) || OP_IS_UNPACK_COLOUR(word->op)) { - fprintf(fp, ", "); - unsigned format_specifier = (word->signed_offset << 4) | word->index_shift; - fprintf(fp, "0x%X", format_specifier); - } + /* TODO: properly decode format specifier for PACK/UNPACK ops */ + if (OP_IS_PACK_COLOUR(word->op) || OP_IS_UNPACK_COLOUR(word->op)) { + fprintf(fp, ", "); + unsigned format_specifier = + (word->signed_offset << 4) | word->index_shift; + fprintf(fp, "0x%X", format_specifier); + } - fprintf(fp, "\n"); + fprintf(fp, "\n"); - /* Debugging stuff */ + /* Debugging stuff */ - if (!OP_IS_STORE(word->op)) - update_dest(ctx, word->reg); + if (!OP_IS_STORE(word->op)) + update_dest(ctx, word->reg); } static void -print_load_store_word(disassemble_context *ctx, FILE *fp, uint32_t *word, bool verbose) +print_load_store_word(disassemble_context *ctx, FILE *fp, uint32_t *word, + bool verbose) { - midgard_load_store *load_store = (midgard_load_store *) word; + midgard_load_store *load_store = (midgard_load_store *)word; - if (load_store->word1 != 3) { - print_load_store_instr(ctx, fp, load_store->word1, verbose); - } + if (load_store->word1 != 3) { + print_load_store_instr(ctx, fp, load_store->word1, verbose); + } - if (load_store->word2 != 3) { - print_load_store_instr(ctx, fp, load_store->word2, verbose); - } + if (load_store->word2 != 3) { + print_load_store_instr(ctx, fp, load_store->word2, verbose); + } } static void print_texture_reg_select(FILE *fp, uint8_t u, unsigned base) { - midgard_tex_register_select sel; - memcpy(&sel, &u, sizeof(u)); + midgard_tex_register_select sel; + memcpy(&sel, &u, sizeof(u)); 
-        print_tex_reg(fp, base + sel.select, false);
+   print_tex_reg(fp, base + sel.select, false);

-        unsigned component = sel.component;
+   unsigned component = sel.component;

-        /* Use the upper half in half-reg mode */
-        if (sel.upper) {
-                assert(!sel.full);
-                component += 4;
-        }
+   /* Use the upper half in half-reg mode */
+   if (sel.upper) {
+      assert(!sel.full);
+      component += 4;
+   }

-        fprintf(fp, ".%c.%d", components[component], sel.full ? 32 : 16);
+   fprintf(fp, ".%c.%d", components[component], sel.full ? 32 : 16);

-        assert(sel.zero == 0);
+   assert(sel.zero == 0);
}

static void
print_texture_format(FILE *fp, int format)
{
-        /* Act like a modifier */
-        fprintf(fp, ".");
+   /* Act like a modifier */
+   fprintf(fp, ".");

-        switch (format) {
-                DEFINE_CASE(1, "1d");
-                DEFINE_CASE(2, "2d");
-                DEFINE_CASE(3, "3d");
-                DEFINE_CASE(0, "cube");
+   switch (format) {
+      DEFINE_CASE(1, "1d");
+      DEFINE_CASE(2, "2d");
+      DEFINE_CASE(3, "3d");
+      DEFINE_CASE(0, "cube");

-        default:
-                unreachable("Bad format");
-        }
+   default:
+      unreachable("Bad format");
+   }
}

static void
print_texture_op(FILE *fp, unsigned op)
{
-        if (tex_opcode_props[op].name)
-                fprintf(fp, "%s", tex_opcode_props[op].name);
-        else
-                fprintf(fp, "tex_op_%02X", op);
+   if (tex_opcode_props[op].name)
+      fprintf(fp, "%s", tex_opcode_props[op].name);
+   else
+      fprintf(fp, "tex_op_%02X", op);
}

static bool
texture_op_takes_bias(unsigned op)
{
-        return op == midgard_tex_op_normal;
+   return op == midgard_tex_op_normal;
}

static char
sampler_type_name(enum mali_sampler_type t)
{
-        switch (t) {
-        case MALI_SAMPLER_FLOAT:
-                return 'f';
-        case MALI_SAMPLER_UNSIGNED:
-                return 'u';
-        case MALI_SAMPLER_SIGNED:
-                return 'i';
-        default:
-                return '?';
-        }
-
+   switch (t) {
+   case MALI_SAMPLER_FLOAT:
+      return 'f';
+   case MALI_SAMPLER_UNSIGNED:
+      return 'u';
+   case MALI_SAMPLER_SIGNED:
+      return 'i';
+   default:
+      return '?';
+   }
}

static void
print_texture_barrier(FILE *fp, uint32_t *word)
{
-        midgard_texture_barrier_word *barrier = (midgard_texture_barrier_word *) word;
+   midgard_texture_barrier_word *barrier = (midgard_texture_barrier_word *)word;

-        if (barrier->type != TAG_TEXTURE_4_BARRIER)
-                fprintf(fp, "/* barrier tag %X != tex/bar */ ", barrier->type);
+   if (barrier->type != TAG_TEXTURE_4_BARRIER)
+      fprintf(fp, "/* barrier tag %X != tex/bar */ ", barrier->type);

-        if (!barrier->cont)
-                fprintf(fp, "/* cont missing? */");
+   if (!barrier->cont)
+      fprintf(fp, "/* cont missing? */");

-        if (!barrier->last)
-                fprintf(fp, "/* last missing? */");
+   if (!barrier->last)
+      fprintf(fp, "/* last missing? 
*/"); - if (barrier->zero1) - fprintf(fp, "/* zero1 = 0x%X */ ", barrier->zero1); + if (barrier->zero1) + fprintf(fp, "/* zero1 = 0x%X */ ", barrier->zero1); - if (barrier->zero2) - fprintf(fp, "/* zero2 = 0x%X */ ", barrier->zero2); + if (barrier->zero2) + fprintf(fp, "/* zero2 = 0x%X */ ", barrier->zero2); - if (barrier->zero3) - fprintf(fp, "/* zero3 = 0x%X */ ", barrier->zero3); + if (barrier->zero3) + fprintf(fp, "/* zero3 = 0x%X */ ", barrier->zero3); - if (barrier->zero4) - fprintf(fp, "/* zero4 = 0x%X */ ", barrier->zero4); + if (barrier->zero4) + fprintf(fp, "/* zero4 = 0x%X */ ", barrier->zero4); - if (barrier->zero5) - fprintf(fp, "/* zero4 = 0x%" PRIx64 " */ ", barrier->zero5); + if (barrier->zero5) + fprintf(fp, "/* zero4 = 0x%" PRIx64 " */ ", barrier->zero5); - if (barrier->out_of_order) - fprintf(fp, ".ooo%u", barrier->out_of_order); + if (barrier->out_of_order) + fprintf(fp, ".ooo%u", barrier->out_of_order); - fprintf(fp, "\n"); + fprintf(fp, "\n"); } #undef DEFINE_CASE @@ -1592,334 +1601,352 @@ print_texture_barrier(FILE *fp, uint32_t *word) static const char * texture_mode(enum mali_texture_mode mode) { - switch (mode) { - case TEXTURE_NORMAL: return ""; - case TEXTURE_SHADOW: return ".shadow"; - case TEXTURE_GATHER_SHADOW: return ".gather.shadow"; - case TEXTURE_GATHER_X: return ".gatherX"; - case TEXTURE_GATHER_Y: return ".gatherY"; - case TEXTURE_GATHER_Z: return ".gatherZ"; - case TEXTURE_GATHER_W: return ".gatherW"; - default: return "unk"; - } + switch (mode) { + case TEXTURE_NORMAL: + return ""; + case TEXTURE_SHADOW: + return ".shadow"; + case TEXTURE_GATHER_SHADOW: + return ".gather.shadow"; + case TEXTURE_GATHER_X: + return ".gatherX"; + case TEXTURE_GATHER_Y: + return ".gatherY"; + case TEXTURE_GATHER_Z: + return ".gatherZ"; + case TEXTURE_GATHER_W: + return ".gatherW"; + default: + return "unk"; + } } static const char * derivative_mode(enum mali_derivative_mode mode) { - switch (mode) { - case TEXTURE_DFDX: return ".x"; - case TEXTURE_DFDY: return ".y"; - default: return "unk"; - } + switch (mode) { + case TEXTURE_DFDX: + return ".x"; + case TEXTURE_DFDY: + return ".y"; + default: + return "unk"; + } } static const char * partial_exection_mode(enum midgard_partial_execution mode) { - switch (mode) { - case MIDGARD_PARTIAL_EXECUTION_NONE: return ""; - case MIDGARD_PARTIAL_EXECUTION_SKIP: return ".skip"; - case MIDGARD_PARTIAL_EXECUTION_KILL: return ".kill"; - default: return ".reserved"; - } + switch (mode) { + case MIDGARD_PARTIAL_EXECUTION_NONE: + return ""; + case MIDGARD_PARTIAL_EXECUTION_SKIP: + return ".skip"; + case MIDGARD_PARTIAL_EXECUTION_KILL: + return ".kill"; + default: + return ".reserved"; + } } static void print_texture_word(disassemble_context *ctx, FILE *fp, uint32_t *word, unsigned tabs, unsigned in_reg_base, unsigned out_reg_base) { - midgard_texture_word *texture = (midgard_texture_word *) word; - validate_sampler_type(texture->op, texture->sampler_type); + midgard_texture_word *texture = (midgard_texture_word *)word; + validate_sampler_type(texture->op, texture->sampler_type); - /* Broad category of texture operation in question */ - print_texture_op(fp, texture->op); + /* Broad category of texture operation in question */ + print_texture_op(fp, texture->op); - /* Barriers use a dramatically different code path */ - if (texture->op == midgard_tex_op_barrier) { - print_texture_barrier(fp, word); - return; - } else if (texture->type == TAG_TEXTURE_4_BARRIER) - fprintf (fp, "/* nonbarrier had tex/bar tag */ "); - else if (texture->type == 
TAG_TEXTURE_4_VTX) - fprintf (fp, ".vtx"); + /* Barriers use a dramatically different code path */ + if (texture->op == midgard_tex_op_barrier) { + print_texture_barrier(fp, word); + return; + } else if (texture->type == TAG_TEXTURE_4_BARRIER) + fprintf(fp, "/* nonbarrier had tex/bar tag */ "); + else if (texture->type == TAG_TEXTURE_4_VTX) + fprintf(fp, ".vtx"); - if (texture->op == midgard_tex_op_derivative) - fprintf(fp, "%s", derivative_mode(texture->mode)); - else - fprintf(fp, "%s", texture_mode(texture->mode)); + if (texture->op == midgard_tex_op_derivative) + fprintf(fp, "%s", derivative_mode(texture->mode)); + else + fprintf(fp, "%s", texture_mode(texture->mode)); - /* Specific format in question */ - print_texture_format(fp, texture->format); + /* Specific format in question */ + print_texture_format(fp, texture->format); - /* Instruction "modifiers" parallel the ALU instructions. */ - fputs(partial_exection_mode(texture->exec), fp); + /* Instruction "modifiers" parallel the ALU instructions. */ + fputs(partial_exection_mode(texture->exec), fp); - if (texture->out_of_order) - fprintf(fp, ".ooo%u", texture->out_of_order); + if (texture->out_of_order) + fprintf(fp, ".ooo%u", texture->out_of_order); - fprintf(fp, " "); - print_tex_reg(fp, out_reg_base + texture->out_reg_select, true); - print_tex_mask(fp, texture->mask, texture->out_upper); - fprintf(fp, ".%c%d", texture->sampler_type == MALI_SAMPLER_FLOAT ? 'f' : 'i', - texture->out_full ? 32 : 16); - assert(!(texture->out_full && texture->out_upper)); + fprintf(fp, " "); + print_tex_reg(fp, out_reg_base + texture->out_reg_select, true); + print_tex_mask(fp, texture->mask, texture->out_upper); + fprintf(fp, ".%c%d", texture->sampler_type == MALI_SAMPLER_FLOAT ? 'f' : 'i', + texture->out_full ? 
32 : 16); + assert(!(texture->out_full && texture->out_upper)); - /* Output modifiers are only valid for float texture operations */ - if (texture->sampler_type == MALI_SAMPLER_FLOAT) - mir_print_outmod(fp, texture->outmod, false); + /* Output modifiers are only valid for float texture operations */ + if (texture->sampler_type == MALI_SAMPLER_FLOAT) + mir_print_outmod(fp, texture->outmod, false); - fprintf(fp, ", "); + fprintf(fp, ", "); - /* Depending on whether we read from textures directly or indirectly, - * we may be able to update our analysis */ + /* Depending on whether we read from textures directly or indirectly, + * we may be able to update our analysis */ - if (texture->texture_register) { - fprintf(fp, "texture["); - print_texture_reg_select(fp, texture->texture_handle, in_reg_base); - fprintf(fp, "], "); - } else { - fprintf(fp, "texture%u, ", texture->texture_handle); - } + if (texture->texture_register) { + fprintf(fp, "texture["); + print_texture_reg_select(fp, texture->texture_handle, in_reg_base); + fprintf(fp, "], "); + } else { + fprintf(fp, "texture%u, ", texture->texture_handle); + } - /* Print the type, GL style */ - fprintf(fp, "%csampler", sampler_type_name(texture->sampler_type)); + /* Print the type, GL style */ + fprintf(fp, "%csampler", sampler_type_name(texture->sampler_type)); - if (texture->sampler_register) { - fprintf(fp, "["); - print_texture_reg_select(fp, texture->sampler_handle, in_reg_base); - fprintf(fp, "]"); - } else { - fprintf(fp, "%u", texture->sampler_handle); - } + if (texture->sampler_register) { + fprintf(fp, "["); + print_texture_reg_select(fp, texture->sampler_handle, in_reg_base); + fprintf(fp, "]"); + } else { + fprintf(fp, "%u", texture->sampler_handle); + } - print_vec_swizzle(fp, texture->swizzle, midgard_src_passthrough, midgard_reg_mode_32, 0xFF); + print_vec_swizzle(fp, texture->swizzle, midgard_src_passthrough, + midgard_reg_mode_32, 0xFF); - fprintf(fp, ", "); + fprintf(fp, ", "); - midgard_src_expand_mode exp = - texture->in_reg_upper ? midgard_src_expand_high : midgard_src_passthrough; - print_tex_reg(fp, in_reg_base + texture->in_reg_select, false); - print_vec_swizzle(fp, texture->in_reg_swizzle, exp, midgard_reg_mode_32, 0xFF); - fprintf(fp, ".%d", texture->in_reg_full ? 32 : 16); - assert(!(texture->in_reg_full && texture->in_reg_upper)); + midgard_src_expand_mode exp = + texture->in_reg_upper ? midgard_src_expand_high : midgard_src_passthrough; + print_tex_reg(fp, in_reg_base + texture->in_reg_select, false); + print_vec_swizzle(fp, texture->in_reg_swizzle, exp, midgard_reg_mode_32, + 0xFF); + fprintf(fp, ".%d", texture->in_reg_full ? 32 : 16); + assert(!(texture->in_reg_full && texture->in_reg_upper)); - /* There is *always* an offset attached. Of - * course, that offset is just immediate #0 for a - * GLES call that doesn't take an offset. If there - * is a non-negative non-zero offset, this is - * specified in immediate offset mode, with the - * values in the offset_* fields as immediates. If - * this is a negative offset, we instead switch to - * a register offset mode, where the offset_* - * fields become register triplets */ + /* There is *always* an offset attached. Of + * course, that offset is just immediate #0 for a + * GLES call that doesn't take an offset. If there + * is a non-negative non-zero offset, this is + * specified in immediate offset mode, with the + * values in the offset_* fields as immediates. 
If + * this is a negative offset, we instead switch to + * a register offset mode, where the offset_* + * fields become register triplets */ - if (texture->offset_register) { - fprintf(fp, " + "); + if (texture->offset_register) { + fprintf(fp, " + "); - bool full = texture->offset & 1; - bool select = texture->offset & 2; - bool upper = texture->offset & 4; - unsigned swizzle = texture->offset >> 3; - midgard_src_expand_mode exp = - upper ? midgard_src_expand_high : midgard_src_passthrough; + bool full = texture->offset & 1; + bool select = texture->offset & 2; + bool upper = texture->offset & 4; + unsigned swizzle = texture->offset >> 3; + midgard_src_expand_mode exp = + upper ? midgard_src_expand_high : midgard_src_passthrough; - print_tex_reg(fp, in_reg_base + select, false); - print_vec_swizzle(fp, swizzle, exp, midgard_reg_mode_32, 0xFF); - fprintf(fp, ".%d", full ? 32 : 16); - assert(!(texture->out_full && texture->out_upper)); + print_tex_reg(fp, in_reg_base + select, false); + print_vec_swizzle(fp, swizzle, exp, midgard_reg_mode_32, 0xFF); + fprintf(fp, ".%d", full ? 32 : 16); + assert(!(texture->out_full && texture->out_upper)); - fprintf(fp, ", "); - } else if (texture->offset) { - /* Only select ops allow negative immediate offsets, verify */ + fprintf(fp, ", "); + } else if (texture->offset) { + /* Only select ops allow negative immediate offsets, verify */ - signed offset_x = (texture->offset & 0xF); - signed offset_y = ((texture->offset >> 4) & 0xF); - signed offset_z = ((texture->offset >> 8) & 0xF); + signed offset_x = (texture->offset & 0xF); + signed offset_y = ((texture->offset >> 4) & 0xF); + signed offset_z = ((texture->offset >> 8) & 0xF); - bool neg_x = offset_x < 0; - bool neg_y = offset_y < 0; - bool neg_z = offset_z < 0; - bool any_neg = neg_x || neg_y || neg_z; + bool neg_x = offset_x < 0; + bool neg_y = offset_y < 0; + bool neg_z = offset_z < 0; + bool any_neg = neg_x || neg_y || neg_z; - if (any_neg && texture->op != midgard_tex_op_fetch) - fprintf(fp, "/* invalid negative */ "); + if (any_neg && texture->op != midgard_tex_op_fetch) + fprintf(fp, "/* invalid negative */ "); - /* Regardless, just print the immediate offset */ + /* Regardless, just print the immediate offset */ - fprintf(fp, " + <%d, %d, %d>, ", offset_x, offset_y, offset_z); - } else { - fprintf(fp, ", "); - } + fprintf(fp, " + <%d, %d, %d>, ", offset_x, offset_y, offset_z); + } else { + fprintf(fp, ", "); + } - char lod_operand = texture_op_takes_bias(texture->op) ? '+' : '='; + char lod_operand = texture_op_takes_bias(texture->op) ? '+' : '='; - if (texture->lod_register) { - fprintf(fp, "lod %c ", lod_operand); - print_texture_reg_select(fp, texture->bias, in_reg_base); - fprintf(fp, ", "); + if (texture->lod_register) { + fprintf(fp, "lod %c ", lod_operand); + print_texture_reg_select(fp, texture->bias, in_reg_base); + fprintf(fp, ", "); - if (texture->bias_int) - fprintf(fp, " /* bias_int = 0x%X */", texture->bias_int); - } else if (texture->op == midgard_tex_op_fetch) { - /* For texel fetch, the int LOD is in the fractional place and - * there is no fraction. We *always* have an explicit LOD, even - * if it's zero. */ + if (texture->bias_int) + fprintf(fp, " /* bias_int = 0x%X */", texture->bias_int); + } else if (texture->op == midgard_tex_op_fetch) { + /* For texel fetch, the int LOD is in the fractional place and + * there is no fraction. We *always* have an explicit LOD, even + * if it's zero. 
*/ - if (texture->bias_int) - fprintf(fp, " /* bias_int = 0x%X */ ", texture->bias_int); + if (texture->bias_int) + fprintf(fp, " /* bias_int = 0x%X */ ", texture->bias_int); - fprintf(fp, "lod = %u, ", texture->bias); - } else if (texture->bias || texture->bias_int) { - signed bias_int = texture->bias_int; - float bias_frac = texture->bias / 256.0f; - float bias = bias_int + bias_frac; + fprintf(fp, "lod = %u, ", texture->bias); + } else if (texture->bias || texture->bias_int) { + signed bias_int = texture->bias_int; + float bias_frac = texture->bias / 256.0f; + float bias = bias_int + bias_frac; - bool is_bias = texture_op_takes_bias(texture->op); - char sign = (bias >= 0.0) ? '+' : '-'; - char operand = is_bias ? sign : '='; + bool is_bias = texture_op_takes_bias(texture->op); + char sign = (bias >= 0.0) ? '+' : '-'; + char operand = is_bias ? sign : '='; - fprintf(fp, "lod %c %f, ", operand, fabsf(bias)); - } + fprintf(fp, "lod %c %f, ", operand, fabsf(bias)); + } - fprintf(fp, "\n"); + fprintf(fp, "\n"); - /* While not zero in general, for these simple instructions the - * following unknowns are zero, so we don't include them */ + /* While not zero in general, for these simple instructions the + * following unknowns are zero, so we don't include them */ - if (texture->unknown4 || - texture->unknown8) { - fprintf(fp, "// unknown4 = 0x%x\n", texture->unknown4); - fprintf(fp, "// unknown8 = 0x%x\n", texture->unknown8); - } + if (texture->unknown4 || texture->unknown8) { + fprintf(fp, "// unknown4 = 0x%x\n", texture->unknown4); + fprintf(fp, "// unknown8 = 0x%x\n", texture->unknown8); + } } void -disassemble_midgard(FILE *fp, uint8_t *code, size_t size, unsigned gpu_id, bool verbose) +disassemble_midgard(FILE *fp, uint8_t *code, size_t size, unsigned gpu_id, + bool verbose) { - uint32_t *words = (uint32_t *) code; - unsigned num_words = size / 4; - int tabs = 0; + uint32_t *words = (uint32_t *)code; + unsigned num_words = size / 4; + int tabs = 0; - bool branch_forward = false; + bool branch_forward = false; - int last_next_tag = -1; + int last_next_tag = -1; - unsigned i = 0; + unsigned i = 0; - disassemble_context ctx = { - .midg_tags = calloc(sizeof(ctx.midg_tags[0]), num_words), - .midg_ever_written = 0, - }; + disassemble_context ctx = { + .midg_tags = calloc(sizeof(ctx.midg_tags[0]), num_words), + .midg_ever_written = 0, + }; - while (i < num_words) { - unsigned tag = words[i] & 0xF; - unsigned next_tag = (words[i] >> 4) & 0xF; - unsigned num_quad_words = midgard_tag_props[tag].size; + while (i < num_words) { + unsigned tag = words[i] & 0xF; + unsigned next_tag = (words[i] >> 4) & 0xF; + unsigned num_quad_words = midgard_tag_props[tag].size; - if (ctx.midg_tags[i] && ctx.midg_tags[i] != tag) { - fprintf(fp, "\t/* XXX: TAG ERROR branch, got %s expected %s */\n", - midgard_tag_props[tag].name, - midgard_tag_props[ctx.midg_tags[i]].name); - } + if (ctx.midg_tags[i] && ctx.midg_tags[i] != tag) { + fprintf(fp, "\t/* XXX: TAG ERROR branch, got %s expected %s */\n", + midgard_tag_props[tag].name, + midgard_tag_props[ctx.midg_tags[i]].name); + } - ctx.midg_tags[i] = tag; + ctx.midg_tags[i] = tag; - /* Check the tag. The idea is to ensure that next_tag is - * *always* recoverable from the disassembly, such that we may - * safely omit printing next_tag. To show this, we first - * consider that next tags are semantically off-byone -- we end - * up parsing tag n during step n+1. So, we ensure after we're - * done disassembling the next tag of the final bundle is BREAK - * and warn otherwise. 
We also ensure that the next tag is - * never INVALID. Beyond that, since the last tag is checked - * outside the loop, we can check one tag prior. If equal to - * the current tag (which is unique), we're done. Otherwise, we - * print if that tag was > TAG_BREAK, which implies the tag was - * not TAG_BREAK or TAG_INVALID. But we already checked for - * TAG_INVALID, so it's just if the last tag was TAG_BREAK that - * we're silent. So we throw in a print for break-next on at - * the end of the bundle (if it's not the final bundle, which - * we already check for above), disambiguating this case as - * well. Hence in all cases we are unambiguous, QED. */ + /* Check the tag. The idea is to ensure that next_tag is + * *always* recoverable from the disassembly, such that we may + * safely omit printing next_tag. To show this, we first + * consider that next tags are semantically off-byone -- we end + * up parsing tag n during step n+1. So, we ensure after we're + * done disassembling the next tag of the final bundle is BREAK + * and warn otherwise. We also ensure that the next tag is + * never INVALID. Beyond that, since the last tag is checked + * outside the loop, we can check one tag prior. If equal to + * the current tag (which is unique), we're done. Otherwise, we + * print if that tag was > TAG_BREAK, which implies the tag was + * not TAG_BREAK or TAG_INVALID. But we already checked for + * TAG_INVALID, so it's just if the last tag was TAG_BREAK that + * we're silent. So we throw in a print for break-next on at + * the end of the bundle (if it's not the final bundle, which + * we already check for above), disambiguating this case as + * well. Hence in all cases we are unambiguous, QED. */ - if (next_tag == TAG_INVALID) - fprintf(fp, "\t/* XXX: invalid next tag */\n"); + if (next_tag == TAG_INVALID) + fprintf(fp, "\t/* XXX: invalid next tag */\n"); - if (last_next_tag > TAG_BREAK && last_next_tag != tag) { - fprintf(fp, "\t/* XXX: TAG ERROR sequence, got %s expexted %s */\n", - midgard_tag_props[tag].name, - midgard_tag_props[last_next_tag].name); - } + if (last_next_tag > TAG_BREAK && last_next_tag != tag) { + fprintf(fp, "\t/* XXX: TAG ERROR sequence, got %s expexted %s */\n", + midgard_tag_props[tag].name, + midgard_tag_props[last_next_tag].name); + } - last_next_tag = next_tag; + last_next_tag = next_tag; - /* Tags are unique in the following way: - * - * INVALID, BREAK, UNKNOWN_*: verbosely printed - * TEXTURE_4_BARRIER: verified by barrier/!barrier op - * TEXTURE_4_VTX: .vtx tag printed - * TEXTURE_4: tetxure lack of barriers or .vtx - * TAG_LOAD_STORE_4: only load/store - * TAG_ALU_4/8/12/16: by number of instructions/constants - * TAG_ALU_4_8/12/16_WRITEOUT: ^^ with .writeout tag - */ + /* Tags are unique in the following way: + * + * INVALID, BREAK, UNKNOWN_*: verbosely printed + * TEXTURE_4_BARRIER: verified by barrier/!barrier op + * TEXTURE_4_VTX: .vtx tag printed + * TEXTURE_4: tetxure lack of barriers or .vtx + * TAG_LOAD_STORE_4: only load/store + * TAG_ALU_4/8/12/16: by number of instructions/constants + * TAG_ALU_4_8/12/16_WRITEOUT: ^^ with .writeout tag + */ - switch (tag) { - case TAG_TEXTURE_4_VTX ... TAG_TEXTURE_4_BARRIER: { - bool interpipe_aliasing = - midgard_get_quirks(gpu_id) & MIDGARD_INTERPIPE_REG_ALIASING; + switch (tag) { + case TAG_TEXTURE_4_VTX ... TAG_TEXTURE_4_BARRIER: { + bool interpipe_aliasing = + midgard_get_quirks(gpu_id) & MIDGARD_INTERPIPE_REG_ALIASING; - print_texture_word(&ctx, fp, &words[i], tabs, - interpipe_aliasing ? 
0 : REG_TEX_BASE, - interpipe_aliasing ? REGISTER_LDST_BASE : REG_TEX_BASE); - break; - } + print_texture_word( + &ctx, fp, &words[i], tabs, interpipe_aliasing ? 0 : REG_TEX_BASE, + interpipe_aliasing ? REGISTER_LDST_BASE : REG_TEX_BASE); + break; + } - case TAG_LOAD_STORE_4: - print_load_store_word(&ctx, fp, &words[i], verbose); - break; + case TAG_LOAD_STORE_4: + print_load_store_word(&ctx, fp, &words[i], verbose); + break; - case TAG_ALU_4 ... TAG_ALU_16_WRITEOUT: - branch_forward = print_alu_word(&ctx, fp, &words[i], num_quad_words, tabs, i + 4*num_quad_words, verbose); + case TAG_ALU_4 ... TAG_ALU_16_WRITEOUT: + branch_forward = print_alu_word(&ctx, fp, &words[i], num_quad_words, + tabs, i + 4 * num_quad_words, verbose); - /* TODO: infer/verify me */ - if (tag >= TAG_ALU_4_WRITEOUT) - fprintf(fp, "writeout\n"); + /* TODO: infer/verify me */ + if (tag >= TAG_ALU_4_WRITEOUT) + fprintf(fp, "writeout\n"); - break; + break; - default: - fprintf(fp, "Unknown word type %u:\n", words[i] & 0xF); - num_quad_words = 1; - print_quad_word(fp, &words[i], tabs); - fprintf(fp, "\n"); - break; - } + default: + fprintf(fp, "Unknown word type %u:\n", words[i] & 0xF); + num_quad_words = 1; + print_quad_word(fp, &words[i], tabs); + fprintf(fp, "\n"); + break; + } - /* Include a synthetic "break" instruction at the end of the - * bundle to signify that if, absent a branch, the shader - * execution will stop here. Stop disassembly at such a break - * based on a heuristic */ + /* Include a synthetic "break" instruction at the end of the + * bundle to signify that if, absent a branch, the shader + * execution will stop here. Stop disassembly at such a break + * based on a heuristic */ - if (next_tag == TAG_BREAK) { - if (branch_forward) { - fprintf(fp, "break\n"); - } else { - fprintf(fp, "\n"); - break; - } - } + if (next_tag == TAG_BREAK) { + if (branch_forward) { + fprintf(fp, "break\n"); + } else { + fprintf(fp, "\n"); + break; + } + } - fprintf(fp, "\n"); + fprintf(fp, "\n"); - i += 4 * num_quad_words; - } + i += 4 * num_quad_words; + } - if (last_next_tag != TAG_BREAK) { - fprintf(fp, "/* XXX: shader ended with tag %s */\n", - midgard_tag_props[last_next_tag].name); - } + if (last_next_tag != TAG_BREAK) { + fprintf(fp, "/* XXX: shader ended with tag %s */\n", + midgard_tag_props[last_next_tag].name); + } - free(ctx.midg_tags); + free(ctx.midg_tags); } diff --git a/src/panfrost/midgard/disassemble.h b/src/panfrost/midgard/disassemble.h index 6aaaf8c6bc5..7145c7c47a7 100644 --- a/src/panfrost/midgard/disassemble.h +++ b/src/panfrost/midgard/disassemble.h @@ -1,7 +1,7 @@ -#include -#include #include #include +#include +#include -void -disassemble_midgard(FILE *fp, uint8_t *code, size_t size, unsigned gpu_id, bool verbose); +void disassemble_midgard(FILE *fp, uint8_t *code, size_t size, unsigned gpu_id, + bool verbose); diff --git a/src/panfrost/midgard/helpers.h b/src/panfrost/midgard/helpers.h index 436641b5201..f2161c7a650 100644 --- a/src/panfrost/midgard/helpers.h +++ b/src/panfrost/midgard/helpers.h @@ -25,98 +25,74 @@ #include #include -#include "midgard.h" #include "util/macros.h" +#include "midgard.h" -#define OP_IS_LOAD_VARY_F(op) (\ - op == midgard_op_ld_vary_16 || \ - op == midgard_op_ld_vary_32 \ - ) +#define OP_IS_LOAD_VARY_F(op) \ + (op == midgard_op_ld_vary_16 || op == midgard_op_ld_vary_32) -#define OP_IS_PROJECTION(op) ( \ - op == midgard_op_ldst_perspective_div_y || \ - op == midgard_op_ldst_perspective_div_z || \ - op == midgard_op_ldst_perspective_div_w \ - ) +#define 
OP_IS_PROJECTION(op) \ + (op == midgard_op_ldst_perspective_div_y || \ + op == midgard_op_ldst_perspective_div_z || \ + op == midgard_op_ldst_perspective_div_w) -#define OP_IS_VEC4_ONLY(op) ( \ - OP_IS_PROJECTION(op) || \ - op == midgard_op_ld_cubemap_coords \ - ) +#define OP_IS_VEC4_ONLY(op) \ + (OP_IS_PROJECTION(op) || op == midgard_op_ld_cubemap_coords) -#define OP_IS_MOVE(op) ( \ - (op >= midgard_alu_op_fmov && op <= midgard_alu_op_fmov_rtp) || \ - op == midgard_alu_op_imov \ - ) +#define OP_IS_MOVE(op) \ + ((op >= midgard_alu_op_fmov && op <= midgard_alu_op_fmov_rtp) || \ + op == midgard_alu_op_imov) -#define OP_IS_UBO_READ(op) ( \ - op >= midgard_op_ld_ubo_u8 && \ - op <= midgard_op_ld_ubo_128_bswap8 \ - ) +#define OP_IS_UBO_READ(op) \ + (op >= midgard_op_ld_ubo_u8 && op <= midgard_op_ld_ubo_128_bswap8) -#define OP_IS_CSEL_V(op) ( \ - op == midgard_alu_op_icsel_v || \ - op == midgard_alu_op_fcsel_v \ - ) +#define OP_IS_CSEL_V(op) \ + (op == midgard_alu_op_icsel_v || op == midgard_alu_op_fcsel_v) -#define OP_IS_CSEL(op) ( \ - OP_IS_CSEL_V(op) || \ - op == midgard_alu_op_icsel || \ - op == midgard_alu_op_fcsel \ - ) +#define OP_IS_CSEL(op) \ + (OP_IS_CSEL_V(op) || op == midgard_alu_op_icsel || \ + op == midgard_alu_op_fcsel) -#define OP_IS_UNSIGNED_CMP(op) ( \ - op == midgard_alu_op_ult || \ - op == midgard_alu_op_ule \ - ) +#define OP_IS_UNSIGNED_CMP(op) \ + (op == midgard_alu_op_ult || op == midgard_alu_op_ule) -#define OP_IS_INTEGER_CMP(op) ( \ - op == midgard_alu_op_ieq || \ - op == midgard_alu_op_ine || \ - op == midgard_alu_op_ilt || \ - op == midgard_alu_op_ile || \ - OP_IS_UNSIGNED_CMP(op) \ - ) +#define OP_IS_INTEGER_CMP(op) \ + (op == midgard_alu_op_ieq || op == midgard_alu_op_ine || \ + op == midgard_alu_op_ilt || op == midgard_alu_op_ile || \ + OP_IS_UNSIGNED_CMP(op)) -#define OP_IS_COMMON_STORE(op) ( \ - op >= midgard_op_st_u8 && \ - op <= midgard_op_st_128_bswap8 \ - ) +#define OP_IS_COMMON_STORE(op) \ + (op >= midgard_op_st_u8 && op <= midgard_op_st_128_bswap8) -#define OP_IS_IMAGE(op) ( \ - (op >= midgard_op_ld_image_32f && op <= midgard_op_ld_image_32i) || \ - (op >= midgard_op_st_image_32f && op <= midgard_op_st_image_32i) || \ - op == midgard_op_lea_image \ - ) +#define OP_IS_IMAGE(op) \ + ((op >= midgard_op_ld_image_32f && op <= midgard_op_ld_image_32i) || \ + (op >= midgard_op_st_image_32f && op <= midgard_op_st_image_32i) || \ + op == midgard_op_lea_image) -#define OP_IS_SPECIAL(op) ( \ - (op >= midgard_op_ld_special_32f && op <= midgard_op_ld_special_32i) || \ - (op >= midgard_op_st_special_32f && op <= midgard_op_st_special_32i) \ - ) +#define OP_IS_SPECIAL(op) \ + ((op >= midgard_op_ld_special_32f && op <= midgard_op_ld_special_32i) || \ + (op >= midgard_op_st_special_32f && op <= midgard_op_st_special_32i)) -#define OP_IS_PACK_COLOUR(op) ( \ - (op >= midgard_op_pack_colour_f32 && op <= midgard_op_pack_colour_s32) \ - ) +#define OP_IS_PACK_COLOUR(op) \ + ((op >= midgard_op_pack_colour_f32 && op <= midgard_op_pack_colour_s32)) -#define OP_IS_UNPACK_COLOUR(op) ( \ - (op >= midgard_op_unpack_colour_f32 && op <= midgard_op_unpack_colour_s32) \ - ) +#define OP_IS_UNPACK_COLOUR(op) \ + ((op >= midgard_op_unpack_colour_f32 && op <= midgard_op_unpack_colour_s32)) /* Instructions that are on the load/store unit but don't access memory */ -#define OP_IS_REG2REG_LDST(op) ( \ - op >= midgard_op_unpack_colour_f32 && \ - op <= midgard_op_ldst_perspective_div_w \ - ) +#define OP_IS_REG2REG_LDST(op) \ + (op >= midgard_op_unpack_colour_f32 && \ + op <= 
midgard_op_ldst_perspective_div_w) /* ALU control words are single bit fields with a lot of space */ -#define ALU_ENAB_VEC_MUL (1 << 17) -#define ALU_ENAB_SCAL_ADD (1 << 19) -#define ALU_ENAB_VEC_ADD (1 << 21) -#define ALU_ENAB_SCAL_MUL (1 << 23) -#define ALU_ENAB_VEC_LUT (1 << 25) +#define ALU_ENAB_VEC_MUL (1 << 17) +#define ALU_ENAB_SCAL_ADD (1 << 19) +#define ALU_ENAB_VEC_ADD (1 << 21) +#define ALU_ENAB_SCAL_MUL (1 << 23) +#define ALU_ENAB_VEC_LUT (1 << 25) #define ALU_ENAB_BR_COMPACT (1 << 26) -#define ALU_ENAB_BRANCH (1 << 27) +#define ALU_ENAB_BRANCH (1 << 27) /* Other opcode properties that don't conflict with the ALU_ENABs, non-ISA */ @@ -128,7 +104,7 @@ * make sense (since then why are we quirked?), so that corresponds to "no * count set" */ -#define OP_CHANNEL_COUNT(c) ((c - 1) << 0) +#define OP_CHANNEL_COUNT(c) ((c - 1) << 0) #define GET_CHANNEL_COUNT(c) ((c & (0x3 << 0)) ? ((c & (0x3 << 0)) + 1) : 0) /* For instructions that take a single argument, normally the first argument @@ -171,11 +147,11 @@ /* r24 and r25 are special registers that only exist during the pipeline, * by using them when we don't care about the register we skip a roundtrip * to the register file. */ -#define REGISTER_UNUSED 24 -#define REGISTER_CONSTANT 26 -#define REGISTER_LDST_BASE 26 +#define REGISTER_UNUSED 24 +#define REGISTER_CONSTANT 26 +#define REGISTER_LDST_BASE 26 #define REGISTER_TEXTURE_BASE 28 -#define REGISTER_SELECT 31 +#define REGISTER_SELECT 31 /* The following registers are read-only */ @@ -185,8 +161,8 @@ /* XY is Thread Local Storage pointer, ZW is Workgroup Local Storage pointer */ #define REGISTER_LDST_LOCAL_STORAGE_PTR 3 -#define REGISTER_LDST_LOCAL_THREAD_ID 4 -#define REGISTER_LDST_GROUP_ID 5 +#define REGISTER_LDST_LOCAL_THREAD_ID 4 +#define REGISTER_LDST_GROUP_ID 5 #define REGISTER_LDST_GLOBAL_THREAD_ID 6 /* This register is always zeroed when read. */ @@ -194,34 +170,38 @@ /* SSA helper aliases to mimic the registers. 
*/ -#define SSA_FIXED_SHIFT 24 +#define SSA_FIXED_SHIFT 24 #define SSA_FIXED_REGISTER(reg) (((1 + (reg)) << SSA_FIXED_SHIFT) | 1) #define SSA_REG_FROM_FIXED(reg) ((((reg) & ~1) >> SSA_FIXED_SHIFT) - 1) -#define SSA_FIXED_MINIMUM SSA_FIXED_REGISTER(0) +#define SSA_FIXED_MINIMUM SSA_FIXED_REGISTER(0) #define COMPONENT_X 0x0 #define COMPONENT_Y 0x1 #define COMPONENT_Z 0x2 #define COMPONENT_W 0x3 -#define SWIZZLE_IDENTITY { \ - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, \ - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, \ - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, \ - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } \ -} +#define SWIZZLE_IDENTITY \ + { \ + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, \ + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, \ + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, \ + { \ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 \ + } \ + } -#define SWIZZLE_IDENTITY_4 { \ - { 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, \ - { 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, \ - { 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, \ - { 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, \ -} +#define SWIZZLE_IDENTITY_4 \ + { \ + {0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, \ + {0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, \ + {0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, \ + {0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, \ + } static inline unsigned mask_of(unsigned nr_comp) { - return (1 << nr_comp) - 1; + return (1 << nr_comp) - 1; } /* See ISA notes */ @@ -242,34 +222,34 @@ mask_of(unsigned nr_comp) /* Shorthands for usual combinations of units */ -#define UNITS_MUL (UNIT_VMUL | UNIT_SMUL) -#define UNITS_ADD (UNIT_VADD | UNIT_SADD) -#define UNITS_MOST (UNITS_MUL | UNITS_ADD) -#define UNITS_ALL (UNITS_MOST | UNIT_VLUT) -#define UNITS_SCALAR (UNIT_SADD | UNIT_SMUL) -#define UNITS_VECTOR (UNIT_VMUL | UNIT_VADD) +#define UNITS_MUL (UNIT_VMUL | UNIT_SMUL) +#define UNITS_ADD (UNIT_VADD | UNIT_SADD) +#define UNITS_MOST (UNITS_MUL | UNITS_ADD) +#define UNITS_ALL (UNITS_MOST | UNIT_VLUT) +#define UNITS_SCALAR (UNIT_SADD | UNIT_SMUL) +#define UNITS_VECTOR (UNIT_VMUL | UNIT_VADD) #define UNITS_ANY_VECTOR (UNITS_VECTOR | UNIT_VLUT) struct mir_op_props { - const char *name; - unsigned props; + const char *name; + unsigned props; }; /* For load/store */ struct mir_ldst_op_props { - const char *name; - unsigned props; + const char *name; + unsigned props; }; struct mir_tex_op_props { - const char *name; - unsigned props; + const char *name; + unsigned props; }; struct mir_tag_props { - const char *name; - unsigned size; + const char *name; + unsigned size; }; /* Lower 2-bits are a midgard_reg_mode */ @@ -303,15 +283,15 @@ struct mir_tag_props { static inline unsigned expand_writemask(unsigned mask, unsigned log2_channels) { - unsigned o = 0; - unsigned factor = 8 >> log2_channels; - unsigned expanded = (1 << factor) - 1; + unsigned o = 0; + unsigned factor = 8 >> log2_channels; + unsigned expanded = (1 << factor) - 1; - for (unsigned i = 0; i < (1 << log2_channels); ++i) - if (mask & (1 << i)) - o |= (expanded << (factor * i)); + for (unsigned i = 0; i < (1 << log2_channels); ++i) + if (mask & (1 << i)) + o |= (expanded << (factor * i)); - return o; + return o; } /* Coerce structs to integer */ @@ -319,28 +299,28 @@ expand_writemask(unsigned mask, unsigned log2_channels) static inline unsigned vector_alu_srco_unsigned(midgard_vector_alu_src src) { - unsigned u; - memcpy(&u, 
&src, sizeof(src)); - return u; + unsigned u; + memcpy(&u, &src, sizeof(src)); + return u; } static inline midgard_vector_alu_src vector_alu_from_unsigned(unsigned u) { - midgard_vector_alu_src s; - memcpy(&s, &u, sizeof(s)); - return s; + midgard_vector_alu_src s; + memcpy(&s, &u, sizeof(s)); + return s; } static inline void mir_compose_swizzle(unsigned *left, unsigned *right, unsigned *final_out) { - unsigned out[16]; + unsigned out[16]; - for (unsigned c = 0; c < 16; ++c) - out[c] = right[left[c]]; + for (unsigned c = 0; c < 16; ++c) + out[c] = right[left[c]]; - memcpy(final_out, out, sizeof(out)); + memcpy(final_out, out, sizeof(out)); } /* Checks for an xyzw.. swizzle, given a mask */ @@ -348,14 +328,15 @@ mir_compose_swizzle(unsigned *left, unsigned *right, unsigned *final_out) static inline bool mir_is_simple_swizzle(unsigned *swizzle, unsigned mask) { - for (unsigned i = 0; i < 16; ++i) { - if (!(mask & (1 << i))) continue; + for (unsigned i = 0; i < 16; ++i) { + if (!(mask & (1 << i))) + continue; - if (swizzle[i] != i) - return false; - } + if (swizzle[i] != i) + return false; + } - return true; + return true; } /* Packs a load/store argument */ @@ -363,19 +344,19 @@ mir_is_simple_swizzle(unsigned *swizzle, unsigned mask) static inline uint8_t midgard_ldst_comp(unsigned reg, unsigned component, unsigned size) { - assert((reg & ~1) == 0); - assert(size == 16 || size == 32 || size == 64); + assert((reg & ~1) == 0); + assert(size == 16 || size == 32 || size == 64); - /* Shift so everything is in terms of 32-bit units */ - if (size == 64) { - assert(component < 2); - component <<= 1; - } else if (size == 16) { - assert((component & 1) == 0); - component >>= 1; - } + /* Shift so everything is in terms of 32-bit units */ + if (size == 64) { + assert(component < 2); + component <<= 1; + } else if (size == 16) { + assert((component & 1) == 0); + component >>= 1; + } - return component; + return component; } /* Packs/unpacks a ubo index immediate. The unpack must be defined here so it @@ -388,55 +369,52 @@ void midgard_pack_ubo_index_imm(midgard_load_store_word *word, unsigned index); static inline unsigned midgard_unpack_ubo_index_imm(midgard_load_store_word word) { - unsigned ubo = word.arg_comp | - (word.arg_reg << 2) | - (word.bitsize_toggle << 5) | - (word.index_format << 6); + unsigned ubo = word.arg_comp | (word.arg_reg << 2) | + (word.bitsize_toggle << 5) | (word.index_format << 6); - return ubo; + return ubo; } - /* Packs/unpacks varying parameters. * FIXME: IMPORTANT: We currently handle varying mode weirdly, by passing all * parameters via an offset and using REGISTER_LDST_ZERO as base. This works * for most parameters, but does not allow us to encode/decode direct sample * position. */ -void midgard_pack_varying_params(midgard_load_store_word *word, midgard_varying_params p); -midgard_varying_params midgard_unpack_varying_params(midgard_load_store_word word); +void midgard_pack_varying_params(midgard_load_store_word *word, + midgard_varying_params p); +midgard_varying_params +midgard_unpack_varying_params(midgard_load_store_word word); /* Load/store ops' displacement helpers. * This is useful because different types of load/store ops have different * displacement bitsize. 
*/ -#define UNPACK_LDST_ATTRIB_OFS(a) ((a) >> 9) -#define UNPACK_LDST_VERTEX_OFS(a) util_sign_extend((a) & 0x1FF, 9) +#define UNPACK_LDST_ATTRIB_OFS(a) ((a) >> 9) +#define UNPACK_LDST_VERTEX_OFS(a) util_sign_extend((a)&0x1FF, 9) #define UNPACK_LDST_SELECTOR_OFS(a) ((a) >> 9) -#define UNPACK_LDST_UBO_OFS(a) ((a) >> 2) -#define UNPACK_LDST_MEM_OFS(a) ((a)) +#define UNPACK_LDST_UBO_OFS(a) ((a) >> 2) +#define UNPACK_LDST_MEM_OFS(a) ((a)) -#define PACK_LDST_ATTRIB_OFS(a) ((a) << 9) -#define PACK_LDST_VERTEX_OFS(a) ((a) & 0x1FF) +#define PACK_LDST_ATTRIB_OFS(a) ((a) << 9) +#define PACK_LDST_VERTEX_OFS(a) ((a)&0x1FF) #define PACK_LDST_SELECTOR_OFS(a) ((a) << 9) -#define PACK_LDST_UBO_OFS(a) ((a) << 2) -#define PACK_LDST_MEM_OFS(a) ((a)) +#define PACK_LDST_UBO_OFS(a) ((a) << 2) +#define PACK_LDST_MEM_OFS(a) ((a)) static inline bool midgard_is_branch_unit(unsigned unit) { - return (unit == ALU_ENAB_BRANCH) || (unit == ALU_ENAB_BR_COMPACT); + return (unit == ALU_ENAB_BRANCH) || (unit == ALU_ENAB_BR_COMPACT); } /* Packs ALU mod argument */ struct midgard_instruction; unsigned mir_pack_mod(struct midgard_instruction *ins, unsigned i, bool scalar); -void -mir_print_constant_component(FILE *fp, const midgard_constants *consts, - unsigned c, midgard_reg_mode reg_mode, bool half, - unsigned mod, midgard_alu_op op); +void mir_print_constant_component(FILE *fp, const midgard_constants *consts, + unsigned c, midgard_reg_mode reg_mode, + bool half, unsigned mod, midgard_alu_op op); -void -mir_print_outmod(FILE *fp, unsigned outmod, bool is_int); +void mir_print_outmod(FILE *fp, unsigned outmod, bool is_int); #endif diff --git a/src/panfrost/midgard/midgard.h b/src/panfrost/midgard/midgard.h index 2121dc4790b..89772854ba8 100644 --- a/src/panfrost/midgard/midgard.h +++ b/src/panfrost/midgard/midgard.h @@ -28,49 +28,49 @@ #ifndef __midgard_h__ #define __midgard_h__ -#include #include +#include -#define MIDGARD_DBG_MSGS 0x0001 -#define MIDGARD_DBG_SHADERS 0x0002 -#define MIDGARD_DBG_SHADERDB 0x0004 -#define MIDGARD_DBG_INORDER 0x0008 -#define MIDGARD_DBG_VERBOSE 0x0010 -#define MIDGARD_DBG_INTERNAL 0x0020 +#define MIDGARD_DBG_MSGS 0x0001 +#define MIDGARD_DBG_SHADERS 0x0002 +#define MIDGARD_DBG_SHADERDB 0x0004 +#define MIDGARD_DBG_INORDER 0x0008 +#define MIDGARD_DBG_VERBOSE 0x0010 +#define MIDGARD_DBG_INTERNAL 0x0020 extern int midgard_debug; typedef enum { - midgard_word_type_alu, - midgard_word_type_load_store, - midgard_word_type_texture + midgard_word_type_alu, + midgard_word_type_load_store, + midgard_word_type_texture } midgard_word_type; typedef enum { - midgard_alu_vmul, - midgard_alu_sadd, - midgard_alu_smul, - midgard_alu_vadd, - midgard_alu_lut + midgard_alu_vmul, + midgard_alu_sadd, + midgard_alu_smul, + midgard_alu_vadd, + midgard_alu_lut } midgard_alu; enum { - TAG_INVALID = 0x0, - TAG_BREAK = 0x1, - TAG_TEXTURE_4_VTX = 0x2, - TAG_TEXTURE_4 = 0x3, - TAG_TEXTURE_4_BARRIER = 0x4, - TAG_LOAD_STORE_4 = 0x5, - TAG_UNKNOWN_1 = 0x6, - TAG_UNKNOWN_2 = 0x7, - TAG_ALU_4 = 0x8, - TAG_ALU_8 = 0x9, - TAG_ALU_12 = 0xA, - TAG_ALU_16 = 0xB, - TAG_ALU_4_WRITEOUT = 0xC, - TAG_ALU_8_WRITEOUT = 0xD, - TAG_ALU_12_WRITEOUT = 0xE, - TAG_ALU_16_WRITEOUT = 0xF + TAG_INVALID = 0x0, + TAG_BREAK = 0x1, + TAG_TEXTURE_4_VTX = 0x2, + TAG_TEXTURE_4 = 0x3, + TAG_TEXTURE_4_BARRIER = 0x4, + TAG_LOAD_STORE_4 = 0x5, + TAG_UNKNOWN_1 = 0x6, + TAG_UNKNOWN_2 = 0x7, + TAG_ALU_4 = 0x8, + TAG_ALU_8 = 0x9, + TAG_ALU_12 = 0xA, + TAG_ALU_16 = 0xB, + TAG_ALU_4_WRITEOUT = 0xC, + TAG_ALU_8_WRITEOUT = 0xD, + TAG_ALU_12_WRITEOUT = 0xE, + 
TAG_ALU_16_WRITEOUT = 0xF }; /* @@ -78,200 +78,202 @@ enum { */ typedef enum { - midgard_alu_op_fadd = 0x10, /* round to even */ - midgard_alu_op_fadd_rtz = 0x11, - midgard_alu_op_fadd_rtn = 0x12, - midgard_alu_op_fadd_rtp = 0x13, - midgard_alu_op_fmul = 0x14, /* round to even */ - midgard_alu_op_fmul_rtz = 0x15, - midgard_alu_op_fmul_rtn = 0x16, - midgard_alu_op_fmul_rtp = 0x17, + midgard_alu_op_fadd = 0x10, /* round to even */ + midgard_alu_op_fadd_rtz = 0x11, + midgard_alu_op_fadd_rtn = 0x12, + midgard_alu_op_fadd_rtp = 0x13, + midgard_alu_op_fmul = 0x14, /* round to even */ + midgard_alu_op_fmul_rtz = 0x15, + midgard_alu_op_fmul_rtn = 0x16, + midgard_alu_op_fmul_rtp = 0x17, - midgard_alu_op_fmin = 0x28, /* if an operand is NaN, propagate the other */ - midgard_alu_op_fmin_nan = 0x29, /* if an operand is NaN, propagate it */ - midgard_alu_op_fabsmin = 0x2A, /* min(abs(a,b)) */ - midgard_alu_op_fabsmin_nan = 0x2B, /* min_nan(abs(a,b)) */ - midgard_alu_op_fmax = 0x2C, /* if an operand is NaN, propagate the other */ - midgard_alu_op_fmax_nan = 0x2D, /* if an operand is NaN, propagate it */ - midgard_alu_op_fabsmax = 0x2E, /* max(abs(a,b)) */ - midgard_alu_op_fabsmax_nan = 0x2F, /* max_nan(abs(a,b)) */ + midgard_alu_op_fmin = 0x28, /* if an operand is NaN, propagate the other */ + midgard_alu_op_fmin_nan = 0x29, /* if an operand is NaN, propagate it */ + midgard_alu_op_fabsmin = 0x2A, /* min(abs(a,b)) */ + midgard_alu_op_fabsmin_nan = 0x2B, /* min_nan(abs(a,b)) */ + midgard_alu_op_fmax = 0x2C, /* if an operand is NaN, propagate the other */ + midgard_alu_op_fmax_nan = 0x2D, /* if an operand is NaN, propagate it */ + midgard_alu_op_fabsmax = 0x2E, /* max(abs(a,b)) */ + midgard_alu_op_fabsmax_nan = 0x2F, /* max_nan(abs(a,b)) */ - midgard_alu_op_fmov = 0x30, /* fmov_rte */ - midgard_alu_op_fmov_rtz = 0x31, - midgard_alu_op_fmov_rtn = 0x32, - midgard_alu_op_fmov_rtp = 0x33, - midgard_alu_op_froundeven = 0x34, - midgard_alu_op_ftrunc = 0x35, - midgard_alu_op_ffloor = 0x36, - midgard_alu_op_fceil = 0x37, - midgard_alu_op_ffma = 0x38, /* rte */ - midgard_alu_op_ffma_rtz = 0x39, - midgard_alu_op_ffma_rtn = 0x3A, - midgard_alu_op_ffma_rtp = 0x3B, - midgard_alu_op_fdot3 = 0x3C, - midgard_alu_op_fdot3r = 0x3D, - midgard_alu_op_fdot4 = 0x3E, - midgard_alu_op_freduce = 0x3F, + midgard_alu_op_fmov = 0x30, /* fmov_rte */ + midgard_alu_op_fmov_rtz = 0x31, + midgard_alu_op_fmov_rtn = 0x32, + midgard_alu_op_fmov_rtp = 0x33, + midgard_alu_op_froundeven = 0x34, + midgard_alu_op_ftrunc = 0x35, + midgard_alu_op_ffloor = 0x36, + midgard_alu_op_fceil = 0x37, + midgard_alu_op_ffma = 0x38, /* rte */ + midgard_alu_op_ffma_rtz = 0x39, + midgard_alu_op_ffma_rtn = 0x3A, + midgard_alu_op_ffma_rtp = 0x3B, + midgard_alu_op_fdot3 = 0x3C, + midgard_alu_op_fdot3r = 0x3D, + midgard_alu_op_fdot4 = 0x3E, + midgard_alu_op_freduce = 0x3F, - midgard_alu_op_iadd = 0x40, - midgard_alu_op_ishladd = 0x41, /* (a<<1) + b */ - midgard_alu_op_isub = 0x46, - midgard_alu_op_ishlsub = 0x47, /* (a<<1) - b */ - midgard_alu_op_iaddsat = 0x48, - midgard_alu_op_uaddsat = 0x49, - midgard_alu_op_isubsat = 0x4E, - midgard_alu_op_usubsat = 0x4F, + midgard_alu_op_iadd = 0x40, + midgard_alu_op_ishladd = 0x41, /* (a<<1) + b */ + midgard_alu_op_isub = 0x46, + midgard_alu_op_ishlsub = 0x47, /* (a<<1) - b */ + midgard_alu_op_iaddsat = 0x48, + midgard_alu_op_uaddsat = 0x49, + midgard_alu_op_isubsat = 0x4E, + midgard_alu_op_usubsat = 0x4F, - midgard_alu_op_imul = 0x58, - /* Multiplies two ints and stores the result in the next larger datasize. 
*/ - midgard_alu_op_iwmul = 0x59, /* sint * sint = sint */ - midgard_alu_op_uwmul = 0x5A, /* uint * uint = uint */ - midgard_alu_op_iuwmul = 0x5B, /* sint * uint = sint */ + midgard_alu_op_imul = 0x58, + /* Multiplies two ints and stores the result in the next larger datasize. */ + midgard_alu_op_iwmul = 0x59, /* sint * sint = sint */ + midgard_alu_op_uwmul = 0x5A, /* uint * uint = uint */ + midgard_alu_op_iuwmul = 0x5B, /* sint * uint = sint */ - midgard_alu_op_imin = 0x60, - midgard_alu_op_umin = 0x61, - midgard_alu_op_imax = 0x62, - midgard_alu_op_umax = 0x63, - midgard_alu_op_iavg = 0x64, - midgard_alu_op_uavg = 0x65, - midgard_alu_op_iravg = 0x66, - midgard_alu_op_uravg = 0x67, - midgard_alu_op_iasr = 0x68, - midgard_alu_op_ilsr = 0x69, - midgard_alu_op_ishlsat = 0x6C, - midgard_alu_op_ushlsat = 0x6D, - midgard_alu_op_ishl = 0x6E, + midgard_alu_op_imin = 0x60, + midgard_alu_op_umin = 0x61, + midgard_alu_op_imax = 0x62, + midgard_alu_op_umax = 0x63, + midgard_alu_op_iavg = 0x64, + midgard_alu_op_uavg = 0x65, + midgard_alu_op_iravg = 0x66, + midgard_alu_op_uravg = 0x67, + midgard_alu_op_iasr = 0x68, + midgard_alu_op_ilsr = 0x69, + midgard_alu_op_ishlsat = 0x6C, + midgard_alu_op_ushlsat = 0x6D, + midgard_alu_op_ishl = 0x6E, - midgard_alu_op_iand = 0x70, - midgard_alu_op_ior = 0x71, - midgard_alu_op_inand = 0x72, /* ~(a & b), for inot let a = b */ - midgard_alu_op_inor = 0x73, /* ~(a | b) */ - midgard_alu_op_iandnot = 0x74, /* (a & ~b), used for not/b2f */ - midgard_alu_op_iornot = 0x75, /* (a | ~b) */ - midgard_alu_op_ixor = 0x76, - midgard_alu_op_inxor = 0x77, /* ~(a ^ b) */ - midgard_alu_op_iclz = 0x78, /* Number of zeroes on left */ - midgard_alu_op_ipopcnt = 0x7A, /* Population count */ - midgard_alu_op_imov = 0x7B, - midgard_alu_op_iabsdiff = 0x7C, - midgard_alu_op_uabsdiff = 0x7D, - midgard_alu_op_ichoose = 0x7E, /* vector, component number - dupe for shuffle() */ + midgard_alu_op_iand = 0x70, + midgard_alu_op_ior = 0x71, + midgard_alu_op_inand = 0x72, /* ~(a & b), for inot let a = b */ + midgard_alu_op_inor = 0x73, /* ~(a | b) */ + midgard_alu_op_iandnot = 0x74, /* (a & ~b), used for not/b2f */ + midgard_alu_op_iornot = 0x75, /* (a | ~b) */ + midgard_alu_op_ixor = 0x76, + midgard_alu_op_inxor = 0x77, /* ~(a ^ b) */ + midgard_alu_op_iclz = 0x78, /* Number of zeroes on left */ + midgard_alu_op_ipopcnt = 0x7A, /* Population count */ + midgard_alu_op_imov = 0x7B, + midgard_alu_op_iabsdiff = 0x7C, + midgard_alu_op_uabsdiff = 0x7D, + midgard_alu_op_ichoose = + 0x7E, /* vector, component number - dupe for shuffle() */ - midgard_alu_op_feq = 0x80, - midgard_alu_op_fne = 0x81, - midgard_alu_op_flt = 0x82, - midgard_alu_op_fle = 0x83, - midgard_alu_op_fball_eq = 0x88, - midgard_alu_op_fball_neq = 0x89, - midgard_alu_op_fball_lt = 0x8A, /* all(lessThan(.., ..)) */ - midgard_alu_op_fball_lte = 0x8B, /* all(lessThanEqual(.., ..)) */ + midgard_alu_op_feq = 0x80, + midgard_alu_op_fne = 0x81, + midgard_alu_op_flt = 0x82, + midgard_alu_op_fle = 0x83, + midgard_alu_op_fball_eq = 0x88, + midgard_alu_op_fball_neq = 0x89, + midgard_alu_op_fball_lt = 0x8A, /* all(lessThan(.., ..)) */ + midgard_alu_op_fball_lte = 0x8B, /* all(lessThanEqual(.., ..)) */ - midgard_alu_op_fbany_eq = 0x90, - midgard_alu_op_fbany_neq = 0x91, - midgard_alu_op_fbany_lt = 0x92, /* any(lessThan(.., ..)) */ - midgard_alu_op_fbany_lte = 0x93, /* any(lessThanEqual(.., ..)) */ + midgard_alu_op_fbany_eq = 0x90, + midgard_alu_op_fbany_neq = 0x91, + midgard_alu_op_fbany_lt = 0x92, /* any(lessThan(.., ..)) */ + midgard_alu_op_fbany_lte = 
0x93, /* any(lessThanEqual(.., ..)) */ - midgard_alu_op_f2i_rte = 0x98, - midgard_alu_op_f2i_rtz = 0x99, - midgard_alu_op_f2i_rtn = 0x9A, - midgard_alu_op_f2i_rtp = 0x9B, - midgard_alu_op_f2u_rte = 0x9C, - midgard_alu_op_f2u_rtz = 0x9D, - midgard_alu_op_f2u_rtn = 0x9E, - midgard_alu_op_f2u_rtp = 0x9F, + midgard_alu_op_f2i_rte = 0x98, + midgard_alu_op_f2i_rtz = 0x99, + midgard_alu_op_f2i_rtn = 0x9A, + midgard_alu_op_f2i_rtp = 0x9B, + midgard_alu_op_f2u_rte = 0x9C, + midgard_alu_op_f2u_rtz = 0x9D, + midgard_alu_op_f2u_rtn = 0x9E, + midgard_alu_op_f2u_rtp = 0x9F, - midgard_alu_op_ieq = 0xA0, - midgard_alu_op_ine = 0xA1, - midgard_alu_op_ult = 0xA2, - midgard_alu_op_ule = 0xA3, - midgard_alu_op_ilt = 0xA4, - midgard_alu_op_ile = 0xA5, - midgard_alu_op_iball_eq = 0xA8, - midgard_alu_op_iball_neq = 0xA9, - midgard_alu_op_uball_lt = 0xAA, - midgard_alu_op_uball_lte = 0xAB, - midgard_alu_op_iball_lt = 0xAC, - midgard_alu_op_iball_lte = 0xAD, + midgard_alu_op_ieq = 0xA0, + midgard_alu_op_ine = 0xA1, + midgard_alu_op_ult = 0xA2, + midgard_alu_op_ule = 0xA3, + midgard_alu_op_ilt = 0xA4, + midgard_alu_op_ile = 0xA5, + midgard_alu_op_iball_eq = 0xA8, + midgard_alu_op_iball_neq = 0xA9, + midgard_alu_op_uball_lt = 0xAA, + midgard_alu_op_uball_lte = 0xAB, + midgard_alu_op_iball_lt = 0xAC, + midgard_alu_op_iball_lte = 0xAD, - midgard_alu_op_ibany_eq = 0xB0, - midgard_alu_op_ibany_neq = 0xB1, - midgard_alu_op_ubany_lt = 0xB2, - midgard_alu_op_ubany_lte = 0xB3, - midgard_alu_op_ibany_lt = 0xB4, /* any(lessThan(.., ..)) */ - midgard_alu_op_ibany_lte = 0xB5, /* any(lessThanEqual(.., ..)) */ - midgard_alu_op_i2f_rte = 0xB8, - midgard_alu_op_i2f_rtz = 0xB9, - midgard_alu_op_i2f_rtn = 0xBA, - midgard_alu_op_i2f_rtp = 0xBB, - midgard_alu_op_u2f_rte = 0xBC, - midgard_alu_op_u2f_rtz = 0xBD, - midgard_alu_op_u2f_rtn = 0xBE, - midgard_alu_op_u2f_rtp = 0xBF, + midgard_alu_op_ibany_eq = 0xB0, + midgard_alu_op_ibany_neq = 0xB1, + midgard_alu_op_ubany_lt = 0xB2, + midgard_alu_op_ubany_lte = 0xB3, + midgard_alu_op_ibany_lt = 0xB4, /* any(lessThan(.., ..)) */ + midgard_alu_op_ibany_lte = 0xB5, /* any(lessThanEqual(.., ..)) */ + midgard_alu_op_i2f_rte = 0xB8, + midgard_alu_op_i2f_rtz = 0xB9, + midgard_alu_op_i2f_rtn = 0xBA, + midgard_alu_op_i2f_rtp = 0xBB, + midgard_alu_op_u2f_rte = 0xBC, + midgard_alu_op_u2f_rtz = 0xBD, + midgard_alu_op_u2f_rtn = 0xBE, + midgard_alu_op_u2f_rtp = 0xBF, - /* All csel* instructions use as a condition the output of the previous - * vector or scalar unit, thus it must run on the second pipeline stage - * and be scheduled to the same bundle as the opcode that it uses as a - * condition. */ - midgard_alu_op_icsel_v = 0xC0, - midgard_alu_op_icsel = 0xC1, - midgard_alu_op_fcsel_v = 0xC4, - midgard_alu_op_fcsel = 0xC5, - midgard_alu_op_froundaway = 0xC6, /* round to nearest away */ + /* All csel* instructions use as a condition the output of the previous + * vector or scalar unit, thus it must run on the second pipeline stage + * and be scheduled to the same bundle as the opcode that it uses as a + * condition. 
*/ + midgard_alu_op_icsel_v = 0xC0, + midgard_alu_op_icsel = 0xC1, + midgard_alu_op_fcsel_v = 0xC4, + midgard_alu_op_fcsel = 0xC5, + midgard_alu_op_froundaway = 0xC6, /* round to nearest away */ - midgard_alu_op_fatan2_pt2 = 0xE8, - midgard_alu_op_fpow_pt1 = 0xEC, - midgard_alu_op_fpown_pt1 = 0xED, - midgard_alu_op_fpowr_pt1 = 0xEE, + midgard_alu_op_fatan2_pt2 = 0xE8, + midgard_alu_op_fpow_pt1 = 0xEC, + midgard_alu_op_fpown_pt1 = 0xED, + midgard_alu_op_fpowr_pt1 = 0xEE, - midgard_alu_op_frcp = 0xF0, - midgard_alu_op_frsqrt = 0xF2, - midgard_alu_op_fsqrt = 0xF3, - midgard_alu_op_fexp2 = 0xF4, - midgard_alu_op_flog2 = 0xF5, - midgard_alu_op_fsinpi = 0xF6, /* sin(pi * x) */ - midgard_alu_op_fcospi = 0xF7, /* cos(pi * x) */ - midgard_alu_op_fatan2_pt1 = 0xF9, + midgard_alu_op_frcp = 0xF0, + midgard_alu_op_frsqrt = 0xF2, + midgard_alu_op_fsqrt = 0xF3, + midgard_alu_op_fexp2 = 0xF4, + midgard_alu_op_flog2 = 0xF5, + midgard_alu_op_fsinpi = 0xF6, /* sin(pi * x) */ + midgard_alu_op_fcospi = 0xF7, /* cos(pi * x) */ + midgard_alu_op_fatan2_pt1 = 0xF9, } midgard_alu_op; typedef enum { - midgard_outmod_none = 0, - midgard_outmod_clamp_0_inf = 1, /* max(x, 0.0), NaNs become +0.0 */ - midgard_outmod_clamp_m1_1 = 2, /* clamp(x, -1.0, 1.0), NaNs become -1.0 */ - midgard_outmod_clamp_0_1 = 3 /* clamp(x, 0.0, 1.0), NaNs become +0.0 */ + midgard_outmod_none = 0, + midgard_outmod_clamp_0_inf = 1, /* max(x, 0.0), NaNs become +0.0 */ + midgard_outmod_clamp_m1_1 = 2, /* clamp(x, -1.0, 1.0), NaNs become -1.0 */ + midgard_outmod_clamp_0_1 = 3 /* clamp(x, 0.0, 1.0), NaNs become +0.0 */ } midgard_outmod_float; -/* These are applied to the resulting value that's going to be stored in the dest reg. - * This should be set to midgard_outmod_keeplo when shrink_mode is midgard_shrink_mode_none. */ +/* These are applied to the resulting value that's going to be stored in the + * dest reg. This should be set to midgard_outmod_keeplo when shrink_mode is + * midgard_shrink_mode_none. */ typedef enum { - midgard_outmod_ssat = 0, - midgard_outmod_usat = 1, - midgard_outmod_keeplo = 2, /* Keep low half */ - midgard_outmod_keephi = 3, /* Keep high half */ + midgard_outmod_ssat = 0, + midgard_outmod_usat = 1, + midgard_outmod_keeplo = 2, /* Keep low half */ + midgard_outmod_keephi = 3, /* Keep high half */ } midgard_outmod_int; typedef enum { - midgard_reg_mode_8 = 0, - midgard_reg_mode_16 = 1, - midgard_reg_mode_32 = 2, - midgard_reg_mode_64 = 3 + midgard_reg_mode_8 = 0, + midgard_reg_mode_16 = 1, + midgard_reg_mode_32 = 2, + midgard_reg_mode_64 = 3 } midgard_reg_mode; typedef enum { - midgard_shrink_mode_lower = 0, - midgard_shrink_mode_upper = 1, - midgard_shrink_mode_none = 2 + midgard_shrink_mode_lower = 0, + midgard_shrink_mode_upper = 1, + midgard_shrink_mode_none = 2 } midgard_shrink_mode; /* Only used if midgard_src_expand_mode is set to one of midgard_src_expand_*. */ typedef enum { - midgard_int_sign_extend = 0, - midgard_int_zero_extend = 1, - midgard_int_replicate = 2, - midgard_int_left_shift = 3 + midgard_int_sign_extend = 0, + midgard_int_zero_extend = 1, + midgard_int_replicate = 2, + midgard_int_left_shift = 3 } midgard_int_mod; -/* Unlike midgard_int_mod, fload modifiers are applied after the expansion happens, so - * they don't depend on midgard_src_expand_mode. */ +/* Unlike midgard_int_mod, fload modifiers are applied after the expansion + * happens, so they don't depend on midgard_src_expand_mode. 
*/ #define MIDGARD_FLOAT_MOD_ABS (1 << 0) #define MIDGARD_FLOAT_MOD_NEG (1 << 1) @@ -281,78 +283,63 @@ typedef enum { * extended, resulting in a vec4 where each 32-bit element corresponds to a * 16-bit element from the low 64-bits of the input vector. */ typedef enum { - midgard_src_passthrough = 0, - midgard_src_rep_low = 1, /* replicate lower 64 bits to higher 64 bits */ - midgard_src_rep_high = 2, /* replicate higher 64 bits to lower 64 bits */ - midgard_src_swap = 3, /* swap lower 64 bits with higher 64 bits */ - midgard_src_expand_low = 4, /* expand low 64 bits */ - midgard_src_expand_high = 5, /* expand high 64 bits */ - midgard_src_expand_low_swap = 6, /* expand low 64 bits, then swap */ - midgard_src_expand_high_swap = 7, /* expand high 64 bits, then swap */ + midgard_src_passthrough = 0, + midgard_src_rep_low = 1, /* replicate lower 64 bits to higher 64 bits */ + midgard_src_rep_high = 2, /* replicate higher 64 bits to lower 64 bits */ + midgard_src_swap = 3, /* swap lower 64 bits with higher 64 bits */ + midgard_src_expand_low = 4, /* expand low 64 bits */ + midgard_src_expand_high = 5, /* expand high 64 bits */ + midgard_src_expand_low_swap = 6, /* expand low 64 bits, then swap */ + midgard_src_expand_high_swap = 7, /* expand high 64 bits, then swap */ } midgard_src_expand_mode; -#define INPUT_EXPANDS(a) \ - (a >= midgard_src_expand_low && a <= midgard_src_expand_high_swap) +#define INPUT_EXPANDS(a) \ + (a >= midgard_src_expand_low && a <= midgard_src_expand_high_swap) -#define INPUT_SWAPS(a) \ - (a == midgard_src_swap || a >= midgard_src_expand_low_swap) +#define INPUT_SWAPS(a) \ + (a == midgard_src_swap || a >= midgard_src_expand_low_swap) -typedef struct -__attribute__((__packed__)) -{ - /* Either midgard_int_mod or from midgard_float_mod_*, depending on the - * type of op */ - unsigned mod : 2; - midgard_src_expand_mode expand_mode : 3; - unsigned swizzle : 8; -} -midgard_vector_alu_src; +typedef struct __attribute__((__packed__)) { + /* Either midgard_int_mod or from midgard_float_mod_*, depending on the + * type of op */ + unsigned mod : 2; + midgard_src_expand_mode expand_mode : 3; + unsigned swizzle : 8; +} midgard_vector_alu_src; -typedef struct -__attribute__((__packed__)) -{ - midgard_alu_op op : 8; - midgard_reg_mode reg_mode : 2; - unsigned src1 : 13; - unsigned src2 : 13; - midgard_shrink_mode shrink_mode : 2; - unsigned outmod : 2; - unsigned mask : 8; -} -midgard_vector_alu; +typedef struct __attribute__((__packed__)) { + midgard_alu_op op : 8; + midgard_reg_mode reg_mode : 2; + unsigned src1 : 13; + unsigned src2 : 13; + midgard_shrink_mode shrink_mode : 2; + unsigned outmod : 2; + unsigned mask : 8; +} midgard_vector_alu; -typedef struct -__attribute__((__packed__)) -{ - unsigned mod : 2; - bool full : 1; /* 0 = 16-bit, 1 = 32-bit */ - unsigned component : 3; -} -midgard_scalar_alu_src; +typedef struct __attribute__((__packed__)) { + unsigned mod : 2; + bool full : 1; /* 0 = 16-bit, 1 = 32-bit */ + unsigned component : 3; +} midgard_scalar_alu_src; -typedef struct -__attribute__((__packed__)) -{ - midgard_alu_op op : 8; - unsigned src1 : 6; - /* last 5 bits are used when src2 is an immediate */ - unsigned src2 : 11; - unsigned reserved : 1; - unsigned outmod : 2; - bool output_full : 1; - unsigned output_component : 3; -} -midgard_scalar_alu; +typedef struct __attribute__((__packed__)) { + midgard_alu_op op : 8; + unsigned src1 : 6; + /* last 5 bits are used when src2 is an immediate */ + unsigned src2 : 11; + unsigned reserved : 1; + unsigned outmod : 2; + 
bool output_full : 1; + unsigned output_component : 3; +} midgard_scalar_alu; -typedef struct -__attribute__((__packed__)) -{ - unsigned src1_reg : 5; - unsigned src2_reg : 5; - unsigned out_reg : 5; - bool src2_imm : 1; -} -midgard_reg_info; +typedef struct __attribute__((__packed__)) { + unsigned src1_reg : 5; + unsigned src2_reg : 5; + unsigned out_reg : 5; + bool src2_imm : 1; +} midgard_reg_info; /* In addition to conditional branches and jumps (unconditional branches), * Midgard implements a bit of fixed function functionality used in fragment @@ -361,679 +348,647 @@ midgard_reg_info; * fixed-function operation as the branch condition. */ typedef enum { - /* Regular branches */ - midgard_jmp_writeout_op_branch_uncond = 1, - midgard_jmp_writeout_op_branch_cond = 2, + /* Regular branches */ + midgard_jmp_writeout_op_branch_uncond = 1, + midgard_jmp_writeout_op_branch_cond = 2, - /* In a fragment shader, execute a discard_if instruction, with the - * corresponding condition code. Terminates the shader, so generally - * set the branch target to out of the shader */ - midgard_jmp_writeout_op_discard = 4, + /* In a fragment shader, execute a discard_if instruction, with the + * corresponding condition code. Terminates the shader, so generally + * set the branch target to out of the shader */ + midgard_jmp_writeout_op_discard = 4, - /* Branch if the tilebuffer is not yet ready. At the beginning of a - * fragment shader that reads from the tile buffer, for instance via - * ARM_shader_framebuffer_fetch or EXT_pixel_local_storage, this branch - * operation should be used as a loop. An instruction like - * "br.tilebuffer.always -1" does the trick, corresponding to - * "while(!is_tilebuffer_ready) */ - midgard_jmp_writeout_op_tilebuffer_pending = 6, + /* Branch if the tilebuffer is not yet ready. At the beginning of a + * fragment shader that reads from the tile buffer, for instance via + * ARM_shader_framebuffer_fetch or EXT_pixel_local_storage, this branch + * operation should be used as a loop. An instruction like + * "br.tilebuffer.always -1" does the trick, corresponding to + * "while(!is_tilebuffer_ready) */ + midgard_jmp_writeout_op_tilebuffer_pending = 6, - /* In a fragment shader, try to write out the value pushed to r0 to the - * tilebuffer, subject to state in r1.z and r1.w. If this - * succeeds, the shader terminates. If it fails, it branches to the - * specified branch target. Generally, this should be used in a loop to - * itself, acting as "do { write(r0); } while(!write_successful);" */ - midgard_jmp_writeout_op_writeout = 7, + /* In a fragment shader, try to write out the value pushed to r0 to the + * tilebuffer, subject to state in r1.z and r1.w. If this + * succeeds, the shader terminates. If it fails, it branches to the + * specified branch target. Generally, this should be used in a loop to + * itself, acting as "do { write(r0); } while(!write_successful);" */ + midgard_jmp_writeout_op_writeout = 7, } midgard_jmp_writeout_op; typedef enum { - midgard_condition_write0 = 0, + midgard_condition_write0 = 0, - /* These condition codes denote a conditional branch on FALSE and on - * TRUE respectively */ - midgard_condition_false = 1, - midgard_condition_true = 2, + /* These condition codes denote a conditional branch on FALSE and on + * TRUE respectively */ + midgard_condition_false = 1, + midgard_condition_true = 2, - /* This condition code always branches. 
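Side note (illustration only, not part of this patch): size sanity checks for the packed encodings above. The bitfield widths sum to 16 bits for midgard_reg_info, 32 bits for midgard_scalar_alu and 48 bits for midgard_vector_alu, so with GCC/Clang's packed-bitfield layout the following C11 assertions should hold; treat them as a sketch rather than something asserted by this header.

   #include <assert.h>

   #include "midgard.h"

   static_assert(sizeof(midgard_reg_info) == 2, "reg word is 16 bits");
   static_assert(sizeof(midgard_scalar_alu) == 4, "scalar ALU word is 32 bits");
   static_assert(sizeof(midgard_vector_alu) == 6, "vector ALU word is 48 bits");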
For a pure branch, the - * unconditional branch coding should be used instead, but for - * fixed-function branch opcodes, this is still useful */ - midgard_condition_always = 3, + /* This condition code always branches. For a pure branch, the + * unconditional branch coding should be used instead, but for + * fixed-function branch opcodes, this is still useful */ + midgard_condition_always = 3, } midgard_condition; enum midgard_call_mode { - midgard_call_mode_default = 1, - midgard_call_mode_call = 2, - midgard_call_mode_return = 3 + midgard_call_mode_default = 1, + midgard_call_mode_call = 2, + midgard_call_mode_return = 3 }; -typedef struct -__attribute__((__packed__)) -{ - midgard_jmp_writeout_op op : 3; /* == branch_uncond */ - unsigned dest_tag : 4; /* tag of branch destination */ - enum midgard_call_mode call_mode : 2; - int offset : 7; -} -midgard_branch_uncond; +typedef struct __attribute__((__packed__)) { + midgard_jmp_writeout_op op : 3; /* == branch_uncond */ + unsigned dest_tag : 4; /* tag of branch destination */ + enum midgard_call_mode call_mode : 2; + int offset : 7; +} midgard_branch_uncond; -typedef struct -__attribute__((__packed__)) -{ - midgard_jmp_writeout_op op : 3; /* == branch_cond */ - unsigned dest_tag : 4; /* tag of branch destination */ - int offset : 7; - midgard_condition cond : 2; -} -midgard_branch_cond; +typedef struct __attribute__((__packed__)) { + midgard_jmp_writeout_op op : 3; /* == branch_cond */ + unsigned dest_tag : 4; /* tag of branch destination */ + int offset : 7; + midgard_condition cond : 2; +} midgard_branch_cond; -typedef struct -__attribute__((__packed__)) -{ - midgard_jmp_writeout_op op : 3; /* == branch_cond */ - unsigned dest_tag : 4; /* tag of branch destination */ - enum midgard_call_mode call_mode : 2; - signed offset : 23; +typedef struct __attribute__((__packed__)) { + midgard_jmp_writeout_op op : 3; /* == branch_cond */ + unsigned dest_tag : 4; /* tag of branch destination */ + enum midgard_call_mode call_mode : 2; + signed offset : 23; - /* Extended branches permit inputting up to 4 conditions loaded into - * r31 (two in r31.w and two in r31.x). In the most general case, we - * specify a function f(A, B, C, D) mapping 4 1-bit conditions to a - * single 1-bit branch criteria. Note that the domain of f has 2^(2^4) - * elements, each mapping to 1-bit of output, so we can trivially - * construct a Godel numbering of f as a (2^4)=16-bit integer. This - * 16-bit integer serves as a lookup table to compute f, subject to - * some swaps for ordering. - * - * Interesting, the standard 2-bit condition codes are also a LUT with - * the same format (2^1-bit), but it's usually easier to use enums. */ + /* Extended branches permit inputting up to 4 conditions loaded into + * r31 (two in r31.w and two in r31.x). In the most general case, we + * specify a function f(A, B, C, D) mapping 4 1-bit conditions to a + * single 1-bit branch criteria. Note that the domain of f has 2^(2^4) + * elements, each mapping to 1-bit of output, so we can trivially + * construct a Godel numbering of f as a (2^4)=16-bit integer. This + * 16-bit integer serves as a lookup table to compute f, subject to + * some swaps for ordering. + * + * Interesting, the standard 2-bit condition codes are also a LUT with + * the same format (2^1-bit), but it's usually easier to use enums. 
*/ - unsigned cond : 16; -} -midgard_branch_extended; + unsigned cond : 16; +} midgard_branch_extended; -typedef struct -__attribute__((__packed__)) -{ - midgard_jmp_writeout_op op : 3; /* == writeout */ - unsigned unknown : 13; -} -midgard_writeout; +typedef struct __attribute__((__packed__)) { + midgard_jmp_writeout_op op : 3; /* == writeout */ + unsigned unknown : 13; +} midgard_writeout; /* * Load/store words */ typedef enum { - midgard_op_ld_st_noop = 0x03, + midgard_op_ld_st_noop = 0x03, - /* Unpacks a colour from a native format to */ - midgard_op_unpack_colour_f32 = 0x04, - midgard_op_unpack_colour_f16 = 0x05, - midgard_op_unpack_colour_u32 = 0x06, - midgard_op_unpack_colour_s32 = 0x07, + /* Unpacks a colour from a native format to */ + midgard_op_unpack_colour_f32 = 0x04, + midgard_op_unpack_colour_f16 = 0x05, + midgard_op_unpack_colour_u32 = 0x06, + midgard_op_unpack_colour_s32 = 0x07, - /* Packs a colour from to a native format */ - midgard_op_pack_colour_f32 = 0x08, - midgard_op_pack_colour_f16 = 0x09, - midgard_op_pack_colour_u32 = 0x0A, - midgard_op_pack_colour_s32 = 0x0B, + /* Packs a colour from to a native format */ + midgard_op_pack_colour_f32 = 0x08, + midgard_op_pack_colour_f16 = 0x09, + midgard_op_pack_colour_u32 = 0x0A, + midgard_op_pack_colour_s32 = 0x0B, - /* Computes the effective address of a mem address expression */ - midgard_op_lea = 0x0C, + /* Computes the effective address of a mem address expression */ + midgard_op_lea = 0x0C, - /* Converts image coordinates into mem address */ - midgard_op_lea_image = 0x0D, + /* Converts image coordinates into mem address */ + midgard_op_lea_image = 0x0D, - /* Unclear why this is on the L/S unit, but moves fp32 cube map - * coordinates in r27 to its cube map texture coordinate destination - * (e.g r29). */ + /* Unclear why this is on the L/S unit, but moves fp32 cube map + * coordinates in r27 to its cube map texture coordinate destination + * (e.g r29). */ - midgard_op_ld_cubemap_coords = 0x0E, + midgard_op_ld_cubemap_coords = 0x0E, - /* A mov between registers that the ldst pipeline can access */ - midgard_op_ldst_mov = 0x10, + /* A mov between registers that the ldst pipeline can access */ + midgard_op_ldst_mov = 0x10, - /* The L/S unit can do perspective division a clock faster than the ALU - * if you're lucky. Put the vec4 in r27, and call with 0x24 as the - * unknown state; the output will be . Replace w with - * z for the z version */ - midgard_op_ldst_perspective_div_y = 0x11, - midgard_op_ldst_perspective_div_z = 0x12, - midgard_op_ldst_perspective_div_w = 0x13, + /* The L/S unit can do perspective division a clock faster than the ALU + * if you're lucky. Put the vec4 in r27, and call with 0x24 as the + * unknown state; the output will be . Replace w with + * z for the z version */ + midgard_op_ldst_perspective_div_y = 0x11, + midgard_op_ldst_perspective_div_z = 0x12, + midgard_op_ldst_perspective_div_w = 0x13, - /* val in r27.y, address embedded, outputs result to argument. Invert val for sub. Let val = +-1 for inc/dec. */ - midgard_op_atomic_add = 0x40, - midgard_op_atomic_add64 = 0x41, - midgard_op_atomic_add_be = 0x42, - midgard_op_atomic_add64_be = 0x43, + /* val in r27.y, address embedded, outputs result to argument. Invert val for + sub. Let val = +-1 for inc/dec. 
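Side note (illustration only, not part of this patch): a sketch of building the 16-bit truth table that midgard_branch_extended::cond expects, per the comment above. The index convention below -- condition combination i selects bit i of the LUT, with A as the least significant input -- is an assumption; the hardware additionally applies the ordering swaps the comment mentions. branch_cond_lut is a made-up helper name.

   #include <stdbool.h>
   #include <stdint.h>

   static uint16_t
   branch_cond_lut(bool (*f)(bool, bool, bool, bool))
   {
      uint16_t lut = 0;

      /* Enumerate all 2^4 combinations of the four 1-bit conditions and
       * record f's output for each, giving the 16-bit Godel number. */
      for (unsigned i = 0; i < 16; ++i) {
         bool a = i & 1, b = i & 2, c = i & 4, d = i & 8;

         if (f(a, b, c, d))
            lut |= 1u << i;
      }

      return lut;
   }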
*/ + midgard_op_atomic_add = 0x40, + midgard_op_atomic_add64 = 0x41, + midgard_op_atomic_add_be = 0x42, + midgard_op_atomic_add64_be = 0x43, - midgard_op_atomic_and = 0x44, - midgard_op_atomic_and64 = 0x45, - midgard_op_atomic_and_be = 0x46, - midgard_op_atomic_and64_be = 0x47, - midgard_op_atomic_or = 0x48, - midgard_op_atomic_or64 = 0x49, - midgard_op_atomic_or_be = 0x4A, - midgard_op_atomic_or64_be = 0x4B, - midgard_op_atomic_xor = 0x4C, - midgard_op_atomic_xor64 = 0x4D, - midgard_op_atomic_xor_be = 0x4E, - midgard_op_atomic_xor64_be = 0x4F, + midgard_op_atomic_and = 0x44, + midgard_op_atomic_and64 = 0x45, + midgard_op_atomic_and_be = 0x46, + midgard_op_atomic_and64_be = 0x47, + midgard_op_atomic_or = 0x48, + midgard_op_atomic_or64 = 0x49, + midgard_op_atomic_or_be = 0x4A, + midgard_op_atomic_or64_be = 0x4B, + midgard_op_atomic_xor = 0x4C, + midgard_op_atomic_xor64 = 0x4D, + midgard_op_atomic_xor_be = 0x4E, + midgard_op_atomic_xor64_be = 0x4F, - midgard_op_atomic_imin = 0x50, - midgard_op_atomic_imin64 = 0x51, - midgard_op_atomic_imin_be = 0x52, - midgard_op_atomic_imin64_be = 0x53, - midgard_op_atomic_umin = 0x54, - midgard_op_atomic_umin64 = 0x55, - midgard_op_atomic_umin_be = 0x56, - midgard_op_atomic_umin64_be = 0x57, - midgard_op_atomic_imax = 0x58, - midgard_op_atomic_imax64 = 0x59, - midgard_op_atomic_imax_be = 0x5A, - midgard_op_atomic_imax64_be = 0x5B, - midgard_op_atomic_umax = 0x5C, - midgard_op_atomic_umax64 = 0x5D, - midgard_op_atomic_umax_be = 0x5E, - midgard_op_atomic_umax64_be = 0x5F, + midgard_op_atomic_imin = 0x50, + midgard_op_atomic_imin64 = 0x51, + midgard_op_atomic_imin_be = 0x52, + midgard_op_atomic_imin64_be = 0x53, + midgard_op_atomic_umin = 0x54, + midgard_op_atomic_umin64 = 0x55, + midgard_op_atomic_umin_be = 0x56, + midgard_op_atomic_umin64_be = 0x57, + midgard_op_atomic_imax = 0x58, + midgard_op_atomic_imax64 = 0x59, + midgard_op_atomic_imax_be = 0x5A, + midgard_op_atomic_imax64_be = 0x5B, + midgard_op_atomic_umax = 0x5C, + midgard_op_atomic_umax64 = 0x5D, + midgard_op_atomic_umax_be = 0x5E, + midgard_op_atomic_umax64_be = 0x5F, - midgard_op_atomic_xchg = 0x60, - midgard_op_atomic_xchg64 = 0x61, - midgard_op_atomic_xchg_be = 0x62, - midgard_op_atomic_xchg64_be = 0x63, + midgard_op_atomic_xchg = 0x60, + midgard_op_atomic_xchg64 = 0x61, + midgard_op_atomic_xchg_be = 0x62, + midgard_op_atomic_xchg64_be = 0x63, - midgard_op_atomic_cmpxchg = 0x64, - midgard_op_atomic_cmpxchg64 = 0x65, - midgard_op_atomic_cmpxchg_be = 0x66, - midgard_op_atomic_cmpxchg64_be = 0x67, + midgard_op_atomic_cmpxchg = 0x64, + midgard_op_atomic_cmpxchg64 = 0x65, + midgard_op_atomic_cmpxchg_be = 0x66, + midgard_op_atomic_cmpxchg64_be = 0x67, - /* Used for compute shader's __global arguments, __local - * variables (or for register spilling) */ + /* Used for compute shader's __global arguments, __local + * variables (or for register spilling) */ - midgard_op_ld_u8 = 0x80, /* zero extends */ - midgard_op_ld_i8 = 0x81, /* sign extends */ - midgard_op_ld_u16 = 0x84, /* zero extends */ - midgard_op_ld_i16 = 0x85, /* sign extends */ - midgard_op_ld_u16_be = 0x86, /* zero extends, big endian */ - midgard_op_ld_i16_be = 0x87, /* sign extends, big endian */ - midgard_op_ld_32 = 0x88, /* short2, int, float */ - midgard_op_ld_32_bswap2 = 0x89, /* 16-bit big endian vector */ - midgard_op_ld_32_bswap4 = 0x8A, /* 32-bit big endian scalar */ - midgard_op_ld_64 = 0x8C, /* int2, float2, long */ - midgard_op_ld_64_bswap2 = 0x8D, /* 16-bit big endian vector */ - midgard_op_ld_64_bswap4 = 0x8E, /* 32-bit big 
endian vector */ - midgard_op_ld_64_bswap8 = 0x8F, /* 64-bit big endian scalar */ - midgard_op_ld_128 = 0x90, /* float4, long2 */ - midgard_op_ld_128_bswap2 = 0x91, /* 16-bit big endian vector */ - midgard_op_ld_128_bswap4 = 0x92, /* 32-bit big endian vector */ - midgard_op_ld_128_bswap8 = 0x93, /* 64-bit big endian vector */ + midgard_op_ld_u8 = 0x80, /* zero extends */ + midgard_op_ld_i8 = 0x81, /* sign extends */ + midgard_op_ld_u16 = 0x84, /* zero extends */ + midgard_op_ld_i16 = 0x85, /* sign extends */ + midgard_op_ld_u16_be = 0x86, /* zero extends, big endian */ + midgard_op_ld_i16_be = 0x87, /* sign extends, big endian */ + midgard_op_ld_32 = 0x88, /* short2, int, float */ + midgard_op_ld_32_bswap2 = 0x89, /* 16-bit big endian vector */ + midgard_op_ld_32_bswap4 = 0x8A, /* 32-bit big endian scalar */ + midgard_op_ld_64 = 0x8C, /* int2, float2, long */ + midgard_op_ld_64_bswap2 = 0x8D, /* 16-bit big endian vector */ + midgard_op_ld_64_bswap4 = 0x8E, /* 32-bit big endian vector */ + midgard_op_ld_64_bswap8 = 0x8F, /* 64-bit big endian scalar */ + midgard_op_ld_128 = 0x90, /* float4, long2 */ + midgard_op_ld_128_bswap2 = 0x91, /* 16-bit big endian vector */ + midgard_op_ld_128_bswap4 = 0x92, /* 32-bit big endian vector */ + midgard_op_ld_128_bswap8 = 0x93, /* 64-bit big endian vector */ - midgard_op_ld_attr_32 = 0x94, - midgard_op_ld_attr_16 = 0x95, - midgard_op_ld_attr_32u = 0x96, - midgard_op_ld_attr_32i = 0x97, - midgard_op_ld_vary_32 = 0x98, - midgard_op_ld_vary_16 = 0x99, - midgard_op_ld_vary_32u = 0x9A, - midgard_op_ld_vary_32i = 0x9B, + midgard_op_ld_attr_32 = 0x94, + midgard_op_ld_attr_16 = 0x95, + midgard_op_ld_attr_32u = 0x96, + midgard_op_ld_attr_32i = 0x97, + midgard_op_ld_vary_32 = 0x98, + midgard_op_ld_vary_16 = 0x99, + midgard_op_ld_vary_32u = 0x9A, + midgard_op_ld_vary_32i = 0x9B, - /* This instruction behaves differently depending if the gpu is a v4 - * or a newer gpu. The main difference hinges on which values of the - * second argument are valid for each gpu. - * TODO: properly document and decode each possible value for the - * second argument. */ - midgard_op_ld_special_32f = 0x9C, - midgard_op_ld_special_16f = 0x9D, - midgard_op_ld_special_32u = 0x9E, - midgard_op_ld_special_32i = 0x9F, + /* This instruction behaves differently depending if the gpu is a v4 + * or a newer gpu. The main difference hinges on which values of the + * second argument are valid for each gpu. + * TODO: properly document and decode each possible value for the + * second argument. */ + midgard_op_ld_special_32f = 0x9C, + midgard_op_ld_special_16f = 0x9D, + midgard_op_ld_special_32u = 0x9E, + midgard_op_ld_special_32i = 0x9F, - /* The distinction between these ops is the alignment - * requirement / accompanying shift. Thus, the offset to - * ld_ubo_128 is in 16-byte units and can load 128-bit. The - * offset to ld_ubo_64 is in 8-byte units; ld_ubo_32 in 4-byte - * units. 
*/ - midgard_op_ld_ubo_u8 = 0xA0, /* theoretical */ - midgard_op_ld_ubo_i8 = 0xA1, /* theoretical */ - midgard_op_ld_ubo_u16 = 0xA4, /* theoretical */ - midgard_op_ld_ubo_i16 = 0xA5, /* theoretical */ - midgard_op_ld_ubo_u16_be = 0xA6, /* theoretical */ - midgard_op_ld_ubo_i16_be = 0xA7, /* theoretical */ - midgard_op_ld_ubo_32 = 0xA8, - midgard_op_ld_ubo_32_bswap2 = 0xA9, - midgard_op_ld_ubo_32_bswap4 = 0xAA, - midgard_op_ld_ubo_64 = 0xAC, - midgard_op_ld_ubo_64_bswap2 = 0xAD, - midgard_op_ld_ubo_64_bswap4 = 0xAE, - midgard_op_ld_ubo_64_bswap8 = 0xAF, - midgard_op_ld_ubo_128 = 0xB0, - midgard_op_ld_ubo_128_bswap2 = 0xB1, - midgard_op_ld_ubo_128_bswap4 = 0xB2, - midgard_op_ld_ubo_128_bswap8 = 0xB3, + /* The distinction between these ops is the alignment + * requirement / accompanying shift. Thus, the offset to + * ld_ubo_128 is in 16-byte units and can load 128-bit. The + * offset to ld_ubo_64 is in 8-byte units; ld_ubo_32 in 4-byte + * units. */ + midgard_op_ld_ubo_u8 = 0xA0, /* theoretical */ + midgard_op_ld_ubo_i8 = 0xA1, /* theoretical */ + midgard_op_ld_ubo_u16 = 0xA4, /* theoretical */ + midgard_op_ld_ubo_i16 = 0xA5, /* theoretical */ + midgard_op_ld_ubo_u16_be = 0xA6, /* theoretical */ + midgard_op_ld_ubo_i16_be = 0xA7, /* theoretical */ + midgard_op_ld_ubo_32 = 0xA8, + midgard_op_ld_ubo_32_bswap2 = 0xA9, + midgard_op_ld_ubo_32_bswap4 = 0xAA, + midgard_op_ld_ubo_64 = 0xAC, + midgard_op_ld_ubo_64_bswap2 = 0xAD, + midgard_op_ld_ubo_64_bswap4 = 0xAE, + midgard_op_ld_ubo_64_bswap8 = 0xAF, + midgard_op_ld_ubo_128 = 0xB0, + midgard_op_ld_ubo_128_bswap2 = 0xB1, + midgard_op_ld_ubo_128_bswap4 = 0xB2, + midgard_op_ld_ubo_128_bswap8 = 0xB3, - midgard_op_ld_image_32f = 0xB4, - midgard_op_ld_image_16f = 0xB5, - midgard_op_ld_image_32u = 0xB6, - midgard_op_ld_image_32i = 0xB7, + midgard_op_ld_image_32f = 0xB4, + midgard_op_ld_image_16f = 0xB5, + midgard_op_ld_image_32u = 0xB6, + midgard_op_ld_image_32i = 0xB7, - /* Only works on v5 or newer. - * Older cards must use ld_special with tilebuffer selectors. */ - midgard_op_ld_tilebuffer_32f = 0xB8, - midgard_op_ld_tilebuffer_16f = 0xB9, - midgard_op_ld_tilebuffer_raw = 0xBA, + /* Only works on v5 or newer. + * Older cards must use ld_special with tilebuffer selectors. 
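Side note (illustration only, not part of this patch): the ld_ubo_* offset scaling described above, written out. ld_ubo_offset_units is a made-up helper; it assumes midgard.h is on the include path and only handles the plain little-endian 32/64/128-bit variants.

   #include <assert.h>

   #include "midgard.h"

   static unsigned
   ld_ubo_offset_units(midgard_load_store_op op, unsigned byte_offset)
   {
      switch (op) {
      case midgard_op_ld_ubo_32:
         assert((byte_offset & 0x3) == 0);
         return byte_offset >> 2; /* 4-byte units */
      case midgard_op_ld_ubo_64:
         assert((byte_offset & 0x7) == 0);
         return byte_offset >> 3; /* 8-byte units */
      case midgard_op_ld_ubo_128:
         assert((byte_offset & 0xF) == 0);
         return byte_offset >> 4; /* 16-byte units */
      default:
         assert(!"only sketched for ld_ubo_32/64/128");
         return 0;
      }
   }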
*/ + midgard_op_ld_tilebuffer_32f = 0xB8, + midgard_op_ld_tilebuffer_16f = 0xB9, + midgard_op_ld_tilebuffer_raw = 0xBA, - midgard_op_st_u8 = 0xC0, /* zero extends */ - midgard_op_st_i8 = 0xC1, /* sign extends */ - midgard_op_st_u16 = 0xC4, /* zero extends */ - midgard_op_st_i16 = 0xC5, /* sign extends */ - midgard_op_st_u16_be = 0xC6, /* zero extends, big endian */ - midgard_op_st_i16_be = 0xC7, /* sign extends, big endian */ - midgard_op_st_32 = 0xC8, /* short2, int, float */ - midgard_op_st_32_bswap2 = 0xC9, /* 16-bit big endian vector */ - midgard_op_st_32_bswap4 = 0xCA, /* 32-bit big endian scalar */ - midgard_op_st_64 = 0xCC, /* int2, float2, long */ - midgard_op_st_64_bswap2 = 0xCD, /* 16-bit big endian vector */ - midgard_op_st_64_bswap4 = 0xCE, /* 32-bit big endian vector */ - midgard_op_st_64_bswap8 = 0xCF, /* 64-bit big endian scalar */ - midgard_op_st_128 = 0xD0, /* float4, long2 */ - midgard_op_st_128_bswap2 = 0xD1, /* 16-bit big endian vector */ - midgard_op_st_128_bswap4 = 0xD2, /* 32-bit big endian vector */ - midgard_op_st_128_bswap8 = 0xD3, /* 64-bit big endian vector */ + midgard_op_st_u8 = 0xC0, /* zero extends */ + midgard_op_st_i8 = 0xC1, /* sign extends */ + midgard_op_st_u16 = 0xC4, /* zero extends */ + midgard_op_st_i16 = 0xC5, /* sign extends */ + midgard_op_st_u16_be = 0xC6, /* zero extends, big endian */ + midgard_op_st_i16_be = 0xC7, /* sign extends, big endian */ + midgard_op_st_32 = 0xC8, /* short2, int, float */ + midgard_op_st_32_bswap2 = 0xC9, /* 16-bit big endian vector */ + midgard_op_st_32_bswap4 = 0xCA, /* 32-bit big endian scalar */ + midgard_op_st_64 = 0xCC, /* int2, float2, long */ + midgard_op_st_64_bswap2 = 0xCD, /* 16-bit big endian vector */ + midgard_op_st_64_bswap4 = 0xCE, /* 32-bit big endian vector */ + midgard_op_st_64_bswap8 = 0xCF, /* 64-bit big endian scalar */ + midgard_op_st_128 = 0xD0, /* float4, long2 */ + midgard_op_st_128_bswap2 = 0xD1, /* 16-bit big endian vector */ + midgard_op_st_128_bswap4 = 0xD2, /* 32-bit big endian vector */ + midgard_op_st_128_bswap8 = 0xD3, /* 64-bit big endian vector */ - midgard_op_st_vary_32 = 0xD4, - midgard_op_st_vary_16 = 0xD5, - midgard_op_st_vary_32u = 0xD6, - midgard_op_st_vary_32i = 0xD7, + midgard_op_st_vary_32 = 0xD4, + midgard_op_st_vary_16 = 0xD5, + midgard_op_st_vary_32u = 0xD6, + midgard_op_st_vary_32i = 0xD7, - /* Value to st in r27, location r26.w as short2 */ - midgard_op_st_image_32f = 0xD8, - midgard_op_st_image_16f = 0xD9, - midgard_op_st_image_32u = 0xDA, - midgard_op_st_image_32i = 0xDB, + /* Value to st in r27, location r26.w as short2 */ + midgard_op_st_image_32f = 0xD8, + midgard_op_st_image_16f = 0xD9, + midgard_op_st_image_32u = 0xDA, + midgard_op_st_image_32i = 0xDB, - midgard_op_st_special_32f = 0xDC, - midgard_op_st_special_16f = 0xDD, - midgard_op_st_special_32u = 0xDE, - midgard_op_st_special_32i = 0xDF, + midgard_op_st_special_32f = 0xDC, + midgard_op_st_special_16f = 0xDD, + midgard_op_st_special_32u = 0xDE, + midgard_op_st_special_32i = 0xDF, - /* Only works on v5 or newer. - * Older cards must use ld_special with tilebuffer selectors. */ - midgard_op_st_tilebuffer_32f = 0xE8, - midgard_op_st_tilebuffer_16f = 0xE9, - midgard_op_st_tilebuffer_raw = 0xEA, - midgard_op_trap = 0xFC, + /* Only works on v5 or newer. + * Older cards must use ld_special with tilebuffer selectors. 
*/ + midgard_op_st_tilebuffer_32f = 0xE8, + midgard_op_st_tilebuffer_16f = 0xE9, + midgard_op_st_tilebuffer_raw = 0xEA, + midgard_op_trap = 0xFC, } midgard_load_store_op; typedef enum { - midgard_interp_sample = 0, - midgard_interp_centroid = 1, - midgard_interp_default = 2 + midgard_interp_sample = 0, + midgard_interp_centroid = 1, + midgard_interp_default = 2 } midgard_interpolation; typedef enum { - midgard_varying_mod_none = 0, + midgard_varying_mod_none = 0, - /* Take the would-be result and divide all components by its y/z/w - * (perspective division baked in with the load) */ - midgard_varying_mod_perspective_y = 1, - midgard_varying_mod_perspective_z = 2, - midgard_varying_mod_perspective_w = 3, + /* Take the would-be result and divide all components by its y/z/w + * (perspective division baked in with the load) */ + midgard_varying_mod_perspective_y = 1, + midgard_varying_mod_perspective_z = 2, + midgard_varying_mod_perspective_w = 3, - /* The result is a 64-bit cubemap descriptor to use with - * midgard_tex_op_normal or midgard_tex_op_gradient */ - midgard_varying_mod_cubemap = 4, + /* The result is a 64-bit cubemap descriptor to use with + * midgard_tex_op_normal or midgard_tex_op_gradient */ + midgard_varying_mod_cubemap = 4, } midgard_varying_modifier; -typedef struct -__attribute__((__packed__)) -{ - midgard_varying_modifier modifier : 3; +typedef struct __attribute__((__packed__)) { + midgard_varying_modifier modifier : 3; - bool flat_shading : 1; + bool flat_shading : 1; - /* These are ignored if flat_shading is enabled. */ - bool perspective_correction : 1; - bool centroid_mapping : 1; + /* These are ignored if flat_shading is enabled. */ + bool perspective_correction : 1; + bool centroid_mapping : 1; - /* This is ignored if the shader only runs once per pixel. */ - bool interpolate_sample : 1; + /* This is ignored if the shader only runs once per pixel. */ + bool interpolate_sample : 1; - bool zero0 : 1; /* Always zero */ + bool zero0 : 1; /* Always zero */ - unsigned direct_sample_pos_x : 4; - unsigned direct_sample_pos_y : 4; -} -midgard_varying_params; + unsigned direct_sample_pos_x : 4; + unsigned direct_sample_pos_y : 4; +} midgard_varying_params; /* 8-bit register/etc selector for load/store ops */ -typedef struct -__attribute__((__packed__)) -{ - /* Indexes into the register */ - unsigned component : 2; +typedef struct __attribute__((__packed__)) { + /* Indexes into the register */ + unsigned component : 2; - /* Register select between r26/r27 */ - unsigned select : 1; + /* Register select between r26/r27 */ + unsigned select : 1; - unsigned unknown : 2; + unsigned unknown : 2; - /* Like any good Arm instruction set, load/store arguments can be - * implicitly left-shifted... but only the second argument. Zero for no - * shifting, up to <<7 possible though. This is useful for indexing. - * - * For the first argument, it's unknown what these bits mean */ - unsigned shift : 3; -} -midgard_ldst_register_select; + /* Like any good Arm instruction set, load/store arguments can be + * implicitly left-shifted... but only the second argument. Zero for no + * shifting, up to <<7 possible though. This is useful for indexing. 
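Side note (illustration only, not part of this patch): reference behaviour of the perspective varying modifiers above. The comment says all components are divided by the selected y/z/w component; whether the divisor lane itself is special-cased is not spelled out here, so this sketch divides all four as written. apply_varying_perspective is a made-up name and assumes midgard.h is included.

   #include "midgard.h"

   static void
   apply_varying_perspective(float v[4], midgard_varying_modifier mod)
   {
      float d;

      switch (mod) {
      case midgard_varying_mod_perspective_y: d = v[1]; break;
      case midgard_varying_mod_perspective_z: d = v[2]; break;
      case midgard_varying_mod_perspective_w: d = v[3]; break;
      default: return; /* none / cubemap: no division */
      }

      for (unsigned c = 0; c < 4; ++c)
         v[c] /= d;
   }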
+ * + * For the first argument, it's unknown what these bits mean */ + unsigned shift : 3; +} midgard_ldst_register_select; typedef enum { - /* 0 is reserved */ - midgard_index_address_u64 = 1, - midgard_index_address_u32 = 2, - midgard_index_address_s32 = 3, + /* 0 is reserved */ + midgard_index_address_u64 = 1, + midgard_index_address_u32 = 2, + midgard_index_address_s32 = 3, } midgard_index_address_format; -typedef struct -__attribute__((__packed__)) -{ - midgard_load_store_op op : 8; +typedef struct __attribute__((__packed__)) { + midgard_load_store_op op : 8; - /* Source/dest reg */ - unsigned reg : 5; + /* Source/dest reg */ + unsigned reg : 5; - /* Generally is a writemask. - * For ST_ATTR and ST_TEX, unused. - * For other stores, each bit masks 1/4th of the output. */ - unsigned mask : 4; + /* Generally is a writemask. + * For ST_ATTR and ST_TEX, unused. + * For other stores, each bit masks 1/4th of the output. */ + unsigned mask : 4; - /* Swizzle for stores, but for atomics it encodes also the source - * register. This fits because atomics dont need a swizzle since they - * are not vectorized instructions. */ - unsigned swizzle : 8; + /* Swizzle for stores, but for atomics it encodes also the source + * register. This fits because atomics dont need a swizzle since they + * are not vectorized instructions. */ + unsigned swizzle : 8; - /* Arg reg, meaning changes according to each opcode */ - unsigned arg_comp : 2; - unsigned arg_reg : 3; + /* Arg reg, meaning changes according to each opcode */ + unsigned arg_comp : 2; + unsigned arg_reg : 3; - /* 64-bit address enable - * 32-bit data type enable for CUBEMAP and perspective div. - * Explicit indexing enable for LD_ATTR. - * 64-bit coordinate enable for LD_IMAGE. */ - bool bitsize_toggle : 1; + /* 64-bit address enable + * 32-bit data type enable for CUBEMAP and perspective div. + * Explicit indexing enable for LD_ATTR. + * 64-bit coordinate enable for LD_IMAGE. */ + bool bitsize_toggle : 1; - /* These are mainly used for opcodes that have addresses. - * For cmpxchg, index_reg is used for the comparison value. - * For ops that access the attrib table, bit 1 encodes which table. - * For LD_VAR and LD/ST_ATTR, bit 0 enables dest/src type inferral. */ - midgard_index_address_format index_format : 2; - unsigned index_comp : 2; - unsigned index_reg : 3; - unsigned index_shift : 4; + /* These are mainly used for opcodes that have addresses. + * For cmpxchg, index_reg is used for the comparison value. + * For ops that access the attrib table, bit 1 encodes which table. + * For LD_VAR and LD/ST_ATTR, bit 0 enables dest/src type inferral. */ + midgard_index_address_format index_format : 2; + unsigned index_comp : 2; + unsigned index_reg : 3; + unsigned index_shift : 4; - /* Generaly is a signed offset, but has different bitsize and starts at - * different bits depending on the opcode, LDST_*_DISPLACEMENT helpers - * are recommended when packing/unpacking this attribute. - * For LD_UBO, bit 0 enables ubo index immediate. - * For LD_TILEBUFFER_RAW, bit 0 disables sample index immediate. */ - int signed_offset : 18; -} -midgard_load_store_word; + /* Generaly is a signed offset, but has different bitsize and starts at + * different bits depending on the opcode, LDST_*_DISPLACEMENT helpers + * are recommended when packing/unpacking this attribute. + * For LD_UBO, bit 0 enables ubo index immediate. + * For LD_TILEBUFFER_RAW, bit 0 disables sample index immediate. 
*/ + int signed_offset : 18; +} midgard_load_store_word; -typedef struct -__attribute__((__packed__)) -{ - unsigned type : 4; - unsigned next_type : 4; - uint64_t word1 : 60; - uint64_t word2 : 60; -} -midgard_load_store; +typedef struct __attribute__((__packed__)) { + unsigned type : 4; + unsigned next_type : 4; + uint64_t word1 : 60; + uint64_t word2 : 60; +} midgard_load_store; /* 8-bit register selector used in texture ops to select a bias/LOD/gradient * register, shoved into the `bias` field */ -typedef struct -__attribute__((__packed__)) -{ - /* 32-bit register, clear for half-register */ - unsigned full : 1; +typedef struct __attribute__((__packed__)) { + /* 32-bit register, clear for half-register */ + unsigned full : 1; - /* Register select between r28/r29 */ - unsigned select : 1; + /* Register select between r28/r29 */ + unsigned select : 1; - /* For a half-register, selects the upper half */ - unsigned upper : 1; + /* For a half-register, selects the upper half */ + unsigned upper : 1; - /* Indexes into the register */ - unsigned component : 2; + /* Indexes into the register */ + unsigned component : 2; - /* Padding to make this 8-bit */ - unsigned zero : 3; -} -midgard_tex_register_select; + /* Padding to make this 8-bit */ + unsigned zero : 3; +} midgard_tex_register_select; /* Texture pipeline results are in r28-r29 */ #define REG_TEX_BASE 28 enum mali_texture_op { - /* [texture + LOD bias] - * If the texture is mipmapped, barriers must be enabled in the - * instruction word in order for this opcode to compute the output - * correctly. */ - midgard_tex_op_normal = 1, + /* [texture + LOD bias] + * If the texture is mipmapped, barriers must be enabled in the + * instruction word in order for this opcode to compute the output + * correctly. */ + midgard_tex_op_normal = 1, - /* [texture + gradient for LOD and anisotropy] - * Unlike midgard_tex_op_normal, this opcode does not require barriers - * to compute the output correctly. */ - midgard_tex_op_gradient = 2, + /* [texture + gradient for LOD and anisotropy] + * Unlike midgard_tex_op_normal, this opcode does not require barriers + * to compute the output correctly. */ + midgard_tex_op_gradient = 2, - /* [unfiltered texturing] - * Unlike midgard_tex_op_normal, this opcode does not require barriers - * to compute the output correctly. */ - midgard_tex_op_fetch = 4, + /* [unfiltered texturing] + * Unlike midgard_tex_op_normal, this opcode does not require barriers + * to compute the output correctly. */ + midgard_tex_op_fetch = 4, - /* [gradient from derivative] */ - midgard_tex_op_grad_from_derivative = 9, + /* [gradient from derivative] */ + midgard_tex_op_grad_from_derivative = 9, - /* [mov] */ - midgard_tex_op_mov = 10, + /* [mov] */ + midgard_tex_op_mov = 10, - /* [noop] - * Mostly used for barriers. */ - midgard_tex_op_barrier = 11, + /* [noop] + * Mostly used for barriers. */ + midgard_tex_op_barrier = 11, - /* [gradient from coords] */ - midgard_tex_op_grad_from_coords = 12, + /* [gradient from coords] */ + midgard_tex_op_grad_from_coords = 12, - /* [derivative] - * Computes derivatives in 2x2 fragment blocks. */ - midgard_tex_op_derivative = 13 + /* [derivative] + * Computes derivatives in 2x2 fragment blocks. 
*/ + midgard_tex_op_derivative = 13 }; enum mali_sampler_type { - /* 0 is reserved */ - MALI_SAMPLER_FLOAT = 0x1, /* sampler */ - MALI_SAMPLER_UNSIGNED = 0x2, /* usampler */ - MALI_SAMPLER_SIGNED = 0x3, /* isampler */ + /* 0 is reserved */ + MALI_SAMPLER_FLOAT = 0x1, /* sampler */ + MALI_SAMPLER_UNSIGNED = 0x2, /* usampler */ + MALI_SAMPLER_SIGNED = 0x3, /* isampler */ }; /* Texture modes */ enum mali_texture_mode { - TEXTURE_NORMAL = 1, - TEXTURE_SHADOW = 5, - TEXTURE_GATHER_SHADOW = 6, - TEXTURE_GATHER_X = 8, - TEXTURE_GATHER_Y = 9, - TEXTURE_GATHER_Z = 10, - TEXTURE_GATHER_W = 11, + TEXTURE_NORMAL = 1, + TEXTURE_SHADOW = 5, + TEXTURE_GATHER_SHADOW = 6, + TEXTURE_GATHER_X = 8, + TEXTURE_GATHER_Y = 9, + TEXTURE_GATHER_Z = 10, + TEXTURE_GATHER_W = 11, }; enum mali_derivative_mode { - TEXTURE_DFDX = 0, - TEXTURE_DFDY = 1, + TEXTURE_DFDX = 0, + TEXTURE_DFDY = 1, }; enum midgard_partial_execution { - MIDGARD_PARTIAL_EXECUTION_SKIP = 1, - MIDGARD_PARTIAL_EXECUTION_KILL = 2, - MIDGARD_PARTIAL_EXECUTION_NONE = 3 + MIDGARD_PARTIAL_EXECUTION_SKIP = 1, + MIDGARD_PARTIAL_EXECUTION_KILL = 2, + MIDGARD_PARTIAL_EXECUTION_NONE = 3 }; -typedef struct -__attribute__((__packed__)) -{ - unsigned type : 4; - unsigned next_type : 4; +typedef struct __attribute__((__packed__)) { + unsigned type : 4; + unsigned next_type : 4; - enum mali_texture_op op : 4; - unsigned mode : 4; - enum midgard_partial_execution exec : 2; + enum mali_texture_op op : 4; + unsigned mode : 4; + enum midgard_partial_execution exec : 2; - unsigned format : 2; + unsigned format : 2; - /* Are sampler_handle/texture_handler respectively set by registers? If - * true, the lower 8-bits of the respective field is a register word. - * If false, they are an immediate */ + /* Are sampler_handle/texture_handler respectively set by registers? If + * true, the lower 8-bits of the respective field is a register word. + * If false, they are an immediate */ - unsigned sampler_register : 1; - unsigned texture_register : 1; + unsigned sampler_register : 1; + unsigned texture_register : 1; - /* Is a register used to specify the - * LOD/bias/offset? If set, use the `bias` field as - * a register index. If clear, use the `bias` field - * as an immediate. */ - unsigned lod_register : 1; + /* Is a register used to specify the + * LOD/bias/offset? If set, use the `bias` field as + * a register index. If clear, use the `bias` field + * as an immediate. */ + unsigned lod_register : 1; - /* Is a register used to specify an offset? If set, use the - * offset_reg_* fields to encode this, duplicated for each of the - * components. If clear, there is implcitly always an immediate offst - * specificed in offset_imm_* */ - unsigned offset_register : 1; + /* Is a register used to specify an offset? If set, use the + * offset_reg_* fields to encode this, duplicated for each of the + * components. 
If clear, there is implcitly always an immediate offst + * specificed in offset_imm_* */ + unsigned offset_register : 1; - unsigned in_reg_full : 1; - unsigned in_reg_select : 1; - unsigned in_reg_upper : 1; - unsigned in_reg_swizzle : 8; + unsigned in_reg_full : 1; + unsigned in_reg_select : 1; + unsigned in_reg_upper : 1; + unsigned in_reg_swizzle : 8; - unsigned unknown8 : 2; + unsigned unknown8 : 2; - unsigned out_full : 1; + unsigned out_full : 1; - enum mali_sampler_type sampler_type : 2; + enum mali_sampler_type sampler_type : 2; - unsigned out_reg_select : 1; - unsigned out_upper : 1; + unsigned out_reg_select : 1; + unsigned out_upper : 1; - unsigned mask : 4; + unsigned mask : 4; - /* Intriguingly, textures can take an outmod just like alu ops. Int - * outmods are not supported as far as I can tell, so this is only - * meaningful for float samplers */ - midgard_outmod_float outmod : 2; + /* Intriguingly, textures can take an outmod just like alu ops. Int + * outmods are not supported as far as I can tell, so this is only + * meaningful for float samplers */ + midgard_outmod_float outmod : 2; - unsigned swizzle : 8; + unsigned swizzle : 8; - /* These indicate how many bundles after this texture op may be - * executed in parallel with this op. We may execute only ALU and - * ld/st in parallel (not other textures), and obviously there cannot - * be any dependency (the blob appears to forbid even accessing other - * channels of a given texture register). */ + /* These indicate how many bundles after this texture op may be + * executed in parallel with this op. We may execute only ALU and + * ld/st in parallel (not other textures), and obviously there cannot + * be any dependency (the blob appears to forbid even accessing other + * channels of a given texture register). */ - unsigned out_of_order : 4; - unsigned unknown4 : 8; + unsigned out_of_order : 4; + unsigned unknown4 : 8; - /* In immediate mode, each offset field is an immediate range [0, 7]. - * - * In register mode, offset_x becomes a register (full, select, upper) - * triplet followed by a vec3 swizzle is splattered across - * offset_y/offset_z in a genuinely bizarre way. - * - * For texel fetches in immediate mode, the range is the full [-8, 7], - * but for normal texturing the top bit must be zero and a register - * used instead. It's not clear where this limitation is from. - * - * union { - * struct { - * signed offset_x : 4; - * signed offset_y : 4; - * signed offset_z : 4; - * } immediate; - * struct { - * bool full : 1; - * bool select : 1; - * bool upper : 1; - * unsigned swizzle : 8; - * unsigned zero : 1; - * } register; - * } - */ + /* In immediate mode, each offset field is an immediate range [0, 7]. + * + * In register mode, offset_x becomes a register (full, select, upper) + * triplet followed by a vec3 swizzle is splattered across + * offset_y/offset_z in a genuinely bizarre way. + * + * For texel fetches in immediate mode, the range is the full [-8, 7], + * but for normal texturing the top bit must be zero and a register + * used instead. It's not clear where this limitation is from. 
+ * + * union { + * struct { + * signed offset_x : 4; + * signed offset_y : 4; + * signed offset_z : 4; + * } immediate; + * struct { + * bool full : 1; + * bool select : 1; + * bool upper : 1; + * unsigned swizzle : 8; + * unsigned zero : 1; + * } register; + * } + */ - unsigned offset : 12; + unsigned offset : 12; - /* In immediate bias mode, for a normal texture op, this is - * texture bias, computed as int(2^8 * frac(biasf)), with - * bias_int = floor(bias). For a textureLod, it's that, but - * s/bias/lod. For a texel fetch, this is the LOD as-is. - * - * In register mode, this is a midgard_tex_register_select - * structure and bias_int is zero */ + /* In immediate bias mode, for a normal texture op, this is + * texture bias, computed as int(2^8 * frac(biasf)), with + * bias_int = floor(bias). For a textureLod, it's that, but + * s/bias/lod. For a texel fetch, this is the LOD as-is. + * + * In register mode, this is a midgard_tex_register_select + * structure and bias_int is zero */ - unsigned bias : 8; - signed bias_int : 8; + unsigned bias : 8; + signed bias_int : 8; - /* If sampler/texture_register is set, the bottom 8-bits are - * midgard_tex_register_select and the top 8-bits are zero. If they are - * clear, they are immediate texture indices */ + /* If sampler/texture_register is set, the bottom 8-bits are + * midgard_tex_register_select and the top 8-bits are zero. If they are + * clear, they are immediate texture indices */ - unsigned sampler_handle : 16; - unsigned texture_handle : 16; -} -midgard_texture_word; + unsigned sampler_handle : 16; + unsigned texture_handle : 16; +} midgard_texture_word; /* Technically barriers are texture instructions but it's less work to add them * as an explicitly zeroed special case, since most fields are forced to go to * zero */ -typedef struct -__attribute__((__packed__)) -{ - unsigned type : 4; - unsigned next_type : 4; +typedef struct __attribute__((__packed__)) { + unsigned type : 4; + unsigned next_type : 4; - /* op = TEXTURE_OP_BARRIER */ - unsigned op : 6; - unsigned zero1 : 2; + /* op = TEXTURE_OP_BARRIER */ + unsigned op : 6; + unsigned zero1 : 2; - /* Since helper invocations don't make any sense, these are forced to one */ - unsigned cont : 1; - unsigned last : 1; - unsigned zero2 : 14; + /* Since helper invocations don't make any sense, these are forced to one */ + unsigned cont : 1; + unsigned last : 1; + unsigned zero2 : 14; - unsigned zero3 : 24; - unsigned out_of_order : 4; - unsigned zero4 : 4; + unsigned zero3 : 24; + unsigned out_of_order : 4; + unsigned zero4 : 4; - uint64_t zero5; + uint64_t zero5; } midgard_texture_barrier_word; typedef union midgard_constants { - double f64[2]; - uint64_t u64[2]; - int64_t i64[2]; - float f32[4]; - uint32_t u32[4]; - int32_t i32[4]; - uint16_t f16[8]; - uint16_t u16[8]; - int16_t i16[8]; - uint8_t u8[16]; - int8_t i8[16]; -} -midgard_constants; + double f64[2]; + uint64_t u64[2]; + int64_t i64[2]; + float f32[4]; + uint32_t u32[4]; + int32_t i32[4]; + uint16_t f16[8]; + uint16_t u16[8]; + int16_t i16[8]; + uint8_t u8[16]; + int8_t i8[16]; +} midgard_constants; enum midgard_roundmode { - MIDGARD_RTE = 0x0, /* round to even */ - MIDGARD_RTZ = 0x1, /* round to zero */ - MIDGARD_RTN = 0x2, /* round to negative */ - MIDGARD_RTP = 0x3, /* round to positive */ + MIDGARD_RTE = 0x0, /* round to even */ + MIDGARD_RTZ = 0x1, /* round to zero */ + MIDGARD_RTN = 0x2, /* round to negative */ + MIDGARD_RTP = 0x3, /* round to positive */ }; #endif diff --git a/src/panfrost/midgard/midgard_address.c 
b/src/panfrost/midgard/midgard_address.c index 8b80f042ad0..6b514e5aa61 100644 --- a/src/panfrost/midgard/midgard_address.c +++ b/src/panfrost/midgard/midgard_address.c @@ -33,32 +33,33 @@ * * A + (zext?(B) << #s) + #c * - * This allows for fast indexing into arrays. This file tries to pattern match the offset in NIR with this form to reduce pressure on the ALU pipe. + * This allows for fast indexing into arrays. This file tries to pattern match + * the offset in NIR with this form to reduce pressure on the ALU pipe. */ struct mir_address { - nir_ssa_scalar A; - nir_ssa_scalar B; + nir_ssa_scalar A; + nir_ssa_scalar B; - midgard_index_address_format type; - unsigned shift; - unsigned bias; + midgard_index_address_format type; + unsigned shift; + unsigned bias; }; static bool mir_args_ssa(nir_ssa_scalar s, unsigned count) { - nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr); + nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr); - if (count > nir_op_infos[alu->op].num_inputs) - return false; + if (count > nir_op_infos[alu->op].num_inputs) + return false; - for (unsigned i = 0; i < count; ++i) { - if (!alu->src[i].src.is_ssa) - return false; - } + for (unsigned i = 0; i < count; ++i) { + if (!alu->src[i].src.is_ssa) + return false; + } - return true; + return true; } /* Matches a constant in either slot and moves it to the bias */ @@ -66,15 +67,15 @@ mir_args_ssa(nir_ssa_scalar s, unsigned count) static void mir_match_constant(struct mir_address *address) { - if (address->A.def && nir_ssa_scalar_is_const(address->A)) { - address->bias += nir_ssa_scalar_as_uint(address->A); - address->A.def = NULL; - } + if (address->A.def && nir_ssa_scalar_is_const(address->A)) { + address->bias += nir_ssa_scalar_as_uint(address->A); + address->A.def = NULL; + } - if (address->B.def && nir_ssa_scalar_is_const(address->B)) { - address->bias += nir_ssa_scalar_as_uint(address->B); - address->B.def = NULL; - } + if (address->B.def && nir_ssa_scalar_is_const(address->B)) { + address->bias += nir_ssa_scalar_as_uint(address->B); + address->B.def = NULL; + } } /* Matches an iadd when there is a free slot or constant */ @@ -85,33 +86,33 @@ mir_match_constant(struct mir_address *address) static void mir_match_iadd(struct mir_address *address, bool first_free) { - if (!address->B.def || !nir_ssa_scalar_is_alu(address->B)) - return; + if (!address->B.def || !nir_ssa_scalar_is_alu(address->B)) + return; - if (!mir_args_ssa(address->B, 2)) - return; + if (!mir_args_ssa(address->B, 2)) + return; - nir_op op = nir_ssa_scalar_alu_op(address->B); + nir_op op = nir_ssa_scalar_alu_op(address->B); - if (op != nir_op_iadd) return; + if (op != nir_op_iadd) + return; - nir_ssa_scalar op1 = nir_ssa_scalar_chase_alu_src(address->B, 0); - nir_ssa_scalar op2 = nir_ssa_scalar_chase_alu_src(address->B, 1); + nir_ssa_scalar op1 = nir_ssa_scalar_chase_alu_src(address->B, 0); + nir_ssa_scalar op2 = nir_ssa_scalar_chase_alu_src(address->B, 1); - if (nir_ssa_scalar_is_const(op1) && - nir_ssa_scalar_as_uint(op1) <= MAX_POSITIVE_OFFSET) { - address->bias += nir_ssa_scalar_as_uint(op1); - address->B = op2; - } else if (nir_ssa_scalar_is_const(op2) && - nir_ssa_scalar_as_uint(op2) <= MAX_POSITIVE_OFFSET) { - address->bias += nir_ssa_scalar_as_uint(op2); - address->B = op1; - } else if (!nir_ssa_scalar_is_const(op1) && - !nir_ssa_scalar_is_const(op2) && - first_free && !address->A.def) { - address->A = op1; - address->B = op2; - } + if (nir_ssa_scalar_is_const(op1) && + nir_ssa_scalar_as_uint(op1) <= MAX_POSITIVE_OFFSET) { + 
address->bias += nir_ssa_scalar_as_uint(op1); + address->B = op2; + } else if (nir_ssa_scalar_is_const(op2) && + nir_ssa_scalar_as_uint(op2) <= MAX_POSITIVE_OFFSET) { + address->bias += nir_ssa_scalar_as_uint(op2); + address->B = op1; + } else if (!nir_ssa_scalar_is_const(op1) && !nir_ssa_scalar_is_const(op2) && + first_free && !address->A.def) { + address->A = op1; + address->B = op2; + } } /* Matches u2u64 and sets type */ @@ -119,18 +120,19 @@ mir_match_iadd(struct mir_address *address, bool first_free) static void mir_match_u2u64(struct mir_address *address) { - if (!address->B.def || !nir_ssa_scalar_is_alu(address->B)) - return; + if (!address->B.def || !nir_ssa_scalar_is_alu(address->B)) + return; - if (!mir_args_ssa(address->B, 1)) - return; + if (!mir_args_ssa(address->B, 1)) + return; - nir_op op = nir_ssa_scalar_alu_op(address->B); - if (op != nir_op_u2u64) return; - nir_ssa_scalar arg = nir_ssa_scalar_chase_alu_src(address->B, 0); + nir_op op = nir_ssa_scalar_alu_op(address->B); + if (op != nir_op_u2u64) + return; + nir_ssa_scalar arg = nir_ssa_scalar_chase_alu_src(address->B, 0); - address->B = arg; - address->type = midgard_index_address_u32; + address->B = arg; + address->type = midgard_index_address_u32; } /* Matches i2i64 and sets type */ @@ -138,18 +140,19 @@ mir_match_u2u64(struct mir_address *address) static void mir_match_i2i64(struct mir_address *address) { - if (!address->B.def || !nir_ssa_scalar_is_alu(address->B)) - return; + if (!address->B.def || !nir_ssa_scalar_is_alu(address->B)) + return; - if (!mir_args_ssa(address->B, 1)) - return; + if (!mir_args_ssa(address->B, 1)) + return; - nir_op op = nir_ssa_scalar_alu_op(address->B); - if (op != nir_op_i2i64) return; - nir_ssa_scalar arg = nir_ssa_scalar_chase_alu_src(address->B, 0); + nir_op op = nir_ssa_scalar_alu_op(address->B); + if (op != nir_op_i2i64) + return; + nir_ssa_scalar arg = nir_ssa_scalar_chase_alu_src(address->B, 0); - address->B = arg; - address->type = midgard_index_address_s32; + address->B = arg; + address->type = midgard_index_address_s32; } /* Matches ishl to shift */ @@ -157,24 +160,27 @@ mir_match_i2i64(struct mir_address *address) static void mir_match_ishl(struct mir_address *address) { - if (!address->B.def || !nir_ssa_scalar_is_alu(address->B)) - return; + if (!address->B.def || !nir_ssa_scalar_is_alu(address->B)) + return; - if (!mir_args_ssa(address->B, 2)) - return; + if (!mir_args_ssa(address->B, 2)) + return; - nir_op op = nir_ssa_scalar_alu_op(address->B); - if (op != nir_op_ishl) return; - nir_ssa_scalar op1 = nir_ssa_scalar_chase_alu_src(address->B, 0); - nir_ssa_scalar op2 = nir_ssa_scalar_chase_alu_src(address->B, 1); + nir_op op = nir_ssa_scalar_alu_op(address->B); + if (op != nir_op_ishl) + return; + nir_ssa_scalar op1 = nir_ssa_scalar_chase_alu_src(address->B, 0); + nir_ssa_scalar op2 = nir_ssa_scalar_chase_alu_src(address->B, 1); - if (!nir_ssa_scalar_is_const(op2)) return; + if (!nir_ssa_scalar_is_const(op2)) + return; - unsigned shift = nir_ssa_scalar_as_uint(op2); - if (shift > 0x7) return; + unsigned shift = nir_ssa_scalar_as_uint(op2); + if (shift > 0x7) + return; - address->B = op1; - address->shift = shift; + address->B = op1; + address->shift = shift; } /* Strings through mov which can happen from NIR vectorization */ @@ -182,19 +188,19 @@ mir_match_ishl(struct mir_address *address) static void mir_match_mov(struct mir_address *address) { - if (address->A.def && nir_ssa_scalar_is_alu(address->A)) { - nir_op op = nir_ssa_scalar_alu_op(address->A); + if (address->A.def 
&& nir_ssa_scalar_is_alu(address->A)) { + nir_op op = nir_ssa_scalar_alu_op(address->A); - if (op == nir_op_mov && mir_args_ssa(address->A, 1)) - address->A = nir_ssa_scalar_chase_alu_src(address->A, 0); - } + if (op == nir_op_mov && mir_args_ssa(address->A, 1)) + address->A = nir_ssa_scalar_chase_alu_src(address->A, 0); + } - if (address->B.def && nir_ssa_scalar_is_alu(address->B)) { - nir_op op = nir_ssa_scalar_alu_op(address->B); + if (address->B.def && nir_ssa_scalar_is_alu(address->B)) { + nir_op op = nir_ssa_scalar_alu_op(address->B); - if (op == nir_op_mov && mir_args_ssa(address->B, 1)) - address->B = nir_ssa_scalar_chase_alu_src(address->B, 0); - } + if (op == nir_op_mov && mir_args_ssa(address->B, 1)) + address->B = nir_ssa_scalar_chase_alu_src(address->B, 0); + } } /* Tries to pattern match into mir_address */ @@ -202,105 +208,105 @@ mir_match_mov(struct mir_address *address) static struct mir_address mir_match_offset(nir_ssa_def *offset, bool first_free, bool extend) { - struct mir_address address = { - .B = { .def = offset }, - .type = extend ? midgard_index_address_u64 : midgard_index_address_u32, - }; + struct mir_address address = { + .B = {.def = offset}, + .type = extend ? midgard_index_address_u64 : midgard_index_address_u32, + }; - mir_match_mov(&address); - mir_match_constant(&address); - mir_match_mov(&address); - mir_match_iadd(&address, first_free); - mir_match_mov(&address); + mir_match_mov(&address); + mir_match_constant(&address); + mir_match_mov(&address); + mir_match_iadd(&address, first_free); + mir_match_mov(&address); - if (extend) { - mir_match_u2u64(&address); - mir_match_i2i64(&address); - mir_match_mov(&address); - } + if (extend) { + mir_match_u2u64(&address); + mir_match_i2i64(&address); + mir_match_mov(&address); + } - mir_match_ishl(&address); + mir_match_ishl(&address); - return address; + return address; } void -mir_set_offset(compiler_context *ctx, midgard_instruction *ins, nir_src *offset, unsigned seg) +mir_set_offset(compiler_context *ctx, midgard_instruction *ins, nir_src *offset, + unsigned seg) { - for(unsigned i = 0; i < 16; ++i) { - ins->swizzle[1][i] = 0; - ins->swizzle[2][i] = 0; - } + for (unsigned i = 0; i < 16; ++i) { + ins->swizzle[1][i] = 0; + ins->swizzle[2][i] = 0; + } - /* Sign extend instead of zero extend in case the address is something - * like `base + offset + 20`, where offset could be negative. */ - bool force_sext = (nir_src_bit_size(*offset) < 64); + /* Sign extend instead of zero extend in case the address is something + * like `base + offset + 20`, where offset could be negative. 
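Side note (illustration only, not part of this patch): a toy check of the decomposition mir_match_offset performs. For an offset expression like base + (u2u64(idx) << 4) + 20, the matchers above can reduce to A = base, B = idx, shift = 4, bias = 20 and type = midgard_index_address_u32 -- the A + (zext(B) << #s) + #c form from the file comment. The snippet only verifies the arithmetic identity; it does not touch NIR, and the variable names are illustrative.

   #include <assert.h>
   #include <stdint.h>

   int
   main(void)
   {
      uint64_t base = 0x1000, idx = 7;

      /* Original byte offset as the shader would compute it */
      uint64_t offset = base + ((idx << 4) + 20);

      /* What the matcher extracts: A, B, shift, bias */
      uint64_t A = base, B = idx, shift = 4, bias = 20;

      assert(offset == A + (B << shift) + bias);
      return 0;
   }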
*/ + bool force_sext = (nir_src_bit_size(*offset) < 64); - if (!offset->is_ssa) { - ins->load_store.bitsize_toggle = true; - ins->load_store.arg_comp = seg & 0x3; - ins->load_store.arg_reg = (seg >> 2) & 0x7; - ins->src[2] = nir_src_index(ctx, offset); - ins->src_types[2] = nir_type_uint | nir_src_bit_size(*offset); + if (!offset->is_ssa) { + ins->load_store.bitsize_toggle = true; + ins->load_store.arg_comp = seg & 0x3; + ins->load_store.arg_reg = (seg >> 2) & 0x7; + ins->src[2] = nir_src_index(ctx, offset); + ins->src_types[2] = nir_type_uint | nir_src_bit_size(*offset); - if (force_sext) - ins->load_store.index_format = midgard_index_address_s32; - else - ins->load_store.index_format = midgard_index_address_u64; + if (force_sext) + ins->load_store.index_format = midgard_index_address_s32; + else + ins->load_store.index_format = midgard_index_address_u64; - return; - } + return; + } - bool first_free = (seg == LDST_GLOBAL); + bool first_free = (seg == LDST_GLOBAL); - struct mir_address match = mir_match_offset(offset->ssa, first_free, true); + struct mir_address match = mir_match_offset(offset->ssa, first_free, true); - if (match.A.def) { - unsigned bitsize = match.A.def->bit_size; - assert(bitsize == 32 || bitsize == 64); + if (match.A.def) { + unsigned bitsize = match.A.def->bit_size; + assert(bitsize == 32 || bitsize == 64); - ins->src[1] = nir_ssa_index(match.A.def); - ins->swizzle[1][0] = match.A.comp; - ins->src_types[1] = nir_type_uint | bitsize; - ins->load_store.bitsize_toggle = (bitsize == 64); - } else { - ins->load_store.bitsize_toggle = true; - ins->load_store.arg_comp = seg & 0x3; - ins->load_store.arg_reg = (seg >> 2) & 0x7; - } + ins->src[1] = nir_ssa_index(match.A.def); + ins->swizzle[1][0] = match.A.comp; + ins->src_types[1] = nir_type_uint | bitsize; + ins->load_store.bitsize_toggle = (bitsize == 64); + } else { + ins->load_store.bitsize_toggle = true; + ins->load_store.arg_comp = seg & 0x3; + ins->load_store.arg_reg = (seg >> 2) & 0x7; + } - if (match.B.def) { - ins->src[2] = nir_ssa_index(match.B.def); - ins->swizzle[2][0] = match.B.comp; - ins->src_types[2] = nir_type_uint | match.B.def->bit_size; - } else - ins->load_store.index_reg = REGISTER_LDST_ZERO; + if (match.B.def) { + ins->src[2] = nir_ssa_index(match.B.def); + ins->swizzle[2][0] = match.B.comp; + ins->src_types[2] = nir_type_uint | match.B.def->bit_size; + } else + ins->load_store.index_reg = REGISTER_LDST_ZERO; - if (force_sext) - match.type = midgard_index_address_s32; + if (force_sext) + match.type = midgard_index_address_s32; - ins->load_store.index_format = match.type; + ins->load_store.index_format = match.type; - assert(match.shift <= 7); - ins->load_store.index_shift = match.shift; + assert(match.shift <= 7); + ins->load_store.index_shift = match.shift; - ins->constants.u32[0] = match.bias; + ins->constants.u32[0] = match.bias; } - void mir_set_ubo_offset(midgard_instruction *ins, nir_src *src, unsigned bias) { - assert(src->is_ssa); - struct mir_address match = mir_match_offset(src->ssa, false, false); + assert(src->is_ssa); + struct mir_address match = mir_match_offset(src->ssa, false, false); - if (match.B.def) { - ins->src[2] = nir_ssa_index(match.B.def); + if (match.B.def) { + ins->src[2] = nir_ssa_index(match.B.def); - for (unsigned i = 0; i < ARRAY_SIZE(ins->swizzle[2]); ++i) - ins->swizzle[2][i] = match.B.comp; - } + for (unsigned i = 0; i < ARRAY_SIZE(ins->swizzle[2]); ++i) + ins->swizzle[2][i] = match.B.comp; + } - ins->load_store.index_shift = match.shift; - ins->constants.u32[0] = 
match.bias + bias; + ins->load_store.index_shift = match.shift; + ins->constants.u32[0] = match.bias + bias; } diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index 10e05fbf454..c95bb2414a5 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -22,78 +22,78 @@ * SOFTWARE. */ -#include -#include -#include +#include #include #include -#include #include -#include +#include +#include +#include +#include #include "compiler/glsl/glsl_to_nir.h" -#include "compiler/nir_types.h" #include "compiler/nir/nir_builder.h" +#include "compiler/nir_types.h" #include "util/half_float.h" -#include "util/u_math.h" +#include "util/list.h" #include "util/u_debug.h" #include "util/u_dynarray.h" -#include "util/list.h" +#include "util/u_math.h" -#include "midgard.h" -#include "midgard_nir.h" -#include "midgard_compile.h" -#include "midgard_ops.h" -#include "helpers.h" -#include "compiler.h" -#include "midgard_quirks.h" #include "panfrost/util/pan_lower_framebuffer.h" +#include "compiler.h" +#include "helpers.h" +#include "midgard.h" +#include "midgard_compile.h" +#include "midgard_nir.h" +#include "midgard_ops.h" +#include "midgard_quirks.h" #include "disassemble.h" static const struct debug_named_value midgard_debug_options[] = { - {"msgs", MIDGARD_DBG_MSGS, "Print debug messages"}, - {"shaders", MIDGARD_DBG_SHADERS, "Dump shaders in NIR and MIR"}, - {"shaderdb", MIDGARD_DBG_SHADERDB, "Prints shader-db statistics"}, - {"inorder", MIDGARD_DBG_INORDER, "Disables out-of-order scheduling"}, - {"verbose", MIDGARD_DBG_VERBOSE, "Dump shaders verbosely"}, - {"internal", MIDGARD_DBG_INTERNAL, "Dump internal shaders"}, - DEBUG_NAMED_VALUE_END -}; + {"msgs", MIDGARD_DBG_MSGS, "Print debug messages"}, + {"shaders", MIDGARD_DBG_SHADERS, "Dump shaders in NIR and MIR"}, + {"shaderdb", MIDGARD_DBG_SHADERDB, "Prints shader-db statistics"}, + {"inorder", MIDGARD_DBG_INORDER, "Disables out-of-order scheduling"}, + {"verbose", MIDGARD_DBG_VERBOSE, "Dump shaders verbosely"}, + {"internal", MIDGARD_DBG_INTERNAL, "Dump internal shaders"}, + DEBUG_NAMED_VALUE_END}; -DEBUG_GET_ONCE_FLAGS_OPTION(midgard_debug, "MIDGARD_MESA_DEBUG", midgard_debug_options, 0) +DEBUG_GET_ONCE_FLAGS_OPTION(midgard_debug, "MIDGARD_MESA_DEBUG", + midgard_debug_options, 0) int midgard_debug = 0; -#define DBG(fmt, ...) \ - do { if (midgard_debug & MIDGARD_DBG_MSGS) \ - fprintf(stderr, "%s:%d: "fmt, \ - __func__, __LINE__, ##__VA_ARGS__); } while (0) +#define DBG(fmt, ...) 
\ + do { \ + if (midgard_debug & MIDGARD_DBG_MSGS) \ + fprintf(stderr, "%s:%d: " fmt, __func__, __LINE__, ##__VA_ARGS__); \ + } while (0) static midgard_block * create_empty_block(compiler_context *ctx) { - midgard_block *blk = rzalloc(ctx, midgard_block); + midgard_block *blk = rzalloc(ctx, midgard_block); - blk->base.predecessors = _mesa_set_create(blk, - _mesa_hash_pointer, - _mesa_key_pointer_equal); + blk->base.predecessors = + _mesa_set_create(blk, _mesa_hash_pointer, _mesa_key_pointer_equal); - blk->base.name = ctx->block_source_count++; + blk->base.name = ctx->block_source_count++; - return blk; + return blk; } static void schedule_barrier(compiler_context *ctx) { - midgard_block *temp = ctx->after_block; - ctx->after_block = create_empty_block(ctx); - ctx->block_count++; - list_addtail(&ctx->after_block->base.link, &ctx->blocks); - list_inithead(&ctx->after_block->base.instructions); - pan_block_add_successor(&ctx->current_block->base, &ctx->after_block->base); - ctx->current_block = ctx->after_block; - ctx->after_block = temp; + midgard_block *temp = ctx->after_block; + ctx->after_block = create_empty_block(ctx); + ctx->block_count++; + list_addtail(&ctx->after_block->base.link, &ctx->blocks); + list_inithead(&ctx->after_block->base.instructions); + pan_block_add_successor(&ctx->current_block->base, &ctx->after_block->base); + ctx->current_block = ctx->after_block; + ctx->after_block = temp; } /* Helpers to generate midgard_instruction's using macro magic, since every @@ -101,32 +101,34 @@ schedule_barrier(compiler_context *ctx) #define EMIT(op, ...) emit_mir_instruction(ctx, v_##op(__VA_ARGS__)); -#define M_LOAD_STORE(name, store, T) \ - static midgard_instruction m_##name(unsigned ssa, unsigned address) { \ - midgard_instruction i = { \ - .type = TAG_LOAD_STORE_4, \ - .mask = 0xF, \ - .dest = ~0, \ - .src = { ~0, ~0, ~0, ~0 }, \ - .swizzle = SWIZZLE_IDENTITY_4, \ - .op = midgard_op_##name, \ - .load_store = { \ - .signed_offset = address, \ - }, \ - }; \ - \ - if (store) { \ - i.src[0] = ssa; \ - i.src_types[0] = T; \ - i.dest_type = T; \ - } else { \ - i.dest = ssa; \ - i.dest_type = T; \ - } \ - return i; \ - } +#define M_LOAD_STORE(name, store, T) \ + static midgard_instruction m_##name(unsigned ssa, unsigned address) \ + { \ + midgard_instruction i = { \ + .type = TAG_LOAD_STORE_4, \ + .mask = 0xF, \ + .dest = ~0, \ + .src = {~0, ~0, ~0, ~0}, \ + .swizzle = SWIZZLE_IDENTITY_4, \ + .op = midgard_op_##name, \ + .load_store = \ + { \ + .signed_offset = address, \ + }, \ + }; \ + \ + if (store) { \ + i.src[0] = ssa; \ + i.src_types[0] = T; \ + i.dest_type = T; \ + } else { \ + i.dest = ssa; \ + i.dest_type = T; \ + } \ + return i; \ + } -#define M_LOAD(name, T) M_LOAD_STORE(name, false, T) +#define M_LOAD(name, T) M_LOAD_STORE(name, false, T) #define M_STORE(name, T) M_LOAD_STORE(name, true, T) M_LOAD(ld_attr_32, nir_type_uint32); @@ -162,23 +164,23 @@ M_STORE(st_image_32u, nir_type_uint32); M_STORE(st_image_32i, nir_type_int32); M_LOAD(lea_image, nir_type_uint64); -#define M_IMAGE(op) \ -static midgard_instruction \ -op ## _image(nir_alu_type type, unsigned val, unsigned address) \ -{ \ - switch (type) { \ - case nir_type_float32: \ - return m_ ## op ## _image_32f(val, address); \ - case nir_type_float16: \ - return m_ ## op ## _image_16f(val, address); \ - case nir_type_uint32: \ - return m_ ## op ## _image_32u(val, address); \ - case nir_type_int32: \ - return m_ ## op ## _image_32i(val, address); \ - default: \ - unreachable("Invalid image type"); \ - } \ -} +#define 
M_IMAGE(op) \ + static midgard_instruction op##_image(nir_alu_type type, unsigned val, \ + unsigned address) \ + { \ + switch (type) { \ + case nir_type_float32: \ + return m_##op##_image_32f(val, address); \ + case nir_type_float16: \ + return m_##op##_image_16f(val, address); \ + case nir_type_uint32: \ + return m_##op##_image_32u(val, address); \ + case nir_type_int32: \ + return m_##op##_image_32i(val, address); \ + default: \ + unreachable("Invalid image type"); \ + } \ + } M_IMAGE(ld); M_IMAGE(st); @@ -186,284 +188,280 @@ M_IMAGE(st); static midgard_instruction v_branch(bool conditional, bool invert) { - midgard_instruction ins = { - .type = TAG_ALU_4, - .unit = ALU_ENAB_BRANCH, - .compact_branch = true, - .branch = { - .conditional = conditional, - .invert_conditional = invert, - }, - .dest = ~0, - .src = { ~0, ~0, ~0, ~0 }, - }; + midgard_instruction ins = { + .type = TAG_ALU_4, + .unit = ALU_ENAB_BRANCH, + .compact_branch = true, + .branch = + { + .conditional = conditional, + .invert_conditional = invert, + }, + .dest = ~0, + .src = {~0, ~0, ~0, ~0}, + }; - return ins; + return ins; } static void -attach_constants(compiler_context *ctx, midgard_instruction *ins, void *constants, int name) +attach_constants(compiler_context *ctx, midgard_instruction *ins, + void *constants, int name) { - ins->has_constants = true; - memcpy(&ins->constants, constants, 16); + ins->has_constants = true; + memcpy(&ins->constants, constants, 16); } static int glsl_type_size(const struct glsl_type *type, bool bindless) { - return glsl_count_attribute_slots(type, false); + return glsl_count_attribute_slots(type, false); } static bool -midgard_nir_lower_global_load_instr(nir_builder *b, nir_instr *instr, void *data) +midgard_nir_lower_global_load_instr(nir_builder *b, nir_instr *instr, + void *data) { - if (instr->type != nir_instr_type_intrinsic) - return false; + if (instr->type != nir_instr_type_intrinsic) + return false; - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - if (intr->intrinsic != nir_intrinsic_load_global && - intr->intrinsic != nir_intrinsic_load_shared) - return false; + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic != nir_intrinsic_load_global && + intr->intrinsic != nir_intrinsic_load_shared) + return false; - unsigned compsz = nir_dest_bit_size(intr->dest); - unsigned totalsz = compsz * nir_dest_num_components(intr->dest); - /* 8, 16, 32, 64 and 128 bit loads don't need to be lowered */ - if (util_bitcount(totalsz) < 2 && totalsz <= 128) - return false; + unsigned compsz = nir_dest_bit_size(intr->dest); + unsigned totalsz = compsz * nir_dest_num_components(intr->dest); + /* 8, 16, 32, 64 and 128 bit loads don't need to be lowered */ + if (util_bitcount(totalsz) < 2 && totalsz <= 128) + return false; - b->cursor = nir_before_instr(instr); + b->cursor = nir_before_instr(instr); - assert(intr->src[0].is_ssa); - nir_ssa_def *addr = intr->src[0].ssa; + assert(intr->src[0].is_ssa); + nir_ssa_def *addr = intr->src[0].ssa; - nir_ssa_def *comps[MIR_VEC_COMPONENTS]; - unsigned ncomps = 0; + nir_ssa_def *comps[MIR_VEC_COMPONENTS]; + unsigned ncomps = 0; - while (totalsz) { - unsigned loadsz = MIN2(1 << (util_last_bit(totalsz) - 1), 128); - unsigned loadncomps = loadsz / compsz; + while (totalsz) { + unsigned loadsz = MIN2(1 << (util_last_bit(totalsz) - 1), 128); + unsigned loadncomps = loadsz / compsz; - nir_ssa_def *load; - if (intr->intrinsic == nir_intrinsic_load_global) { - load = nir_load_global(b, addr, compsz / 8, loadncomps, compsz); - } 
else { - assert(intr->intrinsic == nir_intrinsic_load_shared); - nir_intrinsic_instr *shared_load = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_shared); - shared_load->num_components = loadncomps; - shared_load->src[0] = nir_src_for_ssa(addr); - nir_intrinsic_set_align(shared_load, compsz / 8, 0); - nir_intrinsic_set_base(shared_load, nir_intrinsic_base(intr)); - nir_ssa_dest_init(&shared_load->instr, &shared_load->dest, - shared_load->num_components, compsz, NULL); - nir_builder_instr_insert(b, &shared_load->instr); - load = &shared_load->dest.ssa; - } + nir_ssa_def *load; + if (intr->intrinsic == nir_intrinsic_load_global) { + load = nir_load_global(b, addr, compsz / 8, loadncomps, compsz); + } else { + assert(intr->intrinsic == nir_intrinsic_load_shared); + nir_intrinsic_instr *shared_load = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_shared); + shared_load->num_components = loadncomps; + shared_load->src[0] = nir_src_for_ssa(addr); + nir_intrinsic_set_align(shared_load, compsz / 8, 0); + nir_intrinsic_set_base(shared_load, nir_intrinsic_base(intr)); + nir_ssa_dest_init(&shared_load->instr, &shared_load->dest, + shared_load->num_components, compsz, NULL); + nir_builder_instr_insert(b, &shared_load->instr); + load = &shared_load->dest.ssa; + } - for (unsigned i = 0; i < loadncomps; i++) - comps[ncomps++] = nir_channel(b, load, i); + for (unsigned i = 0; i < loadncomps; i++) + comps[ncomps++] = nir_channel(b, load, i); - totalsz -= loadsz; - addr = nir_iadd(b, addr, nir_imm_intN_t(b, loadsz / 8, addr->bit_size)); - } + totalsz -= loadsz; + addr = nir_iadd(b, addr, nir_imm_intN_t(b, loadsz / 8, addr->bit_size)); + } - assert(ncomps == nir_dest_num_components(intr->dest)); - nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_vec(b, comps, ncomps)); + assert(ncomps == nir_dest_num_components(intr->dest)); + nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_vec(b, comps, ncomps)); - return true; + return true; } static bool midgard_nir_lower_global_load(nir_shader *shader) { - return nir_shader_instructions_pass(shader, - midgard_nir_lower_global_load_instr, - nir_metadata_block_index | nir_metadata_dominance, - NULL); + return nir_shader_instructions_pass( + shader, midgard_nir_lower_global_load_instr, + nir_metadata_block_index | nir_metadata_dominance, NULL); } static bool mdg_should_scalarize(const nir_instr *instr, const void *_unused) { - const nir_alu_instr *alu = nir_instr_as_alu(instr); + const nir_alu_instr *alu = nir_instr_as_alu(instr); - if (nir_src_bit_size(alu->src[0].src) == 64) - return true; + if (nir_src_bit_size(alu->src[0].src) == 64) + return true; - if (nir_dest_bit_size(alu->dest.dest) == 64) - return true; + if (nir_dest_bit_size(alu->dest.dest) == 64) + return true; - switch (alu->op) { - case nir_op_fdot2: - case nir_op_umul_high: - case nir_op_imul_high: - case nir_op_pack_half_2x16: - case nir_op_unpack_half_2x16: - return true; - default: - return false; - } + switch (alu->op) { + case nir_op_fdot2: + case nir_op_umul_high: + case nir_op_imul_high: + case nir_op_pack_half_2x16: + case nir_op_unpack_half_2x16: + return true; + default: + return false; + } } /* Only vectorize int64 up to vec2 */ static uint8_t midgard_vectorize_filter(const nir_instr *instr, const void *data) { - if (instr->type != nir_instr_type_alu) - return 0; + if (instr->type != nir_instr_type_alu) + return 0; - const nir_alu_instr *alu = nir_instr_as_alu(instr); - int src_bit_size = nir_src_bit_size(alu->src[0].src); - int dst_bit_size = nir_dest_bit_size(alu->dest.dest); 
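The global/shared load lowering above splits any oversized or non-power-of-two load into power-of-two chunks of at most 128 bits. Below is a minimal standalone sketch of that size-selection rule, assuming only the C standard library; last_bit is a local stand-in for util_last_bit, and the 96-bit input is purely illustrative, not taken from the patch.

#include <stdio.h>

/* 1-based index of the most significant set bit; 0 when v == 0.
 * Local stand-in for util_last_bit(). */
static unsigned
last_bit(unsigned v)
{
   unsigned i = 0;
   while (v) {
      v >>= 1;
      i++;
   }
   return i;
}

int
main(void)
{
   unsigned totalsz = 96; /* e.g. three 32-bit components */

   while (totalsz) {
      /* Largest power of two not exceeding the remainder, capped at 128,
       * mirroring MIN2(1 << (util_last_bit(totalsz) - 1), 128). */
      unsigned loadsz = 1u << (last_bit(totalsz) - 1);
      if (loadsz > 128)
         loadsz = 128;

      printf("emit a %u-bit load\n", loadsz);
      totalsz -= loadsz;
   }

   /* Prints a 64-bit load followed by a 32-bit load, which is how the
    * pass decomposes a 96-bit (vec3 of 32-bit) load. */
   return 0;
}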
+ const nir_alu_instr *alu = nir_instr_as_alu(instr); + int src_bit_size = nir_src_bit_size(alu->src[0].src); + int dst_bit_size = nir_dest_bit_size(alu->dest.dest); - if (src_bit_size == 64 || dst_bit_size == 64) - return 2; + if (src_bit_size == 64 || dst_bit_size == 64) + return 2; - return 4; + return 4; } static void optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend, bool is_blit) { - bool progress; - unsigned lower_flrp = - (nir->options->lower_flrp16 ? 16 : 0) | - (nir->options->lower_flrp32 ? 32 : 0) | - (nir->options->lower_flrp64 ? 64 : 0); + bool progress; + unsigned lower_flrp = (nir->options->lower_flrp16 ? 16 : 0) | + (nir->options->lower_flrp32 ? 32 : 0) | + (nir->options->lower_flrp64 ? 64 : 0); - NIR_PASS(progress, nir, nir_lower_regs_to_ssa); - nir_lower_idiv_options idiv_options = { - .allow_fp16 = true, - }; - NIR_PASS(progress, nir, nir_lower_idiv, &idiv_options); + NIR_PASS(progress, nir, nir_lower_regs_to_ssa); + nir_lower_idiv_options idiv_options = { + .allow_fp16 = true, + }; + NIR_PASS(progress, nir, nir_lower_idiv, &idiv_options); - nir_lower_tex_options lower_tex_options = { - .lower_txs_lod = true, - .lower_txp = ~0, - .lower_tg4_broadcom_swizzle = true, - /* TODO: we have native gradient.. */ - .lower_txd = true, - .lower_invalid_implicit_lod = true, - }; + nir_lower_tex_options lower_tex_options = { + .lower_txs_lod = true, + .lower_txp = ~0, + .lower_tg4_broadcom_swizzle = true, + /* TODO: we have native gradient.. */ + .lower_txd = true, + .lower_invalid_implicit_lod = true, + }; - NIR_PASS(progress, nir, nir_lower_tex, &lower_tex_options); + NIR_PASS(progress, nir, nir_lower_tex, &lower_tex_options); + /* TEX_GRAD fails to apply sampler descriptor settings on some + * implementations, requiring a lowering. However, blit shaders do not + * use the affected settings and should skip the workaround. + */ + if ((quirks & MIDGARD_BROKEN_LOD) && !is_blit) + NIR_PASS_V(nir, midgard_nir_lod_errata); - /* TEX_GRAD fails to apply sampler descriptor settings on some - * implementations, requiring a lowering. However, blit shaders do not - * use the affected settings and should skip the workaround. 
- */ - if ((quirks & MIDGARD_BROKEN_LOD) && !is_blit) - NIR_PASS_V(nir, midgard_nir_lod_errata); + /* Midgard image ops coordinates are 16-bit instead of 32-bit */ + NIR_PASS(progress, nir, midgard_nir_lower_image_bitsize); + NIR_PASS(progress, nir, midgard_nir_lower_helper_writes); + NIR_PASS(progress, nir, pan_lower_helper_invocation); + NIR_PASS(progress, nir, pan_lower_sample_pos); - /* Midgard image ops coordinates are 16-bit instead of 32-bit */ - NIR_PASS(progress, nir, midgard_nir_lower_image_bitsize); - NIR_PASS(progress, nir, midgard_nir_lower_helper_writes); - NIR_PASS(progress, nir, pan_lower_helper_invocation); - NIR_PASS(progress, nir, pan_lower_sample_pos); + if (nir->xfb_info != NULL && nir->info.has_transform_feedback_varyings) { + NIR_PASS_V(nir, nir_io_add_const_offset_to_base, + nir_var_shader_in | nir_var_shader_out); + NIR_PASS_V(nir, nir_io_add_intrinsic_xfb_info); + NIR_PASS_V(nir, pan_lower_xfb); + } - if (nir->xfb_info != NULL && nir->info.has_transform_feedback_varyings) { - NIR_PASS_V(nir, nir_io_add_const_offset_to_base, - nir_var_shader_in | nir_var_shader_out); - NIR_PASS_V(nir, nir_io_add_intrinsic_xfb_info); - NIR_PASS_V(nir, pan_lower_xfb); - } + NIR_PASS(progress, nir, midgard_nir_lower_algebraic_early); + NIR_PASS_V(nir, nir_lower_alu_to_scalar, mdg_should_scalarize, NULL); - NIR_PASS(progress, nir, midgard_nir_lower_algebraic_early); - NIR_PASS_V(nir, nir_lower_alu_to_scalar, mdg_should_scalarize, NULL); + do { + progress = false; - do { - progress = false; + NIR_PASS(progress, nir, nir_lower_var_copies); + NIR_PASS(progress, nir, nir_lower_vars_to_ssa); - NIR_PASS(progress, nir, nir_lower_var_copies); - NIR_PASS(progress, nir, nir_lower_vars_to_ssa); + NIR_PASS(progress, nir, nir_copy_prop); + NIR_PASS(progress, nir, nir_opt_remove_phis); + NIR_PASS(progress, nir, nir_opt_dce); + NIR_PASS(progress, nir, nir_opt_dead_cf); + NIR_PASS(progress, nir, nir_opt_cse); + NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true); + NIR_PASS(progress, nir, nir_opt_algebraic); + NIR_PASS(progress, nir, nir_opt_constant_folding); - NIR_PASS(progress, nir, nir_copy_prop); - NIR_PASS(progress, nir, nir_opt_remove_phis); - NIR_PASS(progress, nir, nir_opt_dce); - NIR_PASS(progress, nir, nir_opt_dead_cf); - NIR_PASS(progress, nir, nir_opt_cse); - NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true); - NIR_PASS(progress, nir, nir_opt_algebraic); - NIR_PASS(progress, nir, nir_opt_constant_folding); + if (lower_flrp != 0) { + bool lower_flrp_progress = false; + NIR_PASS(lower_flrp_progress, nir, nir_lower_flrp, lower_flrp, + false /* always_precise */); + if (lower_flrp_progress) { + NIR_PASS(progress, nir, nir_opt_constant_folding); + progress = true; + } - if (lower_flrp != 0) { - bool lower_flrp_progress = false; - NIR_PASS(lower_flrp_progress, - nir, - nir_lower_flrp, - lower_flrp, - false /* always_precise */); - if (lower_flrp_progress) { - NIR_PASS(progress, nir, - nir_opt_constant_folding); - progress = true; - } + /* Nothing should rematerialize any flrps, so we only + * need to do this lowering once. + */ + lower_flrp = 0; + } - /* Nothing should rematerialize any flrps, so we only - * need to do this lowering once. 
- */ - lower_flrp = 0; - } + NIR_PASS(progress, nir, nir_opt_undef); + NIR_PASS(progress, nir, nir_lower_undef_to_zero); - NIR_PASS(progress, nir, nir_opt_undef); - NIR_PASS(progress, nir, nir_lower_undef_to_zero); + NIR_PASS(progress, nir, nir_opt_loop_unroll); - NIR_PASS(progress, nir, nir_opt_loop_unroll); + NIR_PASS(progress, nir, nir_opt_vectorize, midgard_vectorize_filter, + NULL); + } while (progress); - NIR_PASS(progress, nir, nir_opt_vectorize, - midgard_vectorize_filter, NULL); - } while (progress); + NIR_PASS_V(nir, nir_lower_alu_to_scalar, mdg_should_scalarize, NULL); - NIR_PASS_V(nir, nir_lower_alu_to_scalar, mdg_should_scalarize, NULL); + /* Run after opts so it can hit more */ + if (!is_blend) + NIR_PASS(progress, nir, nir_fuse_io_16); - /* Run after opts so it can hit more */ - if (!is_blend) - NIR_PASS(progress, nir, nir_fuse_io_16); + /* Must be run at the end to prevent creation of fsin/fcos ops */ + NIR_PASS(progress, nir, midgard_nir_scale_trig); - /* Must be run at the end to prevent creation of fsin/fcos ops */ - NIR_PASS(progress, nir, midgard_nir_scale_trig); + do { + progress = false; - do { - progress = false; + NIR_PASS(progress, nir, nir_opt_dce); + NIR_PASS(progress, nir, nir_opt_algebraic); + NIR_PASS(progress, nir, nir_opt_constant_folding); + NIR_PASS(progress, nir, nir_copy_prop); + } while (progress); - NIR_PASS(progress, nir, nir_opt_dce); - NIR_PASS(progress, nir, nir_opt_algebraic); - NIR_PASS(progress, nir, nir_opt_constant_folding); - NIR_PASS(progress, nir, nir_copy_prop); - } while (progress); + NIR_PASS(progress, nir, nir_opt_algebraic_late); + NIR_PASS(progress, nir, nir_opt_algebraic_distribute_src_mods); - NIR_PASS(progress, nir, nir_opt_algebraic_late); - NIR_PASS(progress, nir, nir_opt_algebraic_distribute_src_mods); + /* We implement booleans as 32-bit 0/~0 */ + NIR_PASS(progress, nir, nir_lower_bool_to_int32); - /* We implement booleans as 32-bit 0/~0 */ - NIR_PASS(progress, nir, nir_lower_bool_to_int32); + /* Now that booleans are lowered, we can run out late opts */ + NIR_PASS(progress, nir, midgard_nir_lower_algebraic_late); + NIR_PASS(progress, nir, midgard_nir_cancel_inot); - /* Now that booleans are lowered, we can run out late opts */ - NIR_PASS(progress, nir, midgard_nir_lower_algebraic_late); - NIR_PASS(progress, nir, midgard_nir_cancel_inot); + NIR_PASS(progress, nir, nir_copy_prop); + NIR_PASS(progress, nir, nir_opt_dce); - NIR_PASS(progress, nir, nir_copy_prop); - NIR_PASS(progress, nir, nir_opt_dce); + /* Backend scheduler is purely local, so do some global optimizations + * to reduce register pressure. */ + nir_move_options move_all = nir_move_const_undef | nir_move_load_ubo | + nir_move_load_input | nir_move_comparisons | + nir_move_copies | nir_move_load_ssbo; - /* Backend scheduler is purely local, so do some global optimizations - * to reduce register pressure. 
*/ - nir_move_options move_all = - nir_move_const_undef | nir_move_load_ubo | nir_move_load_input | - nir_move_comparisons | nir_move_copies | nir_move_load_ssbo; + NIR_PASS_V(nir, nir_opt_sink, move_all); + NIR_PASS_V(nir, nir_opt_move, move_all); - NIR_PASS_V(nir, nir_opt_sink, move_all); - NIR_PASS_V(nir, nir_opt_move, move_all); + /* Take us out of SSA */ + NIR_PASS(progress, nir, nir_lower_locals_to_regs); + NIR_PASS(progress, nir, nir_convert_from_ssa, true); - /* Take us out of SSA */ - NIR_PASS(progress, nir, nir_lower_locals_to_regs); - NIR_PASS(progress, nir, nir_convert_from_ssa, true); + /* We are a vector architecture; write combine where possible */ + NIR_PASS(progress, nir, nir_move_vec_src_uses_to_dest); + NIR_PASS(progress, nir, nir_lower_vec_to_movs, NULL, NULL); - /* We are a vector architecture; write combine where possible */ - NIR_PASS(progress, nir, nir_move_vec_src_uses_to_dest); - NIR_PASS(progress, nir, nir_lower_vec_to_movs, NULL, NULL); - - NIR_PASS(progress, nir, nir_opt_dce); + NIR_PASS(progress, nir, nir_opt_dce); } /* Do not actually emit a load; instead, cache the constant for inlining */ @@ -471,35 +469,37 @@ optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend, bool is_blit) static void emit_load_const(compiler_context *ctx, nir_load_const_instr *instr) { - nir_ssa_def def = instr->def; + nir_ssa_def def = instr->def; - midgard_constants *consts = rzalloc(ctx, midgard_constants); + midgard_constants *consts = rzalloc(ctx, midgard_constants); - assert(instr->def.num_components * instr->def.bit_size <= sizeof(*consts) * 8); + assert(instr->def.num_components * instr->def.bit_size <= + sizeof(*consts) * 8); -#define RAW_CONST_COPY(bits) \ - nir_const_value_to_array(consts->u##bits, instr->value, \ - instr->def.num_components, u##bits) +#define RAW_CONST_COPY(bits) \ + nir_const_value_to_array(consts->u##bits, instr->value, \ + instr->def.num_components, u##bits) - switch (instr->def.bit_size) { - case 64: - RAW_CONST_COPY(64); - break; - case 32: - RAW_CONST_COPY(32); - break; - case 16: - RAW_CONST_COPY(16); - break; - case 8: - RAW_CONST_COPY(8); - break; - default: - unreachable("Invalid bit_size for load_const instruction\n"); - } + switch (instr->def.bit_size) { + case 64: + RAW_CONST_COPY(64); + break; + case 32: + RAW_CONST_COPY(32); + break; + case 16: + RAW_CONST_COPY(16); + break; + case 8: + RAW_CONST_COPY(8); + break; + default: + unreachable("Invalid bit_size for load_const instruction\n"); + } - /* Shifted for SSA, +1 for off-by-one */ - _mesa_hash_table_u64_insert(ctx->ssa_constants, (def.index << 1) + 1, consts); + /* Shifted for SSA, +1 for off-by-one */ + _mesa_hash_table_u64_insert(ctx->ssa_constants, (def.index << 1) + 1, + consts); } /* Normally constants are embedded implicitly, but for I/O and such we have to @@ -508,100 +508,102 @@ emit_load_const(compiler_context *ctx, nir_load_const_instr *instr) static void emit_explicit_constant(compiler_context *ctx, unsigned node, unsigned to) { - void *constant_value = _mesa_hash_table_u64_search(ctx->ssa_constants, node + 1); + void *constant_value = + _mesa_hash_table_u64_search(ctx->ssa_constants, node + 1); - if (constant_value) { - midgard_instruction ins = v_mov(SSA_FIXED_REGISTER(REGISTER_CONSTANT), to); - attach_constants(ctx, &ins, constant_value, node + 1); - emit_mir_instruction(ctx, ins); - } + if (constant_value) { + midgard_instruction ins = + v_mov(SSA_FIXED_REGISTER(REGISTER_CONSTANT), to); + attach_constants(ctx, &ins, constant_value, node + 1); + 
emit_mir_instruction(ctx, ins); + } } static bool nir_is_non_scalar_swizzle(nir_alu_src *src, unsigned nr_components) { - unsigned comp = src->swizzle[0]; + unsigned comp = src->swizzle[0]; - for (unsigned c = 1; c < nr_components; ++c) { - if (src->swizzle[c] != comp) - return true; - } + for (unsigned c = 1; c < nr_components; ++c) { + if (src->swizzle[c] != comp) + return true; + } - return false; + return false; } -#define ATOMIC_CASE_IMPL(ctx, instr, nir, op, is_shared) \ - case nir_intrinsic_##nir: \ - emit_atomic(ctx, instr, is_shared, midgard_op_##op, ~0); \ - break; +#define ATOMIC_CASE_IMPL(ctx, instr, nir, op, is_shared) \ + case nir_intrinsic_##nir: \ + emit_atomic(ctx, instr, is_shared, midgard_op_##op, ~0); \ + break; -#define ATOMIC_CASE(ctx, instr, nir, op) \ - ATOMIC_CASE_IMPL(ctx, instr, shared_atomic_##nir, atomic_##op, true); \ - ATOMIC_CASE_IMPL(ctx, instr, global_atomic_##nir, atomic_##op, false); +#define ATOMIC_CASE(ctx, instr, nir, op) \ + ATOMIC_CASE_IMPL(ctx, instr, shared_atomic_##nir, atomic_##op, true); \ + ATOMIC_CASE_IMPL(ctx, instr, global_atomic_##nir, atomic_##op, false); -#define IMAGE_ATOMIC_CASE(ctx, instr, nir, op) \ - case nir_intrinsic_image_atomic_##nir: { \ - midgard_instruction ins = emit_image_op(ctx, instr, true); \ - emit_atomic(ctx, instr, false, midgard_op_atomic_##op, ins.dest); \ - break; \ - } +#define IMAGE_ATOMIC_CASE(ctx, instr, nir, op) \ + case nir_intrinsic_image_atomic_##nir: { \ + midgard_instruction ins = emit_image_op(ctx, instr, true); \ + emit_atomic(ctx, instr, false, midgard_op_atomic_##op, ins.dest); \ + break; \ + } -#define ALU_CASE(nir, _op) \ - case nir_op_##nir: \ - op = midgard_alu_op_##_op; \ - assert(src_bitsize == dst_bitsize); \ - break; +#define ALU_CASE(nir, _op) \ + case nir_op_##nir: \ + op = midgard_alu_op_##_op; \ + assert(src_bitsize == dst_bitsize); \ + break; -#define ALU_CASE_RTZ(nir, _op) \ - case nir_op_##nir: \ - op = midgard_alu_op_##_op; \ - roundmode = MIDGARD_RTZ; \ - break; +#define ALU_CASE_RTZ(nir, _op) \ + case nir_op_##nir: \ + op = midgard_alu_op_##_op; \ + roundmode = MIDGARD_RTZ; \ + break; -#define ALU_CHECK_CMP() \ - assert(src_bitsize == 16 || src_bitsize == 32 || src_bitsize == 64); \ - assert(dst_bitsize == 16 || dst_bitsize == 32); \ +#define ALU_CHECK_CMP() \ + assert(src_bitsize == 16 || src_bitsize == 32 || src_bitsize == 64); \ + assert(dst_bitsize == 16 || dst_bitsize == 32); -#define ALU_CASE_BCAST(nir, _op, count) \ - case nir_op_##nir: \ - op = midgard_alu_op_##_op; \ - broadcast_swizzle = count; \ - ALU_CHECK_CMP(); \ - break; +#define ALU_CASE_BCAST(nir, _op, count) \ + case nir_op_##nir: \ + op = midgard_alu_op_##_op; \ + broadcast_swizzle = count; \ + ALU_CHECK_CMP(); \ + break; -#define ALU_CASE_CMP(nir, _op) \ - case nir_op_##nir: \ - op = midgard_alu_op_##_op; \ - ALU_CHECK_CMP(); \ - break; +#define ALU_CASE_CMP(nir, _op) \ + case nir_op_##nir: \ + op = midgard_alu_op_##_op; \ + ALU_CHECK_CMP(); \ + break; /* Compare mir_lower_invert */ static bool nir_accepts_inot(nir_op op, unsigned src) { - switch (op) { - case nir_op_ior: - case nir_op_iand: /* TODO: b2f16 */ - case nir_op_ixor: - return true; - case nir_op_b32csel: - /* Only the condition */ - return (src == 0); - default: - return false; - } + switch (op) { + case nir_op_ior: + case nir_op_iand: /* TODO: b2f16 */ + case nir_op_ixor: + return true; + case nir_op_b32csel: + /* Only the condition */ + return (src == 0); + default: + return false; + } } static bool mir_accept_dest_mod(compiler_context *ctx, nir_dest 
**dest, nir_op op) { - if (pan_has_dest_mod(dest, op)) { - assert((*dest)->is_ssa); - BITSET_SET(ctx->already_emitted, (*dest)->ssa.index); - return true; - } + if (pan_has_dest_mod(dest, op)) { + assert((*dest)->is_ssa); + BITSET_SET(ctx->already_emitted, (*dest)->ssa.index); + return true; + } - return false; + return false; } /* Look for floating point mods. We have the mods clamp_m1_1, clamp_0_1, @@ -618,77 +620,82 @@ mir_accept_dest_mod(compiler_context *ctx, nir_dest **dest, nir_op op) * clamp_m1_1 alone. */ static unsigned -mir_determine_float_outmod(compiler_context *ctx, nir_dest **dest, unsigned prior_outmod) +mir_determine_float_outmod(compiler_context *ctx, nir_dest **dest, + unsigned prior_outmod) { - bool clamp_0_inf = mir_accept_dest_mod(ctx, dest, nir_op_fclamp_pos_mali); - bool clamp_0_1 = mir_accept_dest_mod(ctx, dest, nir_op_fsat); - bool clamp_m1_1 = mir_accept_dest_mod(ctx, dest, nir_op_fsat_signed_mali); - bool prior = (prior_outmod != midgard_outmod_none); - int count = (int) prior + (int) clamp_0_inf + (int) clamp_0_1 + (int) clamp_m1_1; + bool clamp_0_inf = mir_accept_dest_mod(ctx, dest, nir_op_fclamp_pos_mali); + bool clamp_0_1 = mir_accept_dest_mod(ctx, dest, nir_op_fsat); + bool clamp_m1_1 = mir_accept_dest_mod(ctx, dest, nir_op_fsat_signed_mali); + bool prior = (prior_outmod != midgard_outmod_none); + int count = (int)prior + (int)clamp_0_inf + (int)clamp_0_1 + (int)clamp_m1_1; - return ((count > 1) || clamp_0_1) ? midgard_outmod_clamp_0_1 : - clamp_0_inf ? midgard_outmod_clamp_0_inf : - clamp_m1_1 ? midgard_outmod_clamp_m1_1 : - prior_outmod; + return ((count > 1) || clamp_0_1) ? midgard_outmod_clamp_0_1 + : clamp_0_inf ? midgard_outmod_clamp_0_inf + : clamp_m1_1 ? midgard_outmod_clamp_m1_1 + : prior_outmod; } static void -mir_copy_src(midgard_instruction *ins, nir_alu_instr *instr, unsigned i, unsigned to, bool *abs, bool *neg, bool *not, enum midgard_roundmode *roundmode, bool is_int, unsigned bcast_count) +mir_copy_src(midgard_instruction *ins, nir_alu_instr *instr, unsigned i, + unsigned to, bool *abs, bool *neg, bool * not, + enum midgard_roundmode *roundmode, bool is_int, + unsigned bcast_count) { - nir_alu_src src = instr->src[i]; + nir_alu_src src = instr->src[i]; - if (!is_int) { - if (pan_has_source_mod(&src, nir_op_fneg)) - *neg = !(*neg); + if (!is_int) { + if (pan_has_source_mod(&src, nir_op_fneg)) + *neg = !(*neg); - if (pan_has_source_mod(&src, nir_op_fabs)) - *abs = true; - } + if (pan_has_source_mod(&src, nir_op_fabs)) + *abs = true; + } - if (nir_accepts_inot(instr->op, i) && pan_has_source_mod(&src, nir_op_inot)) - *not = true; + if (nir_accepts_inot(instr->op, i) && pan_has_source_mod(&src, nir_op_inot)) + *not = true; - if (roundmode) { - if (pan_has_source_mod(&src, nir_op_fround_even)) - *roundmode = MIDGARD_RTE; + if (roundmode) { + if (pan_has_source_mod(&src, nir_op_fround_even)) + *roundmode = MIDGARD_RTE; - if (pan_has_source_mod(&src, nir_op_ftrunc)) - *roundmode = MIDGARD_RTZ; + if (pan_has_source_mod(&src, nir_op_ftrunc)) + *roundmode = MIDGARD_RTZ; - if (pan_has_source_mod(&src, nir_op_ffloor)) - *roundmode = MIDGARD_RTN; + if (pan_has_source_mod(&src, nir_op_ffloor)) + *roundmode = MIDGARD_RTN; - if (pan_has_source_mod(&src, nir_op_fceil)) - *roundmode = MIDGARD_RTP; - } + if (pan_has_source_mod(&src, nir_op_fceil)) + *roundmode = MIDGARD_RTP; + } - unsigned bits = nir_src_bit_size(src.src); + unsigned bits = nir_src_bit_size(src.src); - ins->src[to] = nir_src_index(NULL, &src.src); - ins->src_types[to] = 
nir_op_infos[instr->op].input_types[i] | bits; + ins->src[to] = nir_src_index(NULL, &src.src); + ins->src_types[to] = nir_op_infos[instr->op].input_types[i] | bits; - /* Figure out which component we should fill unused channels with. This - * doesn't matter too much in the non-broadcast case, but it makes - * should that scalar sources are packed with replicated swizzles, - * which works around issues seen with the combination of source - * expansion and destination shrinking. - */ - unsigned replicate_c = 0; - if (bcast_count) { - replicate_c = bcast_count - 1; - } else { - for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; ++c) { - if (nir_alu_instr_channel_used(instr, i, c)) - replicate_c = c; - } - } + /* Figure out which component we should fill unused channels with. This + * doesn't matter too much in the non-broadcast case, but it makes + * should that scalar sources are packed with replicated swizzles, + * which works around issues seen with the combination of source + * expansion and destination shrinking. + */ + unsigned replicate_c = 0; + if (bcast_count) { + replicate_c = bcast_count - 1; + } else { + for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; ++c) { + if (nir_alu_instr_channel_used(instr, i, c)) + replicate_c = c; + } + } - for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; ++c) { - ins->swizzle[to][c] = src.swizzle[ - ((!bcast_count || c < bcast_count) && - nir_alu_instr_channel_used(instr, i, c)) ? - c : replicate_c]; - } + for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; ++c) { + ins->swizzle[to][c] = + src.swizzle[((!bcast_count || c < bcast_count) && + nir_alu_instr_channel_used(instr, i, c)) + ? c + : replicate_c]; + } } /* Midgard features both fcsel and icsel, depending on whether you want int or @@ -699,480 +706,474 @@ mir_copy_src(midgard_instruction *ins, nir_alu_instr *instr, unsigned i, unsigne static bool mir_is_bcsel_float(nir_alu_instr *instr) { - nir_op intmods[] = { - nir_op_i2i8, nir_op_i2i16, - nir_op_i2i32, nir_op_i2i64 - }; + nir_op intmods[] = {nir_op_i2i8, nir_op_i2i16, nir_op_i2i32, nir_op_i2i64}; - nir_op floatmods[] = { - nir_op_fabs, nir_op_fneg, - nir_op_f2f16, nir_op_f2f32, - nir_op_f2f64 - }; + nir_op floatmods[] = {nir_op_fabs, nir_op_fneg, nir_op_f2f16, nir_op_f2f32, + nir_op_f2f64}; - nir_op floatdestmods[] = { - nir_op_fsat, nir_op_fsat_signed_mali, nir_op_fclamp_pos_mali, - nir_op_f2f16, nir_op_f2f32 - }; + nir_op floatdestmods[] = {nir_op_fsat, nir_op_fsat_signed_mali, + nir_op_fclamp_pos_mali, nir_op_f2f16, + nir_op_f2f32}; - signed score = 0; + signed score = 0; - for (unsigned i = 1; i < 3; ++i) { - nir_alu_src s = instr->src[i]; - for (unsigned q = 0; q < ARRAY_SIZE(intmods); ++q) { - if (pan_has_source_mod(&s, intmods[q])) - score--; - } - } + for (unsigned i = 1; i < 3; ++i) { + nir_alu_src s = instr->src[i]; + for (unsigned q = 0; q < ARRAY_SIZE(intmods); ++q) { + if (pan_has_source_mod(&s, intmods[q])) + score--; + } + } - for (unsigned i = 1; i < 3; ++i) { - nir_alu_src s = instr->src[i]; - for (unsigned q = 0; q < ARRAY_SIZE(floatmods); ++q) { - if (pan_has_source_mod(&s, floatmods[q])) - score++; - } - } + for (unsigned i = 1; i < 3; ++i) { + nir_alu_src s = instr->src[i]; + for (unsigned q = 0; q < ARRAY_SIZE(floatmods); ++q) { + if (pan_has_source_mod(&s, floatmods[q])) + score++; + } + } - for (unsigned q = 0; q < ARRAY_SIZE(floatdestmods); ++q) { - nir_dest *dest = &instr->dest.dest; - if (pan_has_dest_mod(&dest, floatdestmods[q])) - score++; - } + for (unsigned q = 0; q < ARRAY_SIZE(floatdestmods); ++q) { + nir_dest *dest 
= &instr->dest.dest; + if (pan_has_dest_mod(&dest, floatdestmods[q])) + score++; + } - return (score > 0); + return (score > 0); } static void emit_alu(compiler_context *ctx, nir_alu_instr *instr) { - nir_dest *dest = &instr->dest.dest; + nir_dest *dest = &instr->dest.dest; - if (dest->is_ssa && BITSET_TEST(ctx->already_emitted, dest->ssa.index)) - return; + if (dest->is_ssa && BITSET_TEST(ctx->already_emitted, dest->ssa.index)) + return; - /* Derivatives end up emitted on the texture pipe, not the ALUs. This - * is handled elsewhere */ + /* Derivatives end up emitted on the texture pipe, not the ALUs. This + * is handled elsewhere */ - if (instr->op == nir_op_fddx || instr->op == nir_op_fddy) { - midgard_emit_derivatives(ctx, instr); - return; - } + if (instr->op == nir_op_fddx || instr->op == nir_op_fddy) { + midgard_emit_derivatives(ctx, instr); + return; + } - bool is_ssa = dest->is_ssa; + bool is_ssa = dest->is_ssa; - unsigned nr_components = nir_dest_num_components(*dest); - unsigned nr_inputs = nir_op_infos[instr->op].num_inputs; - unsigned op = 0; + unsigned nr_components = nir_dest_num_components(*dest); + unsigned nr_inputs = nir_op_infos[instr->op].num_inputs; + unsigned op = 0; - /* Number of components valid to check for the instruction (the rest - * will be forced to the last), or 0 to use as-is. Relevant as - * ball-type instructions have a channel count in NIR but are all vec4 - * in Midgard */ + /* Number of components valid to check for the instruction (the rest + * will be forced to the last), or 0 to use as-is. Relevant as + * ball-type instructions have a channel count in NIR but are all vec4 + * in Midgard */ - unsigned broadcast_swizzle = 0; + unsigned broadcast_swizzle = 0; - /* Should we swap arguments? */ - bool flip_src12 = false; + /* Should we swap arguments? 
*/ + bool flip_src12 = false; - ASSERTED unsigned src_bitsize = nir_src_bit_size(instr->src[0].src); - ASSERTED unsigned dst_bitsize = nir_dest_bit_size(*dest); + ASSERTED unsigned src_bitsize = nir_src_bit_size(instr->src[0].src); + ASSERTED unsigned dst_bitsize = nir_dest_bit_size(*dest); - enum midgard_roundmode roundmode = MIDGARD_RTE; + enum midgard_roundmode roundmode = MIDGARD_RTE; - switch (instr->op) { - ALU_CASE(fadd, fadd); - ALU_CASE(fmul, fmul); - ALU_CASE(fmin, fmin); - ALU_CASE(fmax, fmax); - ALU_CASE(imin, imin); - ALU_CASE(imax, imax); - ALU_CASE(umin, umin); - ALU_CASE(umax, umax); - ALU_CASE(ffloor, ffloor); - ALU_CASE(fround_even, froundeven); - ALU_CASE(ftrunc, ftrunc); - ALU_CASE(fceil, fceil); - ALU_CASE(fdot3, fdot3); - ALU_CASE(fdot4, fdot4); - ALU_CASE(iadd, iadd); - ALU_CASE(isub, isub); - ALU_CASE(iadd_sat, iaddsat); - ALU_CASE(isub_sat, isubsat); - ALU_CASE(uadd_sat, uaddsat); - ALU_CASE(usub_sat, usubsat); - ALU_CASE(imul, imul); - ALU_CASE(imul_high, imul); - ALU_CASE(umul_high, imul); - ALU_CASE(uclz, iclz); + switch (instr->op) { + ALU_CASE(fadd, fadd); + ALU_CASE(fmul, fmul); + ALU_CASE(fmin, fmin); + ALU_CASE(fmax, fmax); + ALU_CASE(imin, imin); + ALU_CASE(imax, imax); + ALU_CASE(umin, umin); + ALU_CASE(umax, umax); + ALU_CASE(ffloor, ffloor); + ALU_CASE(fround_even, froundeven); + ALU_CASE(ftrunc, ftrunc); + ALU_CASE(fceil, fceil); + ALU_CASE(fdot3, fdot3); + ALU_CASE(fdot4, fdot4); + ALU_CASE(iadd, iadd); + ALU_CASE(isub, isub); + ALU_CASE(iadd_sat, iaddsat); + ALU_CASE(isub_sat, isubsat); + ALU_CASE(uadd_sat, uaddsat); + ALU_CASE(usub_sat, usubsat); + ALU_CASE(imul, imul); + ALU_CASE(imul_high, imul); + ALU_CASE(umul_high, imul); + ALU_CASE(uclz, iclz); - /* Zero shoved as second-arg */ - ALU_CASE(iabs, iabsdiff); + /* Zero shoved as second-arg */ + ALU_CASE(iabs, iabsdiff); - ALU_CASE(uabs_isub, iabsdiff); - ALU_CASE(uabs_usub, uabsdiff); + ALU_CASE(uabs_isub, iabsdiff); + ALU_CASE(uabs_usub, uabsdiff); - ALU_CASE(mov, imov); + ALU_CASE(mov, imov); - ALU_CASE_CMP(feq32, feq); - ALU_CASE_CMP(fneu32, fne); - ALU_CASE_CMP(flt32, flt); - ALU_CASE_CMP(ieq32, ieq); - ALU_CASE_CMP(ine32, ine); - ALU_CASE_CMP(ilt32, ilt); - ALU_CASE_CMP(ult32, ult); + ALU_CASE_CMP(feq32, feq); + ALU_CASE_CMP(fneu32, fne); + ALU_CASE_CMP(flt32, flt); + ALU_CASE_CMP(ieq32, ieq); + ALU_CASE_CMP(ine32, ine); + ALU_CASE_CMP(ilt32, ilt); + ALU_CASE_CMP(ult32, ult); - /* We don't have a native b2f32 instruction. Instead, like many - * GPUs, we exploit booleans as 0/~0 for false/true, and - * correspondingly AND - * by 1.0 to do the type conversion. For the moment, prime us - * to emit: - * - * iand [whatever], #0 - * - * At the end of emit_alu (as MIR), we'll fix-up the constant - */ + /* We don't have a native b2f32 instruction. Instead, like many + * GPUs, we exploit booleans as 0/~0 for false/true, and + * correspondingly AND + * by 1.0 to do the type conversion. For the moment, prime us + * to emit: + * + * iand [whatever], #0 + * + * At the end of emit_alu (as MIR), we'll fix-up the constant + */ - ALU_CASE_CMP(b2f32, iand); - ALU_CASE_CMP(b2f16, iand); - ALU_CASE_CMP(b2i32, iand); + ALU_CASE_CMP(b2f32, iand); + ALU_CASE_CMP(b2f16, iand); + ALU_CASE_CMP(b2i32, iand); - /* Likewise, we don't have a dedicated f2b32 instruction, but - * we can do a "not equal to 0.0" test. */ + /* Likewise, we don't have a dedicated f2b32 instruction, but + * we can do a "not equal to 0.0" test. 
*/ - ALU_CASE_CMP(f2b32, fne); + ALU_CASE_CMP(f2b32, fne); - ALU_CASE(frcp, frcp); - ALU_CASE(frsq, frsqrt); - ALU_CASE(fsqrt, fsqrt); - ALU_CASE(fexp2, fexp2); - ALU_CASE(flog2, flog2); + ALU_CASE(frcp, frcp); + ALU_CASE(frsq, frsqrt); + ALU_CASE(fsqrt, fsqrt); + ALU_CASE(fexp2, fexp2); + ALU_CASE(flog2, flog2); - ALU_CASE_RTZ(f2i64, f2i_rte); - ALU_CASE_RTZ(f2u64, f2u_rte); - ALU_CASE_RTZ(i2f64, i2f_rte); - ALU_CASE_RTZ(u2f64, u2f_rte); + ALU_CASE_RTZ(f2i64, f2i_rte); + ALU_CASE_RTZ(f2u64, f2u_rte); + ALU_CASE_RTZ(i2f64, i2f_rte); + ALU_CASE_RTZ(u2f64, u2f_rte); - ALU_CASE_RTZ(f2i32, f2i_rte); - ALU_CASE_RTZ(f2u32, f2u_rte); - ALU_CASE_RTZ(i2f32, i2f_rte); - ALU_CASE_RTZ(u2f32, u2f_rte); + ALU_CASE_RTZ(f2i32, f2i_rte); + ALU_CASE_RTZ(f2u32, f2u_rte); + ALU_CASE_RTZ(i2f32, i2f_rte); + ALU_CASE_RTZ(u2f32, u2f_rte); - ALU_CASE_RTZ(f2i8, f2i_rte); - ALU_CASE_RTZ(f2u8, f2u_rte); + ALU_CASE_RTZ(f2i8, f2i_rte); + ALU_CASE_RTZ(f2u8, f2u_rte); - ALU_CASE_RTZ(f2i16, f2i_rte); - ALU_CASE_RTZ(f2u16, f2u_rte); - ALU_CASE_RTZ(i2f16, i2f_rte); - ALU_CASE_RTZ(u2f16, u2f_rte); + ALU_CASE_RTZ(f2i16, f2i_rte); + ALU_CASE_RTZ(f2u16, f2u_rte); + ALU_CASE_RTZ(i2f16, i2f_rte); + ALU_CASE_RTZ(u2f16, u2f_rte); - ALU_CASE(fsin, fsinpi); - ALU_CASE(fcos, fcospi); + ALU_CASE(fsin, fsinpi); + ALU_CASE(fcos, fcospi); - /* We'll get 0 in the second arg, so: - * ~a = ~(a | 0) = nor(a, 0) */ - ALU_CASE(inot, inor); - ALU_CASE(iand, iand); - ALU_CASE(ior, ior); - ALU_CASE(ixor, ixor); - ALU_CASE(ishl, ishl); - ALU_CASE(ishr, iasr); - ALU_CASE(ushr, ilsr); + /* We'll get 0 in the second arg, so: + * ~a = ~(a | 0) = nor(a, 0) */ + ALU_CASE(inot, inor); + ALU_CASE(iand, iand); + ALU_CASE(ior, ior); + ALU_CASE(ixor, ixor); + ALU_CASE(ishl, ishl); + ALU_CASE(ishr, iasr); + ALU_CASE(ushr, ilsr); - ALU_CASE_BCAST(b32all_fequal2, fball_eq, 2); - ALU_CASE_BCAST(b32all_fequal3, fball_eq, 3); - ALU_CASE_CMP(b32all_fequal4, fball_eq); + ALU_CASE_BCAST(b32all_fequal2, fball_eq, 2); + ALU_CASE_BCAST(b32all_fequal3, fball_eq, 3); + ALU_CASE_CMP(b32all_fequal4, fball_eq); - ALU_CASE_BCAST(b32any_fnequal2, fbany_neq, 2); - ALU_CASE_BCAST(b32any_fnequal3, fbany_neq, 3); - ALU_CASE_CMP(b32any_fnequal4, fbany_neq); + ALU_CASE_BCAST(b32any_fnequal2, fbany_neq, 2); + ALU_CASE_BCAST(b32any_fnequal3, fbany_neq, 3); + ALU_CASE_CMP(b32any_fnequal4, fbany_neq); - ALU_CASE_BCAST(b32all_iequal2, iball_eq, 2); - ALU_CASE_BCAST(b32all_iequal3, iball_eq, 3); - ALU_CASE_CMP(b32all_iequal4, iball_eq); + ALU_CASE_BCAST(b32all_iequal2, iball_eq, 2); + ALU_CASE_BCAST(b32all_iequal3, iball_eq, 3); + ALU_CASE_CMP(b32all_iequal4, iball_eq); - ALU_CASE_BCAST(b32any_inequal2, ibany_neq, 2); - ALU_CASE_BCAST(b32any_inequal3, ibany_neq, 3); - ALU_CASE_CMP(b32any_inequal4, ibany_neq); + ALU_CASE_BCAST(b32any_inequal2, ibany_neq, 2); + ALU_CASE_BCAST(b32any_inequal3, ibany_neq, 3); + ALU_CASE_CMP(b32any_inequal4, ibany_neq); - /* Source mods will be shoved in later */ - ALU_CASE(fabs, fmov); - ALU_CASE(fneg, fmov); - ALU_CASE(fsat, fmov); - ALU_CASE(fsat_signed_mali, fmov); - ALU_CASE(fclamp_pos_mali, fmov); + /* Source mods will be shoved in later */ + ALU_CASE(fabs, fmov); + ALU_CASE(fneg, fmov); + ALU_CASE(fsat, fmov); + ALU_CASE(fsat_signed_mali, fmov); + ALU_CASE(fclamp_pos_mali, fmov); - /* For size conversion, we use a move. Ideally though we would squash - * these ops together; maybe that has to happen after in NIR as part of - * propagation...? An earlier algebraic pass ensured we step down by - * only / exactly one size. 
If stepping down, we use a dest override to - * reduce the size; if stepping up, we use a larger-sized move with a - * half source and a sign/zero-extension modifier */ + /* For size conversion, we use a move. Ideally though we would squash + * these ops together; maybe that has to happen after in NIR as part of + * propagation...? An earlier algebraic pass ensured we step down by + * only / exactly one size. If stepping down, we use a dest override to + * reduce the size; if stepping up, we use a larger-sized move with a + * half source and a sign/zero-extension modifier */ - case nir_op_i2i8: - case nir_op_i2i16: - case nir_op_i2i32: - case nir_op_i2i64: - case nir_op_u2u8: - case nir_op_u2u16: - case nir_op_u2u32: - case nir_op_u2u64: - case nir_op_f2f16: - case nir_op_f2f32: - case nir_op_f2f64: { - if (instr->op == nir_op_f2f16 || instr->op == nir_op_f2f32 || - instr->op == nir_op_f2f64) - op = midgard_alu_op_fmov; - else - op = midgard_alu_op_imov; + case nir_op_i2i8: + case nir_op_i2i16: + case nir_op_i2i32: + case nir_op_i2i64: + case nir_op_u2u8: + case nir_op_u2u16: + case nir_op_u2u32: + case nir_op_u2u64: + case nir_op_f2f16: + case nir_op_f2f32: + case nir_op_f2f64: { + if (instr->op == nir_op_f2f16 || instr->op == nir_op_f2f32 || + instr->op == nir_op_f2f64) + op = midgard_alu_op_fmov; + else + op = midgard_alu_op_imov; - break; - } + break; + } - /* For greater-or-equal, we lower to less-or-equal and flip the - * arguments */ + /* For greater-or-equal, we lower to less-or-equal and flip the + * arguments */ - case nir_op_fge: - case nir_op_fge32: - case nir_op_ige32: - case nir_op_uge32: { - op = - instr->op == nir_op_fge ? midgard_alu_op_fle : - instr->op == nir_op_fge32 ? midgard_alu_op_fle : - instr->op == nir_op_ige32 ? midgard_alu_op_ile : - instr->op == nir_op_uge32 ? midgard_alu_op_ule : - 0; + case nir_op_fge: + case nir_op_fge32: + case nir_op_ige32: + case nir_op_uge32: { + op = instr->op == nir_op_fge ? midgard_alu_op_fle + : instr->op == nir_op_fge32 ? midgard_alu_op_fle + : instr->op == nir_op_ige32 ? midgard_alu_op_ile + : instr->op == nir_op_uge32 ? midgard_alu_op_ule + : 0; - flip_src12 = true; - ALU_CHECK_CMP(); - break; - } + flip_src12 = true; + ALU_CHECK_CMP(); + break; + } - case nir_op_b32csel: { - bool mixed = nir_is_non_scalar_swizzle(&instr->src[0], nr_components); - bool is_float = mir_is_bcsel_float(instr); - op = is_float ? - (mixed ? midgard_alu_op_fcsel_v : midgard_alu_op_fcsel) : - (mixed ? midgard_alu_op_icsel_v : midgard_alu_op_icsel); + case nir_op_b32csel: { + bool mixed = nir_is_non_scalar_swizzle(&instr->src[0], nr_components); + bool is_float = mir_is_bcsel_float(instr); + op = is_float ? (mixed ? midgard_alu_op_fcsel_v : midgard_alu_op_fcsel) + : (mixed ? 
midgard_alu_op_icsel_v : midgard_alu_op_icsel); - break; - } + break; + } - case nir_op_unpack_32_2x16: - case nir_op_unpack_32_4x8: - case nir_op_pack_32_2x16: - case nir_op_pack_32_4x8: { - op = midgard_alu_op_imov; - break; - } + case nir_op_unpack_32_2x16: + case nir_op_unpack_32_4x8: + case nir_op_pack_32_2x16: + case nir_op_pack_32_4x8: { + op = midgard_alu_op_imov; + break; + } - default: - mesa_loge("Unhandled ALU op %s\n", nir_op_infos[instr->op].name); - assert(0); - return; - } + default: + mesa_loge("Unhandled ALU op %s\n", nir_op_infos[instr->op].name); + assert(0); + return; + } - /* Promote imov to fmov if it might help inline a constant */ - if (op == midgard_alu_op_imov && nir_src_is_const(instr->src[0].src) - && nir_src_bit_size(instr->src[0].src) == 32 - && nir_is_same_comp_swizzle(instr->src[0].swizzle, + /* Promote imov to fmov if it might help inline a constant */ + if (op == midgard_alu_op_imov && nir_src_is_const(instr->src[0].src) && + nir_src_bit_size(instr->src[0].src) == 32 && + nir_is_same_comp_swizzle(instr->src[0].swizzle, nir_src_num_components(instr->src[0].src))) { - op = midgard_alu_op_fmov; - } + op = midgard_alu_op_fmov; + } - /* Midgard can perform certain modifiers on output of an ALU op */ + /* Midgard can perform certain modifiers on output of an ALU op */ - unsigned outmod = 0; - bool is_int = midgard_is_integer_op(op); + unsigned outmod = 0; + bool is_int = midgard_is_integer_op(op); - if (instr->op == nir_op_umul_high || instr->op == nir_op_imul_high) { - outmod = midgard_outmod_keephi; - } else if (midgard_is_integer_out_op(op)) { - outmod = midgard_outmod_keeplo; - } else if (instr->op == nir_op_fsat) { - outmod = midgard_outmod_clamp_0_1; - } else if (instr->op == nir_op_fsat_signed_mali) { - outmod = midgard_outmod_clamp_m1_1; - } else if (instr->op == nir_op_fclamp_pos_mali) { - outmod = midgard_outmod_clamp_0_inf; - } + if (instr->op == nir_op_umul_high || instr->op == nir_op_imul_high) { + outmod = midgard_outmod_keephi; + } else if (midgard_is_integer_out_op(op)) { + outmod = midgard_outmod_keeplo; + } else if (instr->op == nir_op_fsat) { + outmod = midgard_outmod_clamp_0_1; + } else if (instr->op == nir_op_fsat_signed_mali) { + outmod = midgard_outmod_clamp_m1_1; + } else if (instr->op == nir_op_fclamp_pos_mali) { + outmod = midgard_outmod_clamp_0_inf; + } - /* Fetch unit, quirks, etc information */ - unsigned opcode_props = alu_opcode_props[op].props; - bool quirk_flipped_r24 = opcode_props & QUIRK_FLIPPED_R24; + /* Fetch unit, quirks, etc information */ + unsigned opcode_props = alu_opcode_props[op].props; + bool quirk_flipped_r24 = opcode_props & QUIRK_FLIPPED_R24; - if (!midgard_is_integer_out_op(op)) { - outmod = mir_determine_float_outmod(ctx, &dest, outmod); - } + if (!midgard_is_integer_out_op(op)) { + outmod = mir_determine_float_outmod(ctx, &dest, outmod); + } - midgard_instruction ins = { - .type = TAG_ALU_4, - .dest = nir_dest_index(dest), - .dest_type = nir_op_infos[instr->op].output_type - | nir_dest_bit_size(*dest), - .roundmode = roundmode, - }; + midgard_instruction ins = { + .type = TAG_ALU_4, + .dest = nir_dest_index(dest), + .dest_type = + nir_op_infos[instr->op].output_type | nir_dest_bit_size(*dest), + .roundmode = roundmode, + }; - enum midgard_roundmode *roundptr = (opcode_props & MIDGARD_ROUNDS) ? - &ins.roundmode : NULL; + enum midgard_roundmode *roundptr = + (opcode_props & MIDGARD_ROUNDS) ? 
&ins.roundmode : NULL; - for (unsigned i = nr_inputs; i < ARRAY_SIZE(ins.src); ++i) - ins.src[i] = ~0; + for (unsigned i = nr_inputs; i < ARRAY_SIZE(ins.src); ++i) + ins.src[i] = ~0; - if (quirk_flipped_r24) { - ins.src[0] = ~0; - mir_copy_src(&ins, instr, 0, 1, &ins.src_abs[1], &ins.src_neg[1], &ins.src_invert[1], roundptr, is_int, broadcast_swizzle); - } else { - for (unsigned i = 0; i < nr_inputs; ++i) { - unsigned to = i; + if (quirk_flipped_r24) { + ins.src[0] = ~0; + mir_copy_src(&ins, instr, 0, 1, &ins.src_abs[1], &ins.src_neg[1], + &ins.src_invert[1], roundptr, is_int, broadcast_swizzle); + } else { + for (unsigned i = 0; i < nr_inputs; ++i) { + unsigned to = i; - if (instr->op == nir_op_b32csel) { - /* The condition is the first argument; move - * the other arguments up one to be a binary - * instruction for Midgard with the condition - * last */ + if (instr->op == nir_op_b32csel) { + /* The condition is the first argument; move + * the other arguments up one to be a binary + * instruction for Midgard with the condition + * last */ - if (i == 0) - to = 2; - else if (flip_src12) - to = 2 - i; - else - to = i - 1; - } else if (flip_src12) { - to = 1 - to; - } + if (i == 0) + to = 2; + else if (flip_src12) + to = 2 - i; + else + to = i - 1; + } else if (flip_src12) { + to = 1 - to; + } - mir_copy_src(&ins, instr, i, to, &ins.src_abs[to], &ins.src_neg[to], &ins.src_invert[to], roundptr, is_int, broadcast_swizzle); + mir_copy_src(&ins, instr, i, to, &ins.src_abs[to], &ins.src_neg[to], + &ins.src_invert[to], roundptr, is_int, broadcast_swizzle); - /* (!c) ? a : b = c ? b : a */ - if (instr->op == nir_op_b32csel && ins.src_invert[2]) { - ins.src_invert[2] = false; - flip_src12 ^= true; - } - } - } + /* (!c) ? a : b = c ? b : a */ + if (instr->op == nir_op_b32csel && ins.src_invert[2]) { + ins.src_invert[2] = false; + flip_src12 ^= true; + } + } + } - if (instr->op == nir_op_fneg || instr->op == nir_op_fabs) { - /* Lowered to move */ - if (instr->op == nir_op_fneg) - ins.src_neg[1] ^= true; + if (instr->op == nir_op_fneg || instr->op == nir_op_fabs) { + /* Lowered to move */ + if (instr->op == nir_op_fneg) + ins.src_neg[1] ^= true; - if (instr->op == nir_op_fabs) - ins.src_abs[1] = true; - } + if (instr->op == nir_op_fabs) + ins.src_abs[1] = true; + } - ins.mask = mask_of(nr_components); + ins.mask = mask_of(nr_components); - /* Apply writemask if non-SSA, keeping in mind that we can't write to - * components that don't exist. Note modifier => SSA => !reg => no - * writemask, so we don't have to worry about writemasks here.*/ + /* Apply writemask if non-SSA, keeping in mind that we can't write to + * components that don't exist. Note modifier => SSA => !reg => no + * writemask, so we don't have to worry about writemasks here.*/ - if (!is_ssa) - ins.mask &= instr->dest.write_mask; + if (!is_ssa) + ins.mask &= instr->dest.write_mask; - ins.op = op; - ins.outmod = outmod; + ins.op = op; + ins.outmod = outmod; - /* Late fixup for emulated instructions */ + /* Late fixup for emulated instructions */ - if (instr->op == nir_op_b2f32 || instr->op == nir_op_b2i32) { - /* Presently, our second argument is an inline #0 constant. - * Switch over to an embedded 1.0 constant (that can't fit - * inline, since we're 32-bit, not 16-bit like the inline - * constants) */ + if (instr->op == nir_op_b2f32 || instr->op == nir_op_b2i32) { + /* Presently, our second argument is an inline #0 constant. 
+ * Switch over to an embedded 1.0 constant (that can't fit + * inline, since we're 32-bit, not 16-bit like the inline + * constants) */ - ins.has_inline_constant = false; - ins.src[1] = SSA_FIXED_REGISTER(REGISTER_CONSTANT); - ins.src_types[1] = nir_type_float32; - ins.has_constants = true; + ins.has_inline_constant = false; + ins.src[1] = SSA_FIXED_REGISTER(REGISTER_CONSTANT); + ins.src_types[1] = nir_type_float32; + ins.has_constants = true; - if (instr->op == nir_op_b2f32) - ins.constants.f32[0] = 1.0f; - else - ins.constants.i32[0] = 1; + if (instr->op == nir_op_b2f32) + ins.constants.f32[0] = 1.0f; + else + ins.constants.i32[0] = 1; - for (unsigned c = 0; c < 16; ++c) - ins.swizzle[1][c] = 0; - } else if (instr->op == nir_op_b2f16) { - ins.src[1] = SSA_FIXED_REGISTER(REGISTER_CONSTANT); - ins.src_types[1] = nir_type_float16; - ins.has_constants = true; - ins.constants.i16[0] = _mesa_float_to_half(1.0); + for (unsigned c = 0; c < 16; ++c) + ins.swizzle[1][c] = 0; + } else if (instr->op == nir_op_b2f16) { + ins.src[1] = SSA_FIXED_REGISTER(REGISTER_CONSTANT); + ins.src_types[1] = nir_type_float16; + ins.has_constants = true; + ins.constants.i16[0] = _mesa_float_to_half(1.0); - for (unsigned c = 0; c < 16; ++c) - ins.swizzle[1][c] = 0; - } else if (nr_inputs == 1 && !quirk_flipped_r24) { - /* Lots of instructions need a 0 plonked in */ - ins.has_inline_constant = false; - ins.src[1] = SSA_FIXED_REGISTER(REGISTER_CONSTANT); - ins.src_types[1] = ins.src_types[0]; - ins.has_constants = true; - ins.constants.u32[0] = 0; + for (unsigned c = 0; c < 16; ++c) + ins.swizzle[1][c] = 0; + } else if (nr_inputs == 1 && !quirk_flipped_r24) { + /* Lots of instructions need a 0 plonked in */ + ins.has_inline_constant = false; + ins.src[1] = SSA_FIXED_REGISTER(REGISTER_CONSTANT); + ins.src_types[1] = ins.src_types[0]; + ins.has_constants = true; + ins.constants.u32[0] = 0; - for (unsigned c = 0; c < 16; ++c) - ins.swizzle[1][c] = 0; - } else if (instr->op == nir_op_pack_32_2x16) { - ins.dest_type = nir_type_uint16; - ins.mask = mask_of(nr_components * 2); - ins.is_pack = true; - } else if (instr->op == nir_op_pack_32_4x8) { - ins.dest_type = nir_type_uint8; - ins.mask = mask_of(nr_components * 4); - ins.is_pack = true; - } else if (instr->op == nir_op_unpack_32_2x16) { - ins.dest_type = nir_type_uint32; - ins.mask = mask_of(nr_components >> 1); - ins.is_pack = true; - } else if (instr->op == nir_op_unpack_32_4x8) { - ins.dest_type = nir_type_uint32; - ins.mask = mask_of(nr_components >> 2); - ins.is_pack = true; - } + for (unsigned c = 0; c < 16; ++c) + ins.swizzle[1][c] = 0; + } else if (instr->op == nir_op_pack_32_2x16) { + ins.dest_type = nir_type_uint16; + ins.mask = mask_of(nr_components * 2); + ins.is_pack = true; + } else if (instr->op == nir_op_pack_32_4x8) { + ins.dest_type = nir_type_uint8; + ins.mask = mask_of(nr_components * 4); + ins.is_pack = true; + } else if (instr->op == nir_op_unpack_32_2x16) { + ins.dest_type = nir_type_uint32; + ins.mask = mask_of(nr_components >> 1); + ins.is_pack = true; + } else if (instr->op == nir_op_unpack_32_4x8) { + ins.dest_type = nir_type_uint32; + ins.mask = mask_of(nr_components >> 2); + ins.is_pack = true; + } - if ((opcode_props & UNITS_ALL) == UNIT_VLUT) { - /* To avoid duplicating the lookup tables (probably), true LUT - * instructions can only operate as if they were scalars. Lower - * them here by changing the component. 
*/ + if ((opcode_props & UNITS_ALL) == UNIT_VLUT) { + /* To avoid duplicating the lookup tables (probably), true LUT + * instructions can only operate as if they were scalars. Lower + * them here by changing the component. */ - unsigned orig_mask = ins.mask; + unsigned orig_mask = ins.mask; - unsigned swizzle_back[MIR_VEC_COMPONENTS]; - memcpy(&swizzle_back, ins.swizzle[0], sizeof(swizzle_back)); + unsigned swizzle_back[MIR_VEC_COMPONENTS]; + memcpy(&swizzle_back, ins.swizzle[0], sizeof(swizzle_back)); - midgard_instruction ins_split[MIR_VEC_COMPONENTS]; - unsigned ins_count = 0; + midgard_instruction ins_split[MIR_VEC_COMPONENTS]; + unsigned ins_count = 0; - for (int i = 0; i < nr_components; ++i) { - /* Mask the associated component, dropping the - * instruction if needed */ + for (int i = 0; i < nr_components; ++i) { + /* Mask the associated component, dropping the + * instruction if needed */ - ins.mask = 1 << i; - ins.mask &= orig_mask; + ins.mask = 1 << i; + ins.mask &= orig_mask; - for (unsigned j = 0; j < ins_count; ++j) { - if (swizzle_back[i] == ins_split[j].swizzle[0][0]) { - ins_split[j].mask |= ins.mask; - ins.mask = 0; - break; - } - } + for (unsigned j = 0; j < ins_count; ++j) { + if (swizzle_back[i] == ins_split[j].swizzle[0][0]) { + ins_split[j].mask |= ins.mask; + ins.mask = 0; + break; + } + } - if (!ins.mask) - continue; + if (!ins.mask) + continue; - for (unsigned j = 0; j < MIR_VEC_COMPONENTS; ++j) - ins.swizzle[0][j] = swizzle_back[i]; /* Pull from the correct component */ + for (unsigned j = 0; j < MIR_VEC_COMPONENTS; ++j) + ins.swizzle[0][j] = + swizzle_back[i]; /* Pull from the correct component */ - ins_split[ins_count] = ins; + ins_split[ins_count] = ins; - ++ins_count; - } + ++ins_count; + } - for (unsigned i = 0; i < ins_count; ++i) { - emit_mir_instruction(ctx, ins_split[i]); - } - } else { - emit_mir_instruction(ctx, ins); - } + for (unsigned i = 0; i < ins_count; ++i) { + emit_mir_instruction(ctx, ins_split[i]); + } + } else { + emit_mir_instruction(ctx, ins); + } } #undef ALU_CASE @@ -1180,179 +1181,179 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) static void mir_set_intr_mask(nir_instr *instr, midgard_instruction *ins, bool is_read) { - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - unsigned nir_mask = 0; - unsigned dsize = 0; + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + unsigned nir_mask = 0; + unsigned dsize = 0; - if (is_read) { - nir_mask = mask_of(nir_intrinsic_dest_components(intr)); + if (is_read) { + nir_mask = mask_of(nir_intrinsic_dest_components(intr)); - /* Extension is mandatory for 8/16-bit loads */ - dsize = nir_dest_bit_size(intr->dest) == 64 ? 64 : 32; - } else { - nir_mask = nir_intrinsic_write_mask(intr); - dsize = OP_IS_COMMON_STORE(ins->op) ? - nir_src_bit_size(intr->src[0]) : 32; - } + /* Extension is mandatory for 8/16-bit loads */ + dsize = nir_dest_bit_size(intr->dest) == 64 ? 64 : 32; + } else { + nir_mask = nir_intrinsic_write_mask(intr); + dsize = OP_IS_COMMON_STORE(ins->op) ? 
nir_src_bit_size(intr->src[0]) : 32; + } - /* Once we have the NIR mask, we need to normalize to work in 32-bit space */ - unsigned bytemask = pan_to_bytemask(dsize, nir_mask); - ins->dest_type = nir_type_uint | dsize; - mir_set_bytemask(ins, bytemask); + /* Once we have the NIR mask, we need to normalize to work in 32-bit space */ + unsigned bytemask = pan_to_bytemask(dsize, nir_mask); + ins->dest_type = nir_type_uint | dsize; + mir_set_bytemask(ins, bytemask); } /* Uniforms and UBOs use a shared code path, as uniforms are just (slightly * optimized) versions of UBO #0 */ static midgard_instruction * -emit_ubo_read( - compiler_context *ctx, - nir_instr *instr, - unsigned dest, - unsigned offset, - nir_src *indirect_offset, - unsigned indirect_shift, - unsigned index, - unsigned nr_comps) +emit_ubo_read(compiler_context *ctx, nir_instr *instr, unsigned dest, + unsigned offset, nir_src *indirect_offset, + unsigned indirect_shift, unsigned index, unsigned nr_comps) { - midgard_instruction ins; + midgard_instruction ins; - unsigned dest_size = (instr->type == nir_instr_type_intrinsic) ? - nir_dest_bit_size(nir_instr_as_intrinsic(instr)->dest) : 32; + unsigned dest_size = + (instr->type == nir_instr_type_intrinsic) + ? nir_dest_bit_size(nir_instr_as_intrinsic(instr)->dest) + : 32; - unsigned bitsize = dest_size * nr_comps; + unsigned bitsize = dest_size * nr_comps; - /* Pick the smallest intrinsic to avoid out-of-bounds reads */ - if (bitsize <= 8) - ins = m_ld_ubo_u8(dest, 0); - else if (bitsize <= 16) - ins = m_ld_ubo_u16(dest, 0); - else if (bitsize <= 32) - ins = m_ld_ubo_32(dest, 0); - else if (bitsize <= 64) - ins = m_ld_ubo_64(dest, 0); - else if (bitsize <= 128) - ins = m_ld_ubo_128(dest, 0); - else - unreachable("Invalid UBO read size"); + /* Pick the smallest intrinsic to avoid out-of-bounds reads */ + if (bitsize <= 8) + ins = m_ld_ubo_u8(dest, 0); + else if (bitsize <= 16) + ins = m_ld_ubo_u16(dest, 0); + else if (bitsize <= 32) + ins = m_ld_ubo_32(dest, 0); + else if (bitsize <= 64) + ins = m_ld_ubo_64(dest, 0); + else if (bitsize <= 128) + ins = m_ld_ubo_128(dest, 0); + else + unreachable("Invalid UBO read size"); - ins.constants.u32[0] = offset; + ins.constants.u32[0] = offset; - if (instr->type == nir_instr_type_intrinsic) - mir_set_intr_mask(instr, &ins, true); + if (instr->type == nir_instr_type_intrinsic) + mir_set_intr_mask(instr, &ins, true); - if (indirect_offset) { - ins.src[2] = nir_src_index(ctx, indirect_offset); - ins.src_types[2] = nir_type_uint32; - ins.load_store.index_shift = indirect_shift; + if (indirect_offset) { + ins.src[2] = nir_src_index(ctx, indirect_offset); + ins.src_types[2] = nir_type_uint32; + ins.load_store.index_shift = indirect_shift; - /* X component for the whole swizzle to prevent register - * pressure from ballooning from the extra components */ - for (unsigned i = 0; i < ARRAY_SIZE(ins.swizzle[2]); ++i) - ins.swizzle[2][i] = 0; - } else { - ins.load_store.index_reg = REGISTER_LDST_ZERO; - } + /* X component for the whole swizzle to prevent register + * pressure from ballooning from the extra components */ + for (unsigned i = 0; i < ARRAY_SIZE(ins.swizzle[2]); ++i) + ins.swizzle[2][i] = 0; + } else { + ins.load_store.index_reg = REGISTER_LDST_ZERO; + } - if (indirect_offset && indirect_offset->is_ssa && !indirect_shift) - mir_set_ubo_offset(&ins, indirect_offset, offset); + if (indirect_offset && indirect_offset->is_ssa && !indirect_shift) + mir_set_ubo_offset(&ins, indirect_offset, offset); - midgard_pack_ubo_index_imm(&ins.load_store, 
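emit_ubo_read above sizes the load by multiplying the destination bit size by the component count and picking the narrowest ld_ubo_* variant that still covers it, so small scalar reads cannot run past the end of the buffer. A compact sketch of that bucket selection, with a stand-in enum rather than the real opcode table:

/* Illustrative stand-ins for the m_ld_ubo_* builders used above */
enum ubo_load_kind { LD_UBO_U8, LD_UBO_U16, LD_UBO_32, LD_UBO_64, LD_UBO_128 };

static enum ubo_load_kind
pick_ubo_load(unsigned dest_bit_size, unsigned nr_comps)
{
   unsigned bitsize = dest_bit_size * nr_comps;

   if (bitsize <= 8)   return LD_UBO_U8;
   if (bitsize <= 16)  return LD_UBO_U16;
   if (bitsize <= 32)  return LD_UBO_32;
   if (bitsize <= 64)  return LD_UBO_64;
   if (bitsize <= 128) return LD_UBO_128;

   /* Larger reads are not expected; the real code unreachable()s here */
   return LD_UBO_128;
}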
index); + midgard_pack_ubo_index_imm(&ins.load_store, index); - return emit_mir_instruction(ctx, ins); + return emit_mir_instruction(ctx, ins); } /* Globals are like UBOs if you squint. And shared memory is like globals if * you squint even harder */ static void -emit_global( - compiler_context *ctx, - nir_instr *instr, - bool is_read, - unsigned srcdest, - nir_src *offset, - unsigned seg) +emit_global(compiler_context *ctx, nir_instr *instr, bool is_read, + unsigned srcdest, nir_src *offset, unsigned seg) { - midgard_instruction ins; + midgard_instruction ins; - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - if (is_read) { - unsigned bitsize = nir_dest_bit_size(intr->dest) * - nir_dest_num_components(intr->dest); + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (is_read) { + unsigned bitsize = + nir_dest_bit_size(intr->dest) * nir_dest_num_components(intr->dest); - switch (bitsize) { - case 8: ins = m_ld_u8(srcdest, 0); break; - case 16: ins = m_ld_u16(srcdest, 0); break; - case 32: ins = m_ld_32(srcdest, 0); break; - case 64: ins = m_ld_64(srcdest, 0); break; - case 128: ins = m_ld_128(srcdest, 0); break; - default: unreachable("Invalid global read size"); - } + switch (bitsize) { + case 8: + ins = m_ld_u8(srcdest, 0); + break; + case 16: + ins = m_ld_u16(srcdest, 0); + break; + case 32: + ins = m_ld_32(srcdest, 0); + break; + case 64: + ins = m_ld_64(srcdest, 0); + break; + case 128: + ins = m_ld_128(srcdest, 0); + break; + default: + unreachable("Invalid global read size"); + } - mir_set_intr_mask(instr, &ins, is_read); + mir_set_intr_mask(instr, &ins, is_read); - /* For anything not aligned on 32bit, make sure we write full - * 32 bits registers. */ - if (bitsize & 31) { - unsigned comps_per_32b = 32 / nir_dest_bit_size(intr->dest); + /* For anything not aligned on 32bit, make sure we write full + * 32 bits registers. 
*/ + if (bitsize & 31) { + unsigned comps_per_32b = 32 / nir_dest_bit_size(intr->dest); - for (unsigned c = 0; c < 4 * comps_per_32b; c += comps_per_32b) { - if (!(ins.mask & BITFIELD_RANGE(c, comps_per_32b))) - continue; + for (unsigned c = 0; c < 4 * comps_per_32b; c += comps_per_32b) { + if (!(ins.mask & BITFIELD_RANGE(c, comps_per_32b))) + continue; - unsigned base = ~0; - for (unsigned i = 0; i < comps_per_32b; i++) { - if (ins.mask & BITFIELD_BIT(c + i)) { - base = ins.swizzle[0][c + i]; - break; - } - } + unsigned base = ~0; + for (unsigned i = 0; i < comps_per_32b; i++) { + if (ins.mask & BITFIELD_BIT(c + i)) { + base = ins.swizzle[0][c + i]; + break; + } + } - assert(base != ~0); + assert(base != ~0); - for (unsigned i = 0; i < comps_per_32b; i++) { - if (!(ins.mask & BITFIELD_BIT(c + i))) { - ins.swizzle[0][c + i] = base + i; - ins.mask |= BITFIELD_BIT(c + i); - } - assert(ins.swizzle[0][c + i] == base + i); - } - } + for (unsigned i = 0; i < comps_per_32b; i++) { + if (!(ins.mask & BITFIELD_BIT(c + i))) { + ins.swizzle[0][c + i] = base + i; + ins.mask |= BITFIELD_BIT(c + i); + } + assert(ins.swizzle[0][c + i] == base + i); + } + } + } + } else { + unsigned bitsize = + nir_src_bit_size(intr->src[0]) * nir_src_num_components(intr->src[0]); - } - } else { - unsigned bitsize = nir_src_bit_size(intr->src[0]) * - nir_src_num_components(intr->src[0]); + if (bitsize == 8) + ins = m_st_u8(srcdest, 0); + else if (bitsize == 16) + ins = m_st_u16(srcdest, 0); + else if (bitsize <= 32) + ins = m_st_32(srcdest, 0); + else if (bitsize <= 64) + ins = m_st_64(srcdest, 0); + else if (bitsize <= 128) + ins = m_st_128(srcdest, 0); + else + unreachable("Invalid global store size"); - if (bitsize == 8) - ins = m_st_u8(srcdest, 0); - else if (bitsize == 16) - ins = m_st_u16(srcdest, 0); - else if (bitsize <= 32) - ins = m_st_32(srcdest, 0); - else if (bitsize <= 64) - ins = m_st_64(srcdest, 0); - else if (bitsize <= 128) - ins = m_st_128(srcdest, 0); - else - unreachable("Invalid global store size"); + mir_set_intr_mask(instr, &ins, is_read); + } - mir_set_intr_mask(instr, &ins, is_read); - } + mir_set_offset(ctx, &ins, offset, seg); - mir_set_offset(ctx, &ins, offset, seg); + /* Set a valid swizzle for masked out components */ + assert(ins.mask); + unsigned first_component = __builtin_ffs(ins.mask) - 1; - /* Set a valid swizzle for masked out components */ - assert(ins.mask); - unsigned first_component = __builtin_ffs(ins.mask) - 1; + for (unsigned i = 0; i < ARRAY_SIZE(ins.swizzle[0]); ++i) { + if (!(ins.mask & (1 << i))) + ins.swizzle[0][i] = first_component; + } - for (unsigned i = 0; i < ARRAY_SIZE(ins.swizzle[0]); ++i) { - if (!(ins.mask & (1 << i))) - ins.swizzle[0][i] = first_component; - } - - emit_mir_instruction(ctx, ins); + emit_mir_instruction(ctx, ins); } /* If is_shared is off, the only other possible value are globals, since @@ -1360,1346 +1361,1362 @@ emit_global( * `image_direct_address` should be ~0 when instr is not an image_atomic * and the destination register of a lea_image op when it is an image_atomic. */ static void -emit_atomic( - compiler_context *ctx, - nir_intrinsic_instr *instr, - bool is_shared, - midgard_load_store_op op, - unsigned image_direct_address) +emit_atomic(compiler_context *ctx, nir_intrinsic_instr *instr, bool is_shared, + midgard_load_store_op op, unsigned image_direct_address) { - nir_alu_type type = - (op == midgard_op_atomic_imin || op == midgard_op_atomic_imax) ? 
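For 8- and 16-bit global reads, the code above widens the write mask so every touched 32-bit register is written in full, filling the swizzles of the missing components contiguously from the first enabled one in each chunk. A self-contained sketch of that fix-up, assuming a flat mask/swizzle pair instead of the midgard_instruction fields:

#include <assert.h>

static void
widen_to_32bit_chunks(unsigned *mask, unsigned swizzle[16],
                      unsigned comps_per_32b)
{
   for (unsigned c = 0; c < 4 * comps_per_32b; c += comps_per_32b) {
      unsigned chunk = ((1u << comps_per_32b) - 1) << c;
      if (!(*mask & chunk))
         continue;

      /* First enabled component of the chunk supplies the base swizzle */
      unsigned base = ~0u;
      for (unsigned i = 0; i < comps_per_32b; i++) {
         if (*mask & (1u << (c + i))) {
            base = swizzle[c + i];
            break;
         }
      }
      assert(base != ~0u);

      /* Enable the rest of the chunk with consecutive source components */
      for (unsigned i = 0; i < comps_per_32b; i++) {
         if (!(*mask & (1u << (c + i)))) {
            swizzle[c + i] = base + i;
            *mask |= 1u << (c + i);
         }
      }
   }
}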
- nir_type_int : nir_type_uint; + nir_alu_type type = + (op == midgard_op_atomic_imin || op == midgard_op_atomic_imax) + ? nir_type_int + : nir_type_uint; - bool is_image = image_direct_address != ~0; + bool is_image = image_direct_address != ~0; - unsigned dest = nir_dest_index(&instr->dest); - unsigned val_src = is_image ? 3 : 1; - unsigned val = nir_src_index(ctx, &instr->src[val_src]); - unsigned bitsize = nir_src_bit_size(instr->src[val_src]); - emit_explicit_constant(ctx, val, val); + unsigned dest = nir_dest_index(&instr->dest); + unsigned val_src = is_image ? 3 : 1; + unsigned val = nir_src_index(ctx, &instr->src[val_src]); + unsigned bitsize = nir_src_bit_size(instr->src[val_src]); + emit_explicit_constant(ctx, val, val); - midgard_instruction ins = { - .type = TAG_LOAD_STORE_4, - .mask = 0xF, - .dest = dest, - .src = { ~0, ~0, ~0, val, }, - .src_types = { 0, 0, 0, type | bitsize, }, - .op = op - }; + midgard_instruction ins = {.type = TAG_LOAD_STORE_4, + .mask = 0xF, + .dest = dest, + .src = + { + ~0, + ~0, + ~0, + val, + }, + .src_types = + { + 0, + 0, + 0, + type | bitsize, + }, + .op = op}; - nir_src *src_offset = nir_get_io_offset_src(instr); + nir_src *src_offset = nir_get_io_offset_src(instr); - if (op == midgard_op_atomic_cmpxchg) { - unsigned xchg_val_src = is_image ? 4 : 2; - unsigned xchg_val = nir_src_index(ctx, &instr->src[xchg_val_src]); - emit_explicit_constant(ctx, xchg_val, xchg_val); + if (op == midgard_op_atomic_cmpxchg) { + unsigned xchg_val_src = is_image ? 4 : 2; + unsigned xchg_val = nir_src_index(ctx, &instr->src[xchg_val_src]); + emit_explicit_constant(ctx, xchg_val, xchg_val); - ins.src[2] = val; - ins.src_types[2] = type | bitsize; - ins.src[3] = xchg_val; + ins.src[2] = val; + ins.src_types[2] = type | bitsize; + ins.src[3] = xchg_val; - if (is_shared) { - ins.load_store.arg_reg = REGISTER_LDST_LOCAL_STORAGE_PTR; - ins.load_store.arg_comp = COMPONENT_Z; - ins.load_store.bitsize_toggle = true; - } else { - for(unsigned i = 0; i < 2; ++i) - ins.swizzle[1][i] = i; + if (is_shared) { + ins.load_store.arg_reg = REGISTER_LDST_LOCAL_STORAGE_PTR; + ins.load_store.arg_comp = COMPONENT_Z; + ins.load_store.bitsize_toggle = true; + } else { + for (unsigned i = 0; i < 2; ++i) + ins.swizzle[1][i] = i; - ins.src[1] = is_image ? image_direct_address : - nir_src_index(ctx, src_offset); - ins.src_types[1] = nir_type_uint64; - } - } else if (is_image) { - for(unsigned i = 0; i < 2; ++i) - ins.swizzle[2][i] = i; + ins.src[1] = + is_image ? image_direct_address : nir_src_index(ctx, src_offset); + ins.src_types[1] = nir_type_uint64; + } + } else if (is_image) { + for (unsigned i = 0; i < 2; ++i) + ins.swizzle[2][i] = i; - ins.src[2] = image_direct_address; - ins.src_types[2] = nir_type_uint64; + ins.src[2] = image_direct_address; + ins.src_types[2] = nir_type_uint64; - ins.load_store.arg_reg = REGISTER_LDST_ZERO; - ins.load_store.bitsize_toggle = true; - ins.load_store.index_format = midgard_index_address_u64; - } else - mir_set_offset(ctx, &ins, src_offset, is_shared ? LDST_SHARED : LDST_GLOBAL); + ins.load_store.arg_reg = REGISTER_LDST_ZERO; + ins.load_store.bitsize_toggle = true; + ins.load_store.index_format = midgard_index_address_u64; + } else + mir_set_offset(ctx, &ins, src_offset, + is_shared ? 
LDST_SHARED : LDST_GLOBAL); - mir_set_intr_mask(&instr->instr, &ins, true); + mir_set_intr_mask(&instr->instr, &ins, true); - emit_mir_instruction(ctx, ins); + emit_mir_instruction(ctx, ins); } static void -emit_varying_read( - compiler_context *ctx, - unsigned dest, unsigned offset, - unsigned nr_comp, unsigned component, - nir_src *indirect_offset, nir_alu_type type, bool flat) +emit_varying_read(compiler_context *ctx, unsigned dest, unsigned offset, + unsigned nr_comp, unsigned component, + nir_src *indirect_offset, nir_alu_type type, bool flat) { - midgard_instruction ins = m_ld_vary_32(dest, PACK_LDST_ATTRIB_OFS(offset)); - ins.mask = mask_of(nr_comp); - ins.dest_type = type; + midgard_instruction ins = m_ld_vary_32(dest, PACK_LDST_ATTRIB_OFS(offset)); + ins.mask = mask_of(nr_comp); + ins.dest_type = type; - if (type == nir_type_float16) { - /* Ensure we are aligned so we can pack it later */ - ins.mask = mask_of(ALIGN_POT(nr_comp, 2)); - } + if (type == nir_type_float16) { + /* Ensure we are aligned so we can pack it later */ + ins.mask = mask_of(ALIGN_POT(nr_comp, 2)); + } - for (unsigned i = 0; i < ARRAY_SIZE(ins.swizzle[0]); ++i) - ins.swizzle[0][i] = MIN2(i + component, COMPONENT_W); + for (unsigned i = 0; i < ARRAY_SIZE(ins.swizzle[0]); ++i) + ins.swizzle[0][i] = MIN2(i + component, COMPONENT_W); - midgard_varying_params p = { - .flat_shading = flat, - .perspective_correction = 1, - .interpolate_sample = true, - }; - midgard_pack_varying_params(&ins.load_store, p); + midgard_varying_params p = { + .flat_shading = flat, + .perspective_correction = 1, + .interpolate_sample = true, + }; + midgard_pack_varying_params(&ins.load_store, p); - if (indirect_offset) { - ins.src[2] = nir_src_index(ctx, indirect_offset); - ins.src_types[2] = nir_type_uint32; - } else - ins.load_store.index_reg = REGISTER_LDST_ZERO; + if (indirect_offset) { + ins.src[2] = nir_src_index(ctx, indirect_offset); + ins.src_types[2] = nir_type_uint32; + } else + ins.load_store.index_reg = REGISTER_LDST_ZERO; - ins.load_store.arg_reg = REGISTER_LDST_ZERO; - ins.load_store.index_format = midgard_index_address_u32; + ins.load_store.arg_reg = REGISTER_LDST_ZERO; + ins.load_store.index_format = midgard_index_address_u32; - /* For flat shading, we always use .u32 and require 32-bit mode. For - * smooth shading, we use the appropriate floating-point type. - * - * This could be optimized, but it makes it easy to check correctness. - */ - if (flat) { - assert(nir_alu_type_get_type_size(type) == 32); - ins.op = midgard_op_ld_vary_32u; - } else { - assert(nir_alu_type_get_base_type(type) == nir_type_float); + /* For flat shading, we always use .u32 and require 32-bit mode. For + * smooth shading, we use the appropriate floating-point type. + * + * This could be optimized, but it makes it easy to check correctness. + */ + if (flat) { + assert(nir_alu_type_get_type_size(type) == 32); + ins.op = midgard_op_ld_vary_32u; + } else { + assert(nir_alu_type_get_base_type(type) == nir_type_float); - ins.op = (nir_alu_type_get_type_size(type) == 32) ? - midgard_op_ld_vary_32 : - midgard_op_ld_vary_16; - } + ins.op = (nir_alu_type_get_type_size(type) == 32) ? midgard_op_ld_vary_32 + : midgard_op_ld_vary_16; + } - emit_mir_instruction(ctx, ins); + emit_mir_instruction(ctx, ins); } - -/* If `is_atomic` is true, we emit a `lea_image` since midgard doesn't not have special - * image_atomic opcodes. The caller can then use that address to emit a normal atomic opcode. 
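emit_varying_read above always loads flat inputs as 32-bit integers and selects the 16- or 32-bit float load for smooth inputs from the NIR type size. A small illustrative helper capturing that choice (the enum stands in for the midgard_op_ld_vary_* opcodes):

#include <stdbool.h>

enum vary_op { LD_VARY_32U, LD_VARY_32, LD_VARY_16 };

static enum vary_op
pick_vary_op(bool is_flat, unsigned type_size_bits)
{
   /* Flat shading requires 32-bit mode and the unsigned-integer variant */
   if (is_flat)
      return LD_VARY_32U;

   /* Smooth inputs are float; pick the width matching the NIR type */
   return (type_size_bits == 32) ? LD_VARY_32 : LD_VARY_16;
}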
*/ +/* If `is_atomic` is true, we emit a `lea_image` since midgard doesn't not have + * special image_atomic opcodes. The caller can then use that address to emit a + * normal atomic opcode. */ static midgard_instruction emit_image_op(compiler_context *ctx, nir_intrinsic_instr *instr, bool is_atomic) { - enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr); - unsigned nr_attr = ctx->stage == MESA_SHADER_VERTEX ? - util_bitcount64(ctx->nir->info.inputs_read) : 0; - unsigned nr_dim = glsl_get_sampler_dim_coordinate_components(dim); - bool is_array = nir_intrinsic_image_array(instr); - bool is_store = instr->intrinsic == nir_intrinsic_image_store; + enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr); + unsigned nr_attr = ctx->stage == MESA_SHADER_VERTEX + ? util_bitcount64(ctx->nir->info.inputs_read) + : 0; + unsigned nr_dim = glsl_get_sampler_dim_coordinate_components(dim); + bool is_array = nir_intrinsic_image_array(instr); + bool is_store = instr->intrinsic == nir_intrinsic_image_store; - /* TODO: MSAA */ - assert(dim != GLSL_SAMPLER_DIM_MS && "MSAA'd images not supported"); + /* TODO: MSAA */ + assert(dim != GLSL_SAMPLER_DIM_MS && "MSAA'd images not supported"); - unsigned coord_reg = nir_src_index(ctx, &instr->src[1]); - emit_explicit_constant(ctx, coord_reg, coord_reg); + unsigned coord_reg = nir_src_index(ctx, &instr->src[1]); + emit_explicit_constant(ctx, coord_reg, coord_reg); - nir_src *index = &instr->src[0]; - bool is_direct = nir_src_is_const(*index); + nir_src *index = &instr->src[0]; + bool is_direct = nir_src_is_const(*index); - /* For image opcodes, address is used as an index into the attribute descriptor */ - unsigned address = nr_attr; - if (is_direct) - address += nir_src_as_uint(*index); + /* For image opcodes, address is used as an index into the attribute + * descriptor */ + unsigned address = nr_attr; + if (is_direct) + address += nir_src_as_uint(*index); - midgard_instruction ins; - if (is_store) { /* emit st_image_* */ - unsigned val = nir_src_index(ctx, &instr->src[3]); - emit_explicit_constant(ctx, val, val); + midgard_instruction ins; + if (is_store) { /* emit st_image_* */ + unsigned val = nir_src_index(ctx, &instr->src[3]); + emit_explicit_constant(ctx, val, val); - nir_alu_type type = nir_intrinsic_src_type(instr); - ins = st_image(type, val, PACK_LDST_ATTRIB_OFS(address)); - nir_alu_type base_type = nir_alu_type_get_base_type(type); - ins.src_types[0] = base_type | nir_src_bit_size(instr->src[3]); - } else if (is_atomic) { /* emit lea_image */ - unsigned dest = make_compiler_temp_reg(ctx); - ins = m_lea_image(dest, PACK_LDST_ATTRIB_OFS(address)); - ins.mask = mask_of(2); /* 64-bit memory address */ - } else { /* emit ld_image_* */ - nir_alu_type type = nir_intrinsic_dest_type(instr); - ins = ld_image(type, nir_dest_index(&instr->dest), PACK_LDST_ATTRIB_OFS(address)); - ins.mask = mask_of(nir_intrinsic_dest_components(instr)); - ins.dest_type = type; - } + nir_alu_type type = nir_intrinsic_src_type(instr); + ins = st_image(type, val, PACK_LDST_ATTRIB_OFS(address)); + nir_alu_type base_type = nir_alu_type_get_base_type(type); + ins.src_types[0] = base_type | nir_src_bit_size(instr->src[3]); + } else if (is_atomic) { /* emit lea_image */ + unsigned dest = make_compiler_temp_reg(ctx); + ins = m_lea_image(dest, PACK_LDST_ATTRIB_OFS(address)); + ins.mask = mask_of(2); /* 64-bit memory address */ + } else { /* emit ld_image_* */ + nir_alu_type type = nir_intrinsic_dest_type(instr); + ins = ld_image(type, nir_dest_index(&instr->dest), + 
PACK_LDST_ATTRIB_OFS(address)); + ins.mask = mask_of(nir_intrinsic_dest_components(instr)); + ins.dest_type = type; + } - /* Coord reg */ - ins.src[1] = coord_reg; - ins.src_types[1] = nir_type_uint16; - if (nr_dim == 3 || is_array) { - ins.load_store.bitsize_toggle = true; - } + /* Coord reg */ + ins.src[1] = coord_reg; + ins.src_types[1] = nir_type_uint16; + if (nr_dim == 3 || is_array) { + ins.load_store.bitsize_toggle = true; + } - /* Image index reg */ - if (!is_direct) { - ins.src[2] = nir_src_index(ctx, index); - ins.src_types[2] = nir_type_uint32; - } else - ins.load_store.index_reg = REGISTER_LDST_ZERO; + /* Image index reg */ + if (!is_direct) { + ins.src[2] = nir_src_index(ctx, index); + ins.src_types[2] = nir_type_uint32; + } else + ins.load_store.index_reg = REGISTER_LDST_ZERO; - emit_mir_instruction(ctx, ins); + emit_mir_instruction(ctx, ins); - return ins; + return ins; } static void -emit_attr_read( - compiler_context *ctx, - unsigned dest, unsigned offset, - unsigned nr_comp, nir_alu_type t) +emit_attr_read(compiler_context *ctx, unsigned dest, unsigned offset, + unsigned nr_comp, nir_alu_type t) { - midgard_instruction ins = m_ld_attr_32(dest, PACK_LDST_ATTRIB_OFS(offset)); - ins.load_store.arg_reg = REGISTER_LDST_ZERO; - ins.load_store.index_reg = REGISTER_LDST_ZERO; - ins.mask = mask_of(nr_comp); + midgard_instruction ins = m_ld_attr_32(dest, PACK_LDST_ATTRIB_OFS(offset)); + ins.load_store.arg_reg = REGISTER_LDST_ZERO; + ins.load_store.index_reg = REGISTER_LDST_ZERO; + ins.mask = mask_of(nr_comp); - /* Use the type appropriate load */ - switch (t) { - case nir_type_uint: - case nir_type_bool: - ins.op = midgard_op_ld_attr_32u; - break; - case nir_type_int: - ins.op = midgard_op_ld_attr_32i; - break; - case nir_type_float: - ins.op = midgard_op_ld_attr_32; - break; - default: - unreachable("Attempted to load unknown type"); - break; - } + /* Use the type appropriate load */ + switch (t) { + case nir_type_uint: + case nir_type_bool: + ins.op = midgard_op_ld_attr_32u; + break; + case nir_type_int: + ins.op = midgard_op_ld_attr_32i; + break; + case nir_type_float: + ins.op = midgard_op_ld_attr_32; + break; + default: + unreachable("Attempted to load unknown type"); + break; + } - emit_mir_instruction(ctx, ins); + emit_mir_instruction(ctx, ins); } static void emit_sysval_read(compiler_context *ctx, nir_instr *instr, - unsigned nr_components, unsigned offset) + unsigned nr_components, unsigned offset) { - nir_dest nir_dest; + nir_dest nir_dest; - /* Figure out which uniform this is */ - unsigned sysval_ubo = ctx->inputs->fixed_sysval_ubo >= 0 ? - ctx->inputs->fixed_sysval_ubo : - ctx->nir->info.num_ubos; - int sysval = panfrost_sysval_for_instr(instr, &nir_dest); - unsigned dest = nir_dest_index(&nir_dest); - unsigned uniform = - pan_lookup_sysval(ctx->sysval_to_id, &ctx->info->sysvals, sysval); + /* Figure out which uniform this is */ + unsigned sysval_ubo = ctx->inputs->fixed_sysval_ubo >= 0 + ? 
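Image opcodes above address the attribute descriptor table, so the image index is biased by the number of vertex attributes already occupying that table, and a constant NIR index is folded straight into the address while an indirect index is passed through a register instead. A trivial sketch of that computation (names are placeholders):

#include <stdbool.h>

static unsigned
image_attrib_address(unsigned nr_vertex_attribs, bool index_is_const,
                     unsigned const_index)
{
   /* Images follow the vertex attributes in the descriptor table */
   unsigned address = nr_vertex_attribs;

   /* A constant index is baked into the address; an indirect one stays in
    * a register and the base address is left at nr_vertex_attribs */
   if (index_is_const)
      address += const_index;

   return address;
}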
ctx->inputs->fixed_sysval_ubo + : ctx->nir->info.num_ubos; + int sysval = panfrost_sysval_for_instr(instr, &nir_dest); + unsigned dest = nir_dest_index(&nir_dest); + unsigned uniform = + pan_lookup_sysval(ctx->sysval_to_id, &ctx->info->sysvals, sysval); - /* Emit the read itself -- this is never indirect */ - midgard_instruction *ins = - emit_ubo_read(ctx, instr, dest, (uniform * 16) + offset, NULL, 0, - sysval_ubo, nr_components); + /* Emit the read itself -- this is never indirect */ + midgard_instruction *ins = + emit_ubo_read(ctx, instr, dest, (uniform * 16) + offset, NULL, 0, + sysval_ubo, nr_components); - ins->mask = mask_of(nr_components); + ins->mask = mask_of(nr_components); } static unsigned compute_builtin_arg(nir_intrinsic_op op) { - switch (op) { - case nir_intrinsic_load_workgroup_id: - return REGISTER_LDST_GROUP_ID; - case nir_intrinsic_load_local_invocation_id: - return REGISTER_LDST_LOCAL_THREAD_ID; - case nir_intrinsic_load_global_invocation_id: - case nir_intrinsic_load_global_invocation_id_zero_base: - return REGISTER_LDST_GLOBAL_THREAD_ID; - default: - unreachable("Invalid compute paramater loaded"); - } + switch (op) { + case nir_intrinsic_load_workgroup_id: + return REGISTER_LDST_GROUP_ID; + case nir_intrinsic_load_local_invocation_id: + return REGISTER_LDST_LOCAL_THREAD_ID; + case nir_intrinsic_load_global_invocation_id: + case nir_intrinsic_load_global_invocation_id_zero_base: + return REGISTER_LDST_GLOBAL_THREAD_ID; + default: + unreachable("Invalid compute paramater loaded"); + } } static void -emit_fragment_store(compiler_context *ctx, unsigned src, unsigned src_z, unsigned src_s, - enum midgard_rt_id rt, unsigned sample_iter) +emit_fragment_store(compiler_context *ctx, unsigned src, unsigned src_z, + unsigned src_s, enum midgard_rt_id rt, unsigned sample_iter) { - assert(rt < ARRAY_SIZE(ctx->writeout_branch)); - assert(sample_iter < ARRAY_SIZE(ctx->writeout_branch[0])); + assert(rt < ARRAY_SIZE(ctx->writeout_branch)); + assert(sample_iter < ARRAY_SIZE(ctx->writeout_branch[0])); - midgard_instruction *br = ctx->writeout_branch[rt][sample_iter]; + midgard_instruction *br = ctx->writeout_branch[rt][sample_iter]; - assert(!br); + assert(!br); - emit_explicit_constant(ctx, src, src); + emit_explicit_constant(ctx, src, src); - struct midgard_instruction ins = - v_branch(false, false); + struct midgard_instruction ins = v_branch(false, false); - bool depth_only = (rt == MIDGARD_ZS_RT); + bool depth_only = (rt == MIDGARD_ZS_RT); - ins.writeout = depth_only ? 0 : PAN_WRITEOUT_C; + ins.writeout = depth_only ? 
0 : PAN_WRITEOUT_C; - /* Add dependencies */ - ins.src[0] = src; - ins.src_types[0] = nir_type_uint32; + /* Add dependencies */ + ins.src[0] = src; + ins.src_types[0] = nir_type_uint32; - if (depth_only) - ins.constants.u32[0] = 0xFF; - else - ins.constants.u32[0] = ((rt - MIDGARD_COLOR_RT0) << 8) | sample_iter; + if (depth_only) + ins.constants.u32[0] = 0xFF; + else + ins.constants.u32[0] = ((rt - MIDGARD_COLOR_RT0) << 8) | sample_iter; - for (int i = 0; i < 4; ++i) - ins.swizzle[0][i] = i; + for (int i = 0; i < 4; ++i) + ins.swizzle[0][i] = i; - if (~src_z) { - emit_explicit_constant(ctx, src_z, src_z); - ins.src[2] = src_z; - ins.src_types[2] = nir_type_uint32; - ins.writeout |= PAN_WRITEOUT_Z; - } - if (~src_s) { - emit_explicit_constant(ctx, src_s, src_s); - ins.src[3] = src_s; - ins.src_types[3] = nir_type_uint32; - ins.writeout |= PAN_WRITEOUT_S; - } + if (~src_z) { + emit_explicit_constant(ctx, src_z, src_z); + ins.src[2] = src_z; + ins.src_types[2] = nir_type_uint32; + ins.writeout |= PAN_WRITEOUT_Z; + } + if (~src_s) { + emit_explicit_constant(ctx, src_s, src_s); + ins.src[3] = src_s; + ins.src_types[3] = nir_type_uint32; + ins.writeout |= PAN_WRITEOUT_S; + } - /* Emit the branch */ - br = emit_mir_instruction(ctx, ins); - schedule_barrier(ctx); - ctx->writeout_branch[rt][sample_iter] = br; + /* Emit the branch */ + br = emit_mir_instruction(ctx, ins); + schedule_barrier(ctx); + ctx->writeout_branch[rt][sample_iter] = br; - /* Push our current location = current block count - 1 = where we'll - * jump to. Maybe a bit too clever for my own good */ + /* Push our current location = current block count - 1 = where we'll + * jump to. Maybe a bit too clever for my own good */ - br->branch.target_block = ctx->block_count - 1; + br->branch.target_block = ctx->block_count - 1; } static void emit_compute_builtin(compiler_context *ctx, nir_intrinsic_instr *instr) { - unsigned reg = nir_dest_index(&instr->dest); - midgard_instruction ins = m_ldst_mov(reg, 0); - ins.mask = mask_of(3); - ins.swizzle[0][3] = COMPONENT_X; /* xyzx */ - ins.load_store.arg_reg = compute_builtin_arg(instr->intrinsic); - emit_mir_instruction(ctx, ins); + unsigned reg = nir_dest_index(&instr->dest); + midgard_instruction ins = m_ldst_mov(reg, 0); + ins.mask = mask_of(3); + ins.swizzle[0][3] = COMPONENT_X; /* xyzx */ + ins.load_store.arg_reg = compute_builtin_arg(instr->intrinsic); + emit_mir_instruction(ctx, ins); } static unsigned vertex_builtin_arg(nir_intrinsic_op op) { - switch (op) { - case nir_intrinsic_load_vertex_id_zero_base: - return PAN_VERTEX_ID; - case nir_intrinsic_load_instance_id: - return PAN_INSTANCE_ID; - default: - unreachable("Invalid vertex builtin"); - } + switch (op) { + case nir_intrinsic_load_vertex_id_zero_base: + return PAN_VERTEX_ID; + case nir_intrinsic_load_instance_id: + return PAN_INSTANCE_ID; + default: + unreachable("Invalid vertex builtin"); + } } static void emit_vertex_builtin(compiler_context *ctx, nir_intrinsic_instr *instr) { - unsigned reg = nir_dest_index(&instr->dest); - emit_attr_read(ctx, reg, vertex_builtin_arg(instr->intrinsic), 1, nir_type_int); + unsigned reg = nir_dest_index(&instr->dest); + emit_attr_read(ctx, reg, vertex_builtin_arg(instr->intrinsic), 1, + nir_type_int); } static void emit_special(compiler_context *ctx, nir_intrinsic_instr *instr, unsigned idx) { - unsigned reg = nir_dest_index(&instr->dest); + unsigned reg = nir_dest_index(&instr->dest); - midgard_instruction ld = m_ld_tilebuffer_raw(reg, 0); - ld.op = midgard_op_ld_special_32u; - 
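The writeout branch built above encodes its target in the embedded constant: 0xFF for a depth/stencil-only writeout, otherwise the render-target index (relative to MIDGARD_COLOR_RT0) in bits 8 and up with the sample iteration in the low byte. A sketch of that packing, taken from the code above rather than from hardware documentation:

#include <stdbool.h>
#include <stdint.h>

static uint32_t
writeout_constant(bool depth_only, unsigned rt_index, unsigned sample_iter)
{
   /* Depth/stencil-only writeout uses the fixed 0xFF marker */
   if (depth_only)
      return 0xFF;

   /* rt_index here is already rt - MIDGARD_COLOR_RT0 */
   return ((uint32_t)rt_index << 8) | sample_iter;
}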
ld.load_store.signed_offset = PACK_LDST_SELECTOR_OFS(idx); - ld.load_store.index_reg = REGISTER_LDST_ZERO; + midgard_instruction ld = m_ld_tilebuffer_raw(reg, 0); + ld.op = midgard_op_ld_special_32u; + ld.load_store.signed_offset = PACK_LDST_SELECTOR_OFS(idx); + ld.load_store.index_reg = REGISTER_LDST_ZERO; - for (int i = 0; i < 4; ++i) - ld.swizzle[0][i] = COMPONENT_X; + for (int i = 0; i < 4; ++i) + ld.swizzle[0][i] = COMPONENT_X; - emit_mir_instruction(ctx, ld); + emit_mir_instruction(ctx, ld); } static void emit_control_barrier(compiler_context *ctx) { - midgard_instruction ins = { - .type = TAG_TEXTURE_4, - .dest = ~0, - .src = { ~0, ~0, ~0, ~0 }, - .op = midgard_tex_op_barrier, - }; + midgard_instruction ins = { + .type = TAG_TEXTURE_4, + .dest = ~0, + .src = {~0, ~0, ~0, ~0}, + .op = midgard_tex_op_barrier, + }; - emit_mir_instruction(ctx, ins); + emit_mir_instruction(ctx, ins); } static unsigned mir_get_branch_cond(nir_src *src, bool *invert) { - /* Wrap it. No swizzle since it's a scalar */ + /* Wrap it. No swizzle since it's a scalar */ - nir_alu_src alu = { - .src = *src - }; + nir_alu_src alu = {.src = *src}; - *invert = pan_has_source_mod(&alu, nir_op_inot); - return nir_src_index(NULL, &alu.src); + *invert = pan_has_source_mod(&alu, nir_op_inot); + return nir_src_index(NULL, &alu.src); } static uint8_t output_load_rt_addr(compiler_context *ctx, nir_intrinsic_instr *instr) { - if (ctx->inputs->is_blend) - return MIDGARD_COLOR_RT0 + ctx->inputs->blend.rt; + if (ctx->inputs->is_blend) + return MIDGARD_COLOR_RT0 + ctx->inputs->blend.rt; - unsigned loc = nir_intrinsic_io_semantics(instr).location; + unsigned loc = nir_intrinsic_io_semantics(instr).location; - if (loc >= FRAG_RESULT_DATA0) - return loc - FRAG_RESULT_DATA0; + if (loc >= FRAG_RESULT_DATA0) + return loc - FRAG_RESULT_DATA0; - if (loc == FRAG_RESULT_DEPTH) - return 0x1F; - if (loc == FRAG_RESULT_STENCIL) - return 0x1E; + if (loc == FRAG_RESULT_DEPTH) + return 0x1F; + if (loc == FRAG_RESULT_STENCIL) + return 0x1E; - unreachable("Invalid RT to load from"); + unreachable("Invalid RT to load from"); } static void emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) { - unsigned offset = 0, reg; - - switch (instr->intrinsic) { - case nir_intrinsic_discard_if: - case nir_intrinsic_discard: { - bool conditional = instr->intrinsic == nir_intrinsic_discard_if; - struct midgard_instruction discard = v_branch(conditional, false); - discard.branch.target_type = TARGET_DISCARD; - - if (conditional) { - discard.src[0] = mir_get_branch_cond(&instr->src[0], - &discard.branch.invert_conditional); - discard.src_types[0] = nir_type_uint32; - } - - emit_mir_instruction(ctx, discard); - schedule_barrier(ctx); - - break; - } - - case nir_intrinsic_image_load: - case nir_intrinsic_image_store: - emit_image_op(ctx, instr, false); - break; - - case nir_intrinsic_image_size: { - unsigned nr_comp = nir_intrinsic_dest_components(instr); - emit_sysval_read(ctx, &instr->instr, nr_comp, 0); - break; - } - - case nir_intrinsic_load_ubo: - case nir_intrinsic_load_global: - case nir_intrinsic_load_global_constant: - case nir_intrinsic_load_shared: - case nir_intrinsic_load_scratch: - case nir_intrinsic_load_input: - case nir_intrinsic_load_interpolated_input: { - bool is_ubo = instr->intrinsic == nir_intrinsic_load_ubo; - bool is_global = instr->intrinsic == nir_intrinsic_load_global || - instr->intrinsic == nir_intrinsic_load_global_constant; - bool is_shared = instr->intrinsic == nir_intrinsic_load_shared; - bool is_scratch = 
instr->intrinsic == nir_intrinsic_load_scratch; - bool is_flat = instr->intrinsic == nir_intrinsic_load_input; - bool is_interp = instr->intrinsic == nir_intrinsic_load_interpolated_input; - - /* Get the base type of the intrinsic */ - /* TODO: Infer type? Does it matter? */ - nir_alu_type t = - (is_interp) ? nir_type_float : - (is_flat) ? nir_intrinsic_dest_type(instr) : - nir_type_uint; - - t = nir_alu_type_get_base_type(t); - - if (!(is_ubo || is_global || is_scratch)) { - offset = nir_intrinsic_base(instr); - } - - unsigned nr_comp = nir_intrinsic_dest_components(instr); - - nir_src *src_offset = nir_get_io_offset_src(instr); - - bool direct = nir_src_is_const(*src_offset); - nir_src *indirect_offset = direct ? NULL : src_offset; - - if (direct) - offset += nir_src_as_uint(*src_offset); - - /* We may need to apply a fractional offset */ - int component = (is_flat || is_interp) ? - nir_intrinsic_component(instr) : 0; - reg = nir_dest_index(&instr->dest); - - if (is_ubo) { - nir_src index = instr->src[0]; - - /* TODO: Is indirect block number possible? */ - assert(nir_src_is_const(index)); - - uint32_t uindex = nir_src_as_uint(index); - emit_ubo_read(ctx, &instr->instr, reg, offset, indirect_offset, 0, uindex, nr_comp); - } else if (is_global || is_shared || is_scratch) { - unsigned seg = is_global ? LDST_GLOBAL : (is_shared ? LDST_SHARED : LDST_SCRATCH); - emit_global(ctx, &instr->instr, true, reg, src_offset, seg); - } else if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->inputs->is_blend) { - emit_varying_read(ctx, reg, offset, nr_comp, component, indirect_offset, t | nir_dest_bit_size(instr->dest), is_flat); - } else if (ctx->inputs->is_blend) { - /* ctx->blend_input will be precoloured to r0/r2, where - * the input is preloaded */ - - unsigned *input = offset ? &ctx->blend_src1 : &ctx->blend_input; - - if (*input == ~0) - *input = reg; - else - emit_mir_instruction(ctx, v_mov(*input, reg)); - } else if (ctx->stage == MESA_SHADER_VERTEX) { - emit_attr_read(ctx, reg, offset, nr_comp, t); - } else { - DBG("Unknown load\n"); - assert(0); - } - - break; - } - - /* Handled together with load_interpolated_input */ - case nir_intrinsic_load_barycentric_pixel: - case nir_intrinsic_load_barycentric_centroid: - case nir_intrinsic_load_barycentric_sample: - break; - - /* Reads 128-bit value raw off the tilebuffer during blending, tasty */ - - case nir_intrinsic_load_raw_output_pan: { - reg = nir_dest_index(&instr->dest); - - /* T720 and below use different blend opcodes with slightly - * different semantics than T760 and up */ - - midgard_instruction ld = m_ld_tilebuffer_raw(reg, 0); - - unsigned target = output_load_rt_addr(ctx, instr); - ld.load_store.index_comp = target & 0x3; - ld.load_store.index_reg = target >> 2; - - if (nir_src_is_const(instr->src[0])) { - unsigned sample = nir_src_as_uint(instr->src[0]); - ld.load_store.arg_comp = sample & 0x3; - ld.load_store.arg_reg = sample >> 2; - } else { - /* Enable sample index via register. 
*/ - ld.load_store.signed_offset |= 1; - ld.src[1] = nir_src_index(ctx, &instr->src[0]); - ld.src_types[1] = nir_type_int32; - } - - if (ctx->quirks & MIDGARD_OLD_BLEND) { - ld.op = midgard_op_ld_special_32u; - ld.load_store.signed_offset = PACK_LDST_SELECTOR_OFS(16); - ld.load_store.index_reg = REGISTER_LDST_ZERO; - } - - emit_mir_instruction(ctx, ld); - break; - } - - case nir_intrinsic_load_output: { - reg = nir_dest_index(&instr->dest); - - unsigned bits = nir_dest_bit_size(instr->dest); - - midgard_instruction ld; - if (bits == 16) - ld = m_ld_tilebuffer_16f(reg, 0); - else - ld = m_ld_tilebuffer_32f(reg, 0); - - unsigned index = output_load_rt_addr(ctx, instr); - ld.load_store.index_comp = index & 0x3; - ld.load_store.index_reg = index >> 2; - - for (unsigned c = 4; c < 16; ++c) - ld.swizzle[0][c] = 0; - - if (ctx->quirks & MIDGARD_OLD_BLEND) { - if (bits == 16) - ld.op = midgard_op_ld_special_16f; - else - ld.op = midgard_op_ld_special_32f; - ld.load_store.signed_offset = PACK_LDST_SELECTOR_OFS(1); - ld.load_store.index_reg = REGISTER_LDST_ZERO; - } - - emit_mir_instruction(ctx, ld); - break; - } - - case nir_intrinsic_store_output: - case nir_intrinsic_store_combined_output_pan: - assert(nir_src_is_const(instr->src[1]) && "no indirect outputs"); - - reg = nir_src_index(ctx, &instr->src[0]); - - if (ctx->stage == MESA_SHADER_FRAGMENT) { - bool combined = instr->intrinsic == - nir_intrinsic_store_combined_output_pan; - - enum midgard_rt_id rt; - - unsigned reg_z = ~0, reg_s = ~0, reg_2 = ~0; - unsigned writeout = PAN_WRITEOUT_C; - if (combined) { - writeout = nir_intrinsic_component(instr); - if (writeout & PAN_WRITEOUT_Z) - reg_z = nir_src_index(ctx, &instr->src[2]); - if (writeout & PAN_WRITEOUT_S) - reg_s = nir_src_index(ctx, &instr->src[3]); - if (writeout & PAN_WRITEOUT_2) - reg_2 = nir_src_index(ctx, &instr->src[4]); - } - - if (writeout & PAN_WRITEOUT_C) { - nir_io_semantics sem = nir_intrinsic_io_semantics(instr); - - rt = MIDGARD_COLOR_RT0 + - (sem.location - FRAG_RESULT_DATA0); - } else { - rt = MIDGARD_ZS_RT; - } - - /* Dual-source blend writeout is done by leaving the - * value in r2 for the blend shader to use. */ - if (~reg_2) { - if (instr->src[4].is_ssa) { - emit_explicit_constant(ctx, reg_2, reg_2); - - unsigned out = make_compiler_temp(ctx); - - midgard_instruction ins = v_mov(reg_2, out); - emit_mir_instruction(ctx, ins); - - ctx->blend_src1 = out; - } else { - ctx->blend_src1 = reg_2; - } - } - - emit_fragment_store(ctx, reg, reg_z, reg_s, rt, 0); - } else if (ctx->stage == MESA_SHADER_VERTEX) { - assert(instr->intrinsic == nir_intrinsic_store_output); - - /* We should have been vectorized, though we don't - * currently check that st_vary is emitted only once - * per slot (this is relevant, since there's not a mask - * parameter available on the store [set to 0 by the - * blob]). We do respect the component by adjusting the - * swizzle. If this is a constant source, we'll need to - * emit that explicitly. */ - - emit_explicit_constant(ctx, reg, reg); - - offset = nir_intrinsic_base(instr) + nir_src_as_uint(instr->src[1]); - - unsigned dst_component = nir_intrinsic_component(instr); - unsigned nr_comp = nir_src_num_components(instr->src[0]); - - /* ABI: Format controlled by the attribute descriptor. - * This simplifies flat shading, although it prevents - * certain (unimplemented) 16-bit optimizations. - * - * In particular, it lets the driver handle internal - * TGSI shaders that set flat in the VS but smooth in - * the FS. This matches our handling on Bifrost. 
- */ - bool auto32 = true; - assert(nir_alu_type_get_type_size(nir_intrinsic_src_type(instr)) == 32); - - /* ABI: varyings in the secondary attribute table */ - bool secondary_table = true; - - midgard_instruction st = m_st_vary_32(reg, PACK_LDST_ATTRIB_OFS(offset)); - st.load_store.arg_reg = REGISTER_LDST_ZERO; - st.load_store.index_reg = REGISTER_LDST_ZERO; - - /* Attribute instruction uses these 2-bits for the - * a32 and table bits, pack this specially. - */ - st.load_store.index_format = (auto32 ? (1 << 0) : 0) | - (secondary_table ? (1 << 1) : 0); - - /* nir_intrinsic_component(store_intr) encodes the - * destination component start. Source component offset - * adjustment is taken care of in - * install_registers_instr(), when offset_swizzle() is - * called. - */ - unsigned src_component = COMPONENT_X; - - assert(nr_comp > 0); - for (unsigned i = 0; i < ARRAY_SIZE(st.swizzle); ++i) { - st.swizzle[0][i] = src_component; - if (i >= dst_component && i < dst_component + nr_comp - 1) - src_component++; - } - - emit_mir_instruction(ctx, st); - } else { - DBG("Unknown store\n"); - assert(0); - } - - break; - - /* Special case of store_output for lowered blend shaders */ - case nir_intrinsic_store_raw_output_pan: - assert (ctx->stage == MESA_SHADER_FRAGMENT); - reg = nir_src_index(ctx, &instr->src[0]); - for (unsigned s = 0; s < ctx->blend_sample_iterations; s++) - emit_fragment_store(ctx, reg, ~0, ~0, - ctx->inputs->blend.rt + MIDGARD_COLOR_RT0, - s); - break; - - case nir_intrinsic_store_global: - case nir_intrinsic_store_shared: - case nir_intrinsic_store_scratch: - reg = nir_src_index(ctx, &instr->src[0]); - emit_explicit_constant(ctx, reg, reg); - - unsigned seg; - if (instr->intrinsic == nir_intrinsic_store_global) - seg = LDST_GLOBAL; - else if (instr->intrinsic == nir_intrinsic_store_shared) - seg = LDST_SHARED; - else - seg = LDST_SCRATCH; - - emit_global(ctx, &instr->instr, false, reg, &instr->src[1], seg); - break; - - case nir_intrinsic_load_ssbo_address: - case nir_intrinsic_load_xfb_address: - emit_sysval_read(ctx, &instr->instr, 2, 0); - break; - - case nir_intrinsic_load_first_vertex: - case nir_intrinsic_load_work_dim: - case nir_intrinsic_load_num_vertices: - emit_sysval_read(ctx, &instr->instr, 1, 0); - break; - - case nir_intrinsic_load_base_vertex: - emit_sysval_read(ctx, &instr->instr, 1, 4); - break; - - case nir_intrinsic_load_base_instance: - case nir_intrinsic_get_ssbo_size: - emit_sysval_read(ctx, &instr->instr, 1, 8); - break; - - case nir_intrinsic_load_sample_positions_pan: - emit_sysval_read(ctx, &instr->instr, 2, 0); - break; - - case nir_intrinsic_load_viewport_scale: - case nir_intrinsic_load_viewport_offset: - case nir_intrinsic_load_num_workgroups: - case nir_intrinsic_load_sampler_lod_parameters_pan: - case nir_intrinsic_load_workgroup_size: - emit_sysval_read(ctx, &instr->instr, 3, 0); - break; - - case nir_intrinsic_load_blend_const_color_rgba: - emit_sysval_read(ctx, &instr->instr, 4, 0); - break; - - case nir_intrinsic_load_workgroup_id: - case nir_intrinsic_load_local_invocation_id: - case nir_intrinsic_load_global_invocation_id: - case nir_intrinsic_load_global_invocation_id_zero_base: - emit_compute_builtin(ctx, instr); - break; - - case nir_intrinsic_load_vertex_id_zero_base: - case nir_intrinsic_load_instance_id: - emit_vertex_builtin(ctx, instr); - break; - - case nir_intrinsic_load_sample_mask_in: - emit_special(ctx, instr, 96); - break; - - case nir_intrinsic_load_sample_id: - emit_special(ctx, instr, 97); - break; - - /* Midgard doesn't seem 
to want special handling, though we do need to - * take care when scheduling to avoid incorrect reordering. - */ - case nir_intrinsic_memory_barrier: - case nir_intrinsic_memory_barrier_buffer: - case nir_intrinsic_memory_barrier_image: - case nir_intrinsic_memory_barrier_shared: - case nir_intrinsic_group_memory_barrier: - schedule_barrier(ctx); - break; - - case nir_intrinsic_control_barrier: - schedule_barrier(ctx); - emit_control_barrier(ctx); - schedule_barrier(ctx); - break; - - ATOMIC_CASE(ctx, instr, add, add); - ATOMIC_CASE(ctx, instr, and, and); - ATOMIC_CASE(ctx, instr, comp_swap, cmpxchg); - ATOMIC_CASE(ctx, instr, exchange, xchg); - ATOMIC_CASE(ctx, instr, imax, imax); - ATOMIC_CASE(ctx, instr, imin, imin); - ATOMIC_CASE(ctx, instr, or, or); - ATOMIC_CASE(ctx, instr, umax, umax); - ATOMIC_CASE(ctx, instr, umin, umin); - ATOMIC_CASE(ctx, instr, xor, xor); - - IMAGE_ATOMIC_CASE(ctx, instr, add, add); - IMAGE_ATOMIC_CASE(ctx, instr, and, and); - IMAGE_ATOMIC_CASE(ctx, instr, comp_swap, cmpxchg); - IMAGE_ATOMIC_CASE(ctx, instr, exchange, xchg); - IMAGE_ATOMIC_CASE(ctx, instr, imax, imax); - IMAGE_ATOMIC_CASE(ctx, instr, imin, imin); - IMAGE_ATOMIC_CASE(ctx, instr, or, or); - IMAGE_ATOMIC_CASE(ctx, instr, umax, umax); - IMAGE_ATOMIC_CASE(ctx, instr, umin, umin); - IMAGE_ATOMIC_CASE(ctx, instr, xor, xor); - - default: - fprintf(stderr, "Unhandled intrinsic %s\n", nir_intrinsic_infos[instr->intrinsic].name); - assert(0); - break; - } + unsigned offset = 0, reg; + + switch (instr->intrinsic) { + case nir_intrinsic_discard_if: + case nir_intrinsic_discard: { + bool conditional = instr->intrinsic == nir_intrinsic_discard_if; + struct midgard_instruction discard = v_branch(conditional, false); + discard.branch.target_type = TARGET_DISCARD; + + if (conditional) { + discard.src[0] = mir_get_branch_cond( + &instr->src[0], &discard.branch.invert_conditional); + discard.src_types[0] = nir_type_uint32; + } + + emit_mir_instruction(ctx, discard); + schedule_barrier(ctx); + + break; + } + + case nir_intrinsic_image_load: + case nir_intrinsic_image_store: + emit_image_op(ctx, instr, false); + break; + + case nir_intrinsic_image_size: { + unsigned nr_comp = nir_intrinsic_dest_components(instr); + emit_sysval_read(ctx, &instr->instr, nr_comp, 0); + break; + } + + case nir_intrinsic_load_ubo: + case nir_intrinsic_load_global: + case nir_intrinsic_load_global_constant: + case nir_intrinsic_load_shared: + case nir_intrinsic_load_scratch: + case nir_intrinsic_load_input: + case nir_intrinsic_load_interpolated_input: { + bool is_ubo = instr->intrinsic == nir_intrinsic_load_ubo; + bool is_global = instr->intrinsic == nir_intrinsic_load_global || + instr->intrinsic == nir_intrinsic_load_global_constant; + bool is_shared = instr->intrinsic == nir_intrinsic_load_shared; + bool is_scratch = instr->intrinsic == nir_intrinsic_load_scratch; + bool is_flat = instr->intrinsic == nir_intrinsic_load_input; + bool is_interp = + instr->intrinsic == nir_intrinsic_load_interpolated_input; + + /* Get the base type of the intrinsic */ + /* TODO: Infer type? Does it matter? */ + nir_alu_type t = (is_interp) ? nir_type_float + : (is_flat) ? nir_intrinsic_dest_type(instr) + : nir_type_uint; + + t = nir_alu_type_get_base_type(t); + + if (!(is_ubo || is_global || is_scratch)) { + offset = nir_intrinsic_base(instr); + } + + unsigned nr_comp = nir_intrinsic_dest_components(instr); + + nir_src *src_offset = nir_get_io_offset_src(instr); + + bool direct = nir_src_is_const(*src_offset); + nir_src *indirect_offset = direct ? 
NULL : src_offset; + + if (direct) + offset += nir_src_as_uint(*src_offset); + + /* We may need to apply a fractional offset */ + int component = + (is_flat || is_interp) ? nir_intrinsic_component(instr) : 0; + reg = nir_dest_index(&instr->dest); + + if (is_ubo) { + nir_src index = instr->src[0]; + + /* TODO: Is indirect block number possible? */ + assert(nir_src_is_const(index)); + + uint32_t uindex = nir_src_as_uint(index); + emit_ubo_read(ctx, &instr->instr, reg, offset, indirect_offset, 0, + uindex, nr_comp); + } else if (is_global || is_shared || is_scratch) { + unsigned seg = + is_global ? LDST_GLOBAL : (is_shared ? LDST_SHARED : LDST_SCRATCH); + emit_global(ctx, &instr->instr, true, reg, src_offset, seg); + } else if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->inputs->is_blend) { + emit_varying_read(ctx, reg, offset, nr_comp, component, + indirect_offset, t | nir_dest_bit_size(instr->dest), + is_flat); + } else if (ctx->inputs->is_blend) { + /* ctx->blend_input will be precoloured to r0/r2, where + * the input is preloaded */ + + unsigned *input = offset ? &ctx->blend_src1 : &ctx->blend_input; + + if (*input == ~0) + *input = reg; + else + emit_mir_instruction(ctx, v_mov(*input, reg)); + } else if (ctx->stage == MESA_SHADER_VERTEX) { + emit_attr_read(ctx, reg, offset, nr_comp, t); + } else { + DBG("Unknown load\n"); + assert(0); + } + + break; + } + + /* Handled together with load_interpolated_input */ + case nir_intrinsic_load_barycentric_pixel: + case nir_intrinsic_load_barycentric_centroid: + case nir_intrinsic_load_barycentric_sample: + break; + + /* Reads 128-bit value raw off the tilebuffer during blending, tasty */ + + case nir_intrinsic_load_raw_output_pan: { + reg = nir_dest_index(&instr->dest); + + /* T720 and below use different blend opcodes with slightly + * different semantics than T760 and up */ + + midgard_instruction ld = m_ld_tilebuffer_raw(reg, 0); + + unsigned target = output_load_rt_addr(ctx, instr); + ld.load_store.index_comp = target & 0x3; + ld.load_store.index_reg = target >> 2; + + if (nir_src_is_const(instr->src[0])) { + unsigned sample = nir_src_as_uint(instr->src[0]); + ld.load_store.arg_comp = sample & 0x3; + ld.load_store.arg_reg = sample >> 2; + } else { + /* Enable sample index via register. 
*/ + ld.load_store.signed_offset |= 1; + ld.src[1] = nir_src_index(ctx, &instr->src[0]); + ld.src_types[1] = nir_type_int32; + } + + if (ctx->quirks & MIDGARD_OLD_BLEND) { + ld.op = midgard_op_ld_special_32u; + ld.load_store.signed_offset = PACK_LDST_SELECTOR_OFS(16); + ld.load_store.index_reg = REGISTER_LDST_ZERO; + } + + emit_mir_instruction(ctx, ld); + break; + } + + case nir_intrinsic_load_output: { + reg = nir_dest_index(&instr->dest); + + unsigned bits = nir_dest_bit_size(instr->dest); + + midgard_instruction ld; + if (bits == 16) + ld = m_ld_tilebuffer_16f(reg, 0); + else + ld = m_ld_tilebuffer_32f(reg, 0); + + unsigned index = output_load_rt_addr(ctx, instr); + ld.load_store.index_comp = index & 0x3; + ld.load_store.index_reg = index >> 2; + + for (unsigned c = 4; c < 16; ++c) + ld.swizzle[0][c] = 0; + + if (ctx->quirks & MIDGARD_OLD_BLEND) { + if (bits == 16) + ld.op = midgard_op_ld_special_16f; + else + ld.op = midgard_op_ld_special_32f; + ld.load_store.signed_offset = PACK_LDST_SELECTOR_OFS(1); + ld.load_store.index_reg = REGISTER_LDST_ZERO; + } + + emit_mir_instruction(ctx, ld); + break; + } + + case nir_intrinsic_store_output: + case nir_intrinsic_store_combined_output_pan: + assert(nir_src_is_const(instr->src[1]) && "no indirect outputs"); + + reg = nir_src_index(ctx, &instr->src[0]); + + if (ctx->stage == MESA_SHADER_FRAGMENT) { + bool combined = + instr->intrinsic == nir_intrinsic_store_combined_output_pan; + + enum midgard_rt_id rt; + + unsigned reg_z = ~0, reg_s = ~0, reg_2 = ~0; + unsigned writeout = PAN_WRITEOUT_C; + if (combined) { + writeout = nir_intrinsic_component(instr); + if (writeout & PAN_WRITEOUT_Z) + reg_z = nir_src_index(ctx, &instr->src[2]); + if (writeout & PAN_WRITEOUT_S) + reg_s = nir_src_index(ctx, &instr->src[3]); + if (writeout & PAN_WRITEOUT_2) + reg_2 = nir_src_index(ctx, &instr->src[4]); + } + + if (writeout & PAN_WRITEOUT_C) { + nir_io_semantics sem = nir_intrinsic_io_semantics(instr); + + rt = MIDGARD_COLOR_RT0 + (sem.location - FRAG_RESULT_DATA0); + } else { + rt = MIDGARD_ZS_RT; + } + + /* Dual-source blend writeout is done by leaving the + * value in r2 for the blend shader to use. */ + if (~reg_2) { + if (instr->src[4].is_ssa) { + emit_explicit_constant(ctx, reg_2, reg_2); + + unsigned out = make_compiler_temp(ctx); + + midgard_instruction ins = v_mov(reg_2, out); + emit_mir_instruction(ctx, ins); + + ctx->blend_src1 = out; + } else { + ctx->blend_src1 = reg_2; + } + } + + emit_fragment_store(ctx, reg, reg_z, reg_s, rt, 0); + } else if (ctx->stage == MESA_SHADER_VERTEX) { + assert(instr->intrinsic == nir_intrinsic_store_output); + + /* We should have been vectorized, though we don't + * currently check that st_vary is emitted only once + * per slot (this is relevant, since there's not a mask + * parameter available on the store [set to 0 by the + * blob]). We do respect the component by adjusting the + * swizzle. If this is a constant source, we'll need to + * emit that explicitly. */ + + emit_explicit_constant(ctx, reg, reg); + + offset = nir_intrinsic_base(instr) + nir_src_as_uint(instr->src[1]); + + unsigned dst_component = nir_intrinsic_component(instr); + unsigned nr_comp = nir_src_num_components(instr->src[0]); + + /* ABI: Format controlled by the attribute descriptor. + * This simplifies flat shading, although it prevents + * certain (unimplemented) 16-bit optimizations. + * + * In particular, it lets the driver handle internal + * TGSI shaders that set flat in the VS but smooth in + * the FS. This matches our handling on Bifrost. 
+ */ + bool auto32 = true; + assert(nir_alu_type_get_type_size(nir_intrinsic_src_type(instr)) == + 32); + + /* ABI: varyings in the secondary attribute table */ + bool secondary_table = true; + + midgard_instruction st = + m_st_vary_32(reg, PACK_LDST_ATTRIB_OFS(offset)); + st.load_store.arg_reg = REGISTER_LDST_ZERO; + st.load_store.index_reg = REGISTER_LDST_ZERO; + + /* Attribute instruction uses these 2-bits for the + * a32 and table bits, pack this specially. + */ + st.load_store.index_format = + (auto32 ? (1 << 0) : 0) | (secondary_table ? (1 << 1) : 0); + + /* nir_intrinsic_component(store_intr) encodes the + * destination component start. Source component offset + * adjustment is taken care of in + * install_registers_instr(), when offset_swizzle() is + * called. + */ + unsigned src_component = COMPONENT_X; + + assert(nr_comp > 0); + for (unsigned i = 0; i < ARRAY_SIZE(st.swizzle); ++i) { + st.swizzle[0][i] = src_component; + if (i >= dst_component && i < dst_component + nr_comp - 1) + src_component++; + } + + emit_mir_instruction(ctx, st); + } else { + DBG("Unknown store\n"); + assert(0); + } + + break; + + /* Special case of store_output for lowered blend shaders */ + case nir_intrinsic_store_raw_output_pan: + assert(ctx->stage == MESA_SHADER_FRAGMENT); + reg = nir_src_index(ctx, &instr->src[0]); + for (unsigned s = 0; s < ctx->blend_sample_iterations; s++) + emit_fragment_store(ctx, reg, ~0, ~0, + ctx->inputs->blend.rt + MIDGARD_COLOR_RT0, s); + break; + + case nir_intrinsic_store_global: + case nir_intrinsic_store_shared: + case nir_intrinsic_store_scratch: + reg = nir_src_index(ctx, &instr->src[0]); + emit_explicit_constant(ctx, reg, reg); + + unsigned seg; + if (instr->intrinsic == nir_intrinsic_store_global) + seg = LDST_GLOBAL; + else if (instr->intrinsic == nir_intrinsic_store_shared) + seg = LDST_SHARED; + else + seg = LDST_SCRATCH; + + emit_global(ctx, &instr->instr, false, reg, &instr->src[1], seg); + break; + + case nir_intrinsic_load_ssbo_address: + case nir_intrinsic_load_xfb_address: + emit_sysval_read(ctx, &instr->instr, 2, 0); + break; + + case nir_intrinsic_load_first_vertex: + case nir_intrinsic_load_work_dim: + case nir_intrinsic_load_num_vertices: + emit_sysval_read(ctx, &instr->instr, 1, 0); + break; + + case nir_intrinsic_load_base_vertex: + emit_sysval_read(ctx, &instr->instr, 1, 4); + break; + + case nir_intrinsic_load_base_instance: + case nir_intrinsic_get_ssbo_size: + emit_sysval_read(ctx, &instr->instr, 1, 8); + break; + + case nir_intrinsic_load_sample_positions_pan: + emit_sysval_read(ctx, &instr->instr, 2, 0); + break; + + case nir_intrinsic_load_viewport_scale: + case nir_intrinsic_load_viewport_offset: + case nir_intrinsic_load_num_workgroups: + case nir_intrinsic_load_sampler_lod_parameters_pan: + case nir_intrinsic_load_workgroup_size: + emit_sysval_read(ctx, &instr->instr, 3, 0); + break; + + case nir_intrinsic_load_blend_const_color_rgba: + emit_sysval_read(ctx, &instr->instr, 4, 0); + break; + + case nir_intrinsic_load_workgroup_id: + case nir_intrinsic_load_local_invocation_id: + case nir_intrinsic_load_global_invocation_id: + case nir_intrinsic_load_global_invocation_id_zero_base: + emit_compute_builtin(ctx, instr); + break; + + case nir_intrinsic_load_vertex_id_zero_base: + case nir_intrinsic_load_instance_id: + emit_vertex_builtin(ctx, instr); + break; + + case nir_intrinsic_load_sample_mask_in: + emit_special(ctx, instr, 96); + break; + + case nir_intrinsic_load_sample_id: + emit_special(ctx, instr, 97); + break; + + /* Midgard doesn't seem 
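The vertex store path above respects nir_intrinsic_component by advancing the source component only across the destination window, and separately packs the a32 and secondary-table ABI bits into index_format. A standalone sketch of the swizzle construction (the array width of 16 is assumed for illustration):

static void
build_st_vary_swizzle(unsigned swizzle[16], unsigned dst_component,
                      unsigned nr_comp)
{
   unsigned src_component = 0; /* COMPONENT_X */

   /* Components before the destination window keep reading X and are not
    * in the mask; inside the window the source advances X, Y, ...  e.g. a
    * 2-component store starting at .z yields the swizzle xxxy. */
   for (unsigned i = 0; i < 16; ++i) {
      swizzle[i] = src_component;
      if (i >= dst_component && i < dst_component + nr_comp - 1)
         src_component++;
   }
}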
to want special handling, though we do need to + * take care when scheduling to avoid incorrect reordering. + */ + case nir_intrinsic_memory_barrier: + case nir_intrinsic_memory_barrier_buffer: + case nir_intrinsic_memory_barrier_image: + case nir_intrinsic_memory_barrier_shared: + case nir_intrinsic_group_memory_barrier: + schedule_barrier(ctx); + break; + + case nir_intrinsic_control_barrier: + schedule_barrier(ctx); + emit_control_barrier(ctx); + schedule_barrier(ctx); + break; + + ATOMIC_CASE(ctx, instr, add, add); + ATOMIC_CASE(ctx, instr, and, and); + ATOMIC_CASE(ctx, instr, comp_swap, cmpxchg); + ATOMIC_CASE(ctx, instr, exchange, xchg); + ATOMIC_CASE(ctx, instr, imax, imax); + ATOMIC_CASE(ctx, instr, imin, imin); + ATOMIC_CASE(ctx, instr, or, or); + ATOMIC_CASE(ctx, instr, umax, umax); + ATOMIC_CASE(ctx, instr, umin, umin); + ATOMIC_CASE(ctx, instr, xor, xor); + + IMAGE_ATOMIC_CASE(ctx, instr, add, add); + IMAGE_ATOMIC_CASE(ctx, instr, and, and); + IMAGE_ATOMIC_CASE(ctx, instr, comp_swap, cmpxchg); + IMAGE_ATOMIC_CASE(ctx, instr, exchange, xchg); + IMAGE_ATOMIC_CASE(ctx, instr, imax, imax); + IMAGE_ATOMIC_CASE(ctx, instr, imin, imin); + IMAGE_ATOMIC_CASE(ctx, instr, or, or); + IMAGE_ATOMIC_CASE(ctx, instr, umax, umax); + IMAGE_ATOMIC_CASE(ctx, instr, umin, umin); + IMAGE_ATOMIC_CASE(ctx, instr, xor, xor); + + default: + fprintf(stderr, "Unhandled intrinsic %s\n", + nir_intrinsic_infos[instr->intrinsic].name); + assert(0); + break; + } } /* Returns dimension with 0 special casing cubemaps */ static unsigned midgard_tex_format(enum glsl_sampler_dim dim) { - switch (dim) { - case GLSL_SAMPLER_DIM_1D: - case GLSL_SAMPLER_DIM_BUF: - return 1; + switch (dim) { + case GLSL_SAMPLER_DIM_1D: + case GLSL_SAMPLER_DIM_BUF: + return 1; - case GLSL_SAMPLER_DIM_2D: - case GLSL_SAMPLER_DIM_MS: - case GLSL_SAMPLER_DIM_EXTERNAL: - case GLSL_SAMPLER_DIM_RECT: - return 2; + case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_MS: + case GLSL_SAMPLER_DIM_EXTERNAL: + case GLSL_SAMPLER_DIM_RECT: + return 2; - case GLSL_SAMPLER_DIM_3D: - return 3; + case GLSL_SAMPLER_DIM_3D: + return 3; - case GLSL_SAMPLER_DIM_CUBE: - return 0; + case GLSL_SAMPLER_DIM_CUBE: + return 0; - default: - DBG("Unknown sampler dim type\n"); - assert(0); - return 0; - } + default: + DBG("Unknown sampler dim type\n"); + assert(0); + return 0; + } } /* Tries to attach an explicit LOD or bias as a constant. 
Returns whether this * was successful */ static bool -pan_attach_constant_bias( - compiler_context *ctx, - nir_src lod, - midgard_texture_word *word) +pan_attach_constant_bias(compiler_context *ctx, nir_src lod, + midgard_texture_word *word) { - /* To attach as constant, it has to *be* constant */ + /* To attach as constant, it has to *be* constant */ - if (!nir_src_is_const(lod)) - return false; + if (!nir_src_is_const(lod)) + return false; - float f = nir_src_as_float(lod); + float f = nir_src_as_float(lod); - /* Break into fixed-point */ - signed lod_int = f; - float lod_frac = f - lod_int; + /* Break into fixed-point */ + signed lod_int = f; + float lod_frac = f - lod_int; - /* Carry over negative fractions */ - if (lod_frac < 0.0) { - lod_int--; - lod_frac += 1.0; - } + /* Carry over negative fractions */ + if (lod_frac < 0.0) { + lod_int--; + lod_frac += 1.0; + } - /* Encode */ - word->bias = float_to_ubyte(lod_frac); - word->bias_int = lod_int; + /* Encode */ + word->bias = float_to_ubyte(lod_frac); + word->bias_int = lod_int; - return true; + return true; } static enum mali_texture_mode mdg_texture_mode(nir_tex_instr *instr) { - if (instr->op == nir_texop_tg4 && instr->is_shadow) - return TEXTURE_GATHER_SHADOW; - else if (instr->op == nir_texop_tg4) - return TEXTURE_GATHER_X + instr->component; - else if (instr->is_shadow) - return TEXTURE_SHADOW; - else - return TEXTURE_NORMAL; + if (instr->op == nir_texop_tg4 && instr->is_shadow) + return TEXTURE_GATHER_SHADOW; + else if (instr->op == nir_texop_tg4) + return TEXTURE_GATHER_X + instr->component; + else if (instr->is_shadow) + return TEXTURE_SHADOW; + else + return TEXTURE_NORMAL; } static void set_tex_coord(compiler_context *ctx, nir_tex_instr *instr, midgard_instruction *ins) { - int coord_idx = nir_tex_instr_src_index(instr, nir_tex_src_coord); + int coord_idx = nir_tex_instr_src_index(instr, nir_tex_src_coord); - assert(coord_idx >= 0); + assert(coord_idx >= 0); - int comparator_idx = nir_tex_instr_src_index(instr, nir_tex_src_comparator); - int ms_idx = nir_tex_instr_src_index(instr, nir_tex_src_ms_index); - assert(comparator_idx < 0 || ms_idx < 0); - int ms_or_comparator_idx = ms_idx >= 0 ? ms_idx : comparator_idx; + int comparator_idx = nir_tex_instr_src_index(instr, nir_tex_src_comparator); + int ms_idx = nir_tex_instr_src_index(instr, nir_tex_src_ms_index); + assert(comparator_idx < 0 || ms_idx < 0); + int ms_or_comparator_idx = ms_idx >= 0 ? ms_idx : comparator_idx; - unsigned coords = nir_src_index(ctx, &instr->src[coord_idx].src); + unsigned coords = nir_src_index(ctx, &instr->src[coord_idx].src); - emit_explicit_constant(ctx, coords, coords); + emit_explicit_constant(ctx, coords, coords); - ins->src_types[1] = nir_tex_instr_src_type(instr, coord_idx) | - nir_src_bit_size(instr->src[coord_idx].src); + ins->src_types[1] = nir_tex_instr_src_type(instr, coord_idx) | + nir_src_bit_size(instr->src[coord_idx].src); - unsigned nr_comps = instr->coord_components; - unsigned written_mask = 0, write_mask = 0; + unsigned nr_comps = instr->coord_components; + unsigned written_mask = 0, write_mask = 0; - /* Initialize all components to coord.x which is expected to always be - * present. Swizzle is updated below based on the texture dimension - * and extra attributes that are packed in the coordinate argument. - */ - for (unsigned c = 0; c < MIR_VEC_COMPONENTS; c++) - ins->swizzle[1][c] = COMPONENT_X; + /* Initialize all components to coord.x which is expected to always be + * present. 
Swizzle is updated below based on the texture dimension + * and extra attributes that are packed in the coordinate argument. + */ + for (unsigned c = 0; c < MIR_VEC_COMPONENTS; c++) + ins->swizzle[1][c] = COMPONENT_X; - /* Shadow ref value is part of the coordinates if there's no comparator - * source, in that case it's always placed in the last component. - * Midgard wants the ref value in coord.z. - */ - if (instr->is_shadow && comparator_idx < 0) { - ins->swizzle[1][COMPONENT_Z] = --nr_comps; - write_mask |= 1 << COMPONENT_Z; - } + /* Shadow ref value is part of the coordinates if there's no comparator + * source, in that case it's always placed in the last component. + * Midgard wants the ref value in coord.z. + */ + if (instr->is_shadow && comparator_idx < 0) { + ins->swizzle[1][COMPONENT_Z] = --nr_comps; + write_mask |= 1 << COMPONENT_Z; + } - /* The array index is the last component if there's no shadow ref value - * or second last if there's one. We already decremented the number of - * components to account for the shadow ref value above. - * Midgard wants the array index in coord.w. - */ - if (instr->is_array) { - ins->swizzle[1][COMPONENT_W] = --nr_comps; - write_mask |= 1 << COMPONENT_W; - } + /* The array index is the last component if there's no shadow ref value + * or second last if there's one. We already decremented the number of + * components to account for the shadow ref value above. + * Midgard wants the array index in coord.w. + */ + if (instr->is_array) { + ins->swizzle[1][COMPONENT_W] = --nr_comps; + write_mask |= 1 << COMPONENT_W; + } - if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { - /* texelFetch is undefined on samplerCube */ - assert(ins->op != midgard_tex_op_fetch); + if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { + /* texelFetch is undefined on samplerCube */ + assert(ins->op != midgard_tex_op_fetch); - ins->src[1] = make_compiler_temp_reg(ctx); + ins->src[1] = make_compiler_temp_reg(ctx); - /* For cubemaps, we use a special ld/st op to select the face - * and copy the xy into the texture register - */ - midgard_instruction ld = m_ld_cubemap_coords(ins->src[1], 0); - ld.src[1] = coords; - ld.src_types[1] = ins->src_types[1]; - ld.mask = 0x3; /* xy */ - ld.load_store.bitsize_toggle = true; - ld.swizzle[1][3] = COMPONENT_X; - emit_mir_instruction(ctx, ld); + /* For cubemaps, we use a special ld/st op to select the face + * and copy the xy into the texture register + */ + midgard_instruction ld = m_ld_cubemap_coords(ins->src[1], 0); + ld.src[1] = coords; + ld.src_types[1] = ins->src_types[1]; + ld.mask = 0x3; /* xy */ + ld.load_store.bitsize_toggle = true; + ld.swizzle[1][3] = COMPONENT_X; + emit_mir_instruction(ctx, ld); - /* We packed cube coordiates (X,Y,Z) into (X,Y), update the - * written mask accordingly and decrement the number of - * components - */ - nr_comps--; - written_mask |= 3; - } + /* We packed cube coordiates (X,Y,Z) into (X,Y), update the + * written mask accordingly and decrement the number of + * components + */ + nr_comps--; + written_mask |= 3; + } - /* Now flag tex coord components that have not been written yet */ - write_mask |= mask_of(nr_comps) & ~written_mask; - for (unsigned c = 0; c < nr_comps; c++) - ins->swizzle[1][c] = c; + /* Now flag tex coord components that have not been written yet */ + write_mask |= mask_of(nr_comps) & ~written_mask; + for (unsigned c = 0; c < nr_comps; c++) + ins->swizzle[1][c] = c; - /* Sample index and shadow ref are expected in coord.z */ - if (ms_or_comparator_idx >= 0) { - 
assert(!((write_mask | written_mask) & (1 << COMPONENT_Z))); + /* Sample index and shadow ref are expected in coord.z */ + if (ms_or_comparator_idx >= 0) { + assert(!((write_mask | written_mask) & (1 << COMPONENT_Z))); - unsigned sample_or_ref = - nir_src_index(ctx, &instr->src[ms_or_comparator_idx].src); + unsigned sample_or_ref = + nir_src_index(ctx, &instr->src[ms_or_comparator_idx].src); - emit_explicit_constant(ctx, sample_or_ref, sample_or_ref); + emit_explicit_constant(ctx, sample_or_ref, sample_or_ref); - if (ins->src[1] == ~0) - ins->src[1] = make_compiler_temp_reg(ctx); + if (ins->src[1] == ~0) + ins->src[1] = make_compiler_temp_reg(ctx); - midgard_instruction mov = v_mov(sample_or_ref, ins->src[1]); + midgard_instruction mov = v_mov(sample_or_ref, ins->src[1]); - for (unsigned c = 0; c < MIR_VEC_COMPONENTS; c++) - mov.swizzle[1][c] = COMPONENT_X; + for (unsigned c = 0; c < MIR_VEC_COMPONENTS; c++) + mov.swizzle[1][c] = COMPONENT_X; - mov.mask = 1 << COMPONENT_Z; - written_mask |= 1 << COMPONENT_Z; - ins->swizzle[1][COMPONENT_Z] = COMPONENT_Z; - emit_mir_instruction(ctx, mov); - } + mov.mask = 1 << COMPONENT_Z; + written_mask |= 1 << COMPONENT_Z; + ins->swizzle[1][COMPONENT_Z] = COMPONENT_Z; + emit_mir_instruction(ctx, mov); + } - /* Texelfetch coordinates uses all four elements (xyz/index) regardless - * of texture dimensionality, which means it's necessary to zero the - * unused components to keep everything happy. - */ - if (ins->op == midgard_tex_op_fetch && - (written_mask | write_mask) != 0xF) { - if (ins->src[1] == ~0) - ins->src[1] = make_compiler_temp_reg(ctx); + /* Texelfetch coordinates uses all four elements (xyz/index) regardless + * of texture dimensionality, which means it's necessary to zero the + * unused components to keep everything happy. 
+ */ + if (ins->op == midgard_tex_op_fetch && (written_mask | write_mask) != 0xF) { + if (ins->src[1] == ~0) + ins->src[1] = make_compiler_temp_reg(ctx); - /* mov index.zw, #0, or generalized */ - midgard_instruction mov = - v_mov(SSA_FIXED_REGISTER(REGISTER_CONSTANT), ins->src[1]); - mov.has_constants = true; - mov.mask = (written_mask | write_mask) ^ 0xF; - emit_mir_instruction(ctx, mov); - for (unsigned c = 0; c < MIR_VEC_COMPONENTS; c++) { - if (mov.mask & (1 << c)) - ins->swizzle[1][c] = c; - } - } + /* mov index.zw, #0, or generalized */ + midgard_instruction mov = + v_mov(SSA_FIXED_REGISTER(REGISTER_CONSTANT), ins->src[1]); + mov.has_constants = true; + mov.mask = (written_mask | write_mask) ^ 0xF; + emit_mir_instruction(ctx, mov); + for (unsigned c = 0; c < MIR_VEC_COMPONENTS; c++) { + if (mov.mask & (1 << c)) + ins->swizzle[1][c] = c; + } + } - if (ins->src[1] == ~0) { - /* No temporary reg created, use the src coords directly */ - ins->src[1] = coords; - } else if (write_mask) { - /* Move the remaining coordinates to the temporary reg */ - midgard_instruction mov = v_mov(coords, ins->src[1]); + if (ins->src[1] == ~0) { + /* No temporary reg created, use the src coords directly */ + ins->src[1] = coords; + } else if (write_mask) { + /* Move the remaining coordinates to the temporary reg */ + midgard_instruction mov = v_mov(coords, ins->src[1]); - for (unsigned c = 0; c < MIR_VEC_COMPONENTS; c++) { - if ((1 << c) & write_mask) { - mov.swizzle[1][c] = ins->swizzle[1][c]; - ins->swizzle[1][c] = c; - } else { - mov.swizzle[1][c] = COMPONENT_X; - } - } + for (unsigned c = 0; c < MIR_VEC_COMPONENTS; c++) { + if ((1 << c) & write_mask) { + mov.swizzle[1][c] = ins->swizzle[1][c]; + ins->swizzle[1][c] = c; + } else { + mov.swizzle[1][c] = COMPONENT_X; + } + } - mov.mask = write_mask; - emit_mir_instruction(ctx, mov); - } + mov.mask = write_mask; + emit_mir_instruction(ctx, mov); + } } static void emit_texop_native(compiler_context *ctx, nir_tex_instr *instr, unsigned midgard_texop) { - nir_dest *dest = &instr->dest; + nir_dest *dest = &instr->dest; - int texture_index = instr->texture_index; - int sampler_index = instr->sampler_index; + int texture_index = instr->texture_index; + int sampler_index = instr->sampler_index; - nir_alu_type dest_base = nir_alu_type_get_base_type(instr->dest_type); + nir_alu_type dest_base = nir_alu_type_get_base_type(instr->dest_type); - /* texture instructions support float outmods */ - unsigned outmod = midgard_outmod_none; - if (dest_base == nir_type_float) { - outmod = mir_determine_float_outmod(ctx, &dest, 0); - } + /* texture instructions support float outmods */ + unsigned outmod = midgard_outmod_none; + if (dest_base == nir_type_float) { + outmod = mir_determine_float_outmod(ctx, &dest, 0); + } - midgard_instruction ins = { - .type = TAG_TEXTURE_4, - .mask = 0xF, - .dest = nir_dest_index(dest), - .src = { ~0, ~0, ~0, ~0 }, - .dest_type = instr->dest_type, - .swizzle = SWIZZLE_IDENTITY_4, - .outmod = outmod, - .op = midgard_texop, - .texture = { - .format = midgard_tex_format(instr->sampler_dim), - .texture_handle = texture_index, - .sampler_handle = sampler_index, - .mode = mdg_texture_mode(instr), - } - }; + midgard_instruction ins = { + .type = TAG_TEXTURE_4, + .mask = 0xF, + .dest = nir_dest_index(dest), + .src = {~0, ~0, ~0, ~0}, + .dest_type = instr->dest_type, + .swizzle = SWIZZLE_IDENTITY_4, + .outmod = outmod, + .op = midgard_texop, + .texture = { + .format = midgard_tex_format(instr->sampler_dim), + .texture_handle = texture_index, + 
.sampler_handle = sampler_index, + .mode = mdg_texture_mode(instr), + }}; - if (instr->is_shadow && !instr->is_new_style_shadow && instr->op != nir_texop_tg4) - for (int i = 0; i < 4; ++i) - ins.swizzle[0][i] = COMPONENT_X; + if (instr->is_shadow && !instr->is_new_style_shadow && + instr->op != nir_texop_tg4) + for (int i = 0; i < 4; ++i) + ins.swizzle[0][i] = COMPONENT_X; - for (unsigned i = 0; i < instr->num_srcs; ++i) { - int index = nir_src_index(ctx, &instr->src[i].src); - unsigned sz = nir_src_bit_size(instr->src[i].src); - nir_alu_type T = nir_tex_instr_src_type(instr, i) | sz; + for (unsigned i = 0; i < instr->num_srcs; ++i) { + int index = nir_src_index(ctx, &instr->src[i].src); + unsigned sz = nir_src_bit_size(instr->src[i].src); + nir_alu_type T = nir_tex_instr_src_type(instr, i) | sz; - switch (instr->src[i].src_type) { - case nir_tex_src_coord: - set_tex_coord(ctx, instr, &ins); - break; + switch (instr->src[i].src_type) { + case nir_tex_src_coord: + set_tex_coord(ctx, instr, &ins); + break; - case nir_tex_src_bias: - case nir_tex_src_lod: { - /* Try as a constant if we can */ + case nir_tex_src_bias: + case nir_tex_src_lod: { + /* Try as a constant if we can */ - bool is_txf = midgard_texop == midgard_tex_op_fetch; - if (!is_txf && pan_attach_constant_bias(ctx, instr->src[i].src, &ins.texture)) - break; + bool is_txf = midgard_texop == midgard_tex_op_fetch; + if (!is_txf && + pan_attach_constant_bias(ctx, instr->src[i].src, &ins.texture)) + break; - ins.texture.lod_register = true; - ins.src[2] = index; - ins.src_types[2] = T; + ins.texture.lod_register = true; + ins.src[2] = index; + ins.src_types[2] = T; - for (unsigned c = 0; c < MIR_VEC_COMPONENTS; ++c) - ins.swizzle[2][c] = COMPONENT_X; + for (unsigned c = 0; c < MIR_VEC_COMPONENTS; ++c) + ins.swizzle[2][c] = COMPONENT_X; - emit_explicit_constant(ctx, index, index); + emit_explicit_constant(ctx, index, index); - break; - }; + break; + }; - case nir_tex_src_offset: { - ins.texture.offset_register = true; - ins.src[3] = index; - ins.src_types[3] = T; + case nir_tex_src_offset: { + ins.texture.offset_register = true; + ins.src[3] = index; + ins.src_types[3] = T; - for (unsigned c = 0; c < MIR_VEC_COMPONENTS; ++c) - ins.swizzle[3][c] = (c > COMPONENT_Z) ? 0 : c; + for (unsigned c = 0; c < MIR_VEC_COMPONENTS; ++c) + ins.swizzle[3][c] = (c > COMPONENT_Z) ? 
0 : c; - emit_explicit_constant(ctx, index, index); - break; - }; + emit_explicit_constant(ctx, index, index); + break; + }; - case nir_tex_src_comparator: - case nir_tex_src_ms_index: - /* Nothing to do, handled in set_tex_coord() */ - break; + case nir_tex_src_comparator: + case nir_tex_src_ms_index: + /* Nothing to do, handled in set_tex_coord() */ + break; - default: { - fprintf(stderr, "Unknown texture source type: %d\n", instr->src[i].src_type); - assert(0); - } - } - } + default: { + fprintf(stderr, "Unknown texture source type: %d\n", + instr->src[i].src_type); + assert(0); + } + } + } - emit_mir_instruction(ctx, ins); + emit_mir_instruction(ctx, ins); } static void emit_tex(compiler_context *ctx, nir_tex_instr *instr) { - switch (instr->op) { - case nir_texop_tex: - case nir_texop_txb: - emit_texop_native(ctx, instr, midgard_tex_op_normal); - break; - case nir_texop_txl: - case nir_texop_tg4: - emit_texop_native(ctx, instr, midgard_tex_op_gradient); - break; - case nir_texop_txf: - case nir_texop_txf_ms: - emit_texop_native(ctx, instr, midgard_tex_op_fetch); - break; - case nir_texop_txs: - emit_sysval_read(ctx, &instr->instr, 4, 0); - break; - default: { - fprintf(stderr, "Unhandled texture op: %d\n", instr->op); - assert(0); - } - } + switch (instr->op) { + case nir_texop_tex: + case nir_texop_txb: + emit_texop_native(ctx, instr, midgard_tex_op_normal); + break; + case nir_texop_txl: + case nir_texop_tg4: + emit_texop_native(ctx, instr, midgard_tex_op_gradient); + break; + case nir_texop_txf: + case nir_texop_txf_ms: + emit_texop_native(ctx, instr, midgard_tex_op_fetch); + break; + case nir_texop_txs: + emit_sysval_read(ctx, &instr->instr, 4, 0); + break; + default: { + fprintf(stderr, "Unhandled texture op: %d\n", instr->op); + assert(0); + } + } } static void emit_jump(compiler_context *ctx, nir_jump_instr *instr) { - switch (instr->type) { - case nir_jump_break: { - /* Emit a branch out of the loop */ - struct midgard_instruction br = v_branch(false, false); - br.branch.target_type = TARGET_BREAK; - br.branch.target_break = ctx->current_loop_depth; - emit_mir_instruction(ctx, br); - break; - } + switch (instr->type) { + case nir_jump_break: { + /* Emit a branch out of the loop */ + struct midgard_instruction br = v_branch(false, false); + br.branch.target_type = TARGET_BREAK; + br.branch.target_break = ctx->current_loop_depth; + emit_mir_instruction(ctx, br); + break; + } - default: - unreachable("Unhandled jump"); - } + default: + unreachable("Unhandled jump"); + } } static void emit_instr(compiler_context *ctx, struct nir_instr *instr) { - switch (instr->type) { - case nir_instr_type_load_const: - emit_load_const(ctx, nir_instr_as_load_const(instr)); - break; + switch (instr->type) { + case nir_instr_type_load_const: + emit_load_const(ctx, nir_instr_as_load_const(instr)); + break; - case nir_instr_type_intrinsic: - emit_intrinsic(ctx, nir_instr_as_intrinsic(instr)); - break; + case nir_instr_type_intrinsic: + emit_intrinsic(ctx, nir_instr_as_intrinsic(instr)); + break; - case nir_instr_type_alu: - emit_alu(ctx, nir_instr_as_alu(instr)); - break; + case nir_instr_type_alu: + emit_alu(ctx, nir_instr_as_alu(instr)); + break; - case nir_instr_type_tex: - emit_tex(ctx, nir_instr_as_tex(instr)); - break; + case nir_instr_type_tex: + emit_tex(ctx, nir_instr_as_tex(instr)); + break; - case nir_instr_type_jump: - emit_jump(ctx, nir_instr_as_jump(instr)); - break; + case nir_instr_type_jump: + emit_jump(ctx, nir_instr_as_jump(instr)); + break; - case nir_instr_type_ssa_undef: - /* 
Spurious */ - break; + case nir_instr_type_ssa_undef: + /* Spurious */ + break; - default: - DBG("Unhandled instruction type\n"); - break; - } + default: + DBG("Unhandled instruction type\n"); + break; + } } - /* ALU instructions can inline or embed constants, which decreases register * pressure and saves space. */ -#define CONDITIONAL_ATTACH(idx) { \ - void *entry = _mesa_hash_table_u64_search(ctx->ssa_constants, alu->src[idx] + 1); \ -\ - if (entry) { \ - attach_constants(ctx, alu, entry, alu->src[idx] + 1); \ - alu->src[idx] = SSA_FIXED_REGISTER(REGISTER_CONSTANT); \ - } \ -} +#define CONDITIONAL_ATTACH(idx) \ + { \ + void *entry = \ + _mesa_hash_table_u64_search(ctx->ssa_constants, alu->src[idx] + 1); \ + \ + if (entry) { \ + attach_constants(ctx, alu, entry, alu->src[idx] + 1); \ + alu->src[idx] = SSA_FIXED_REGISTER(REGISTER_CONSTANT); \ + } \ + } static void inline_alu_constants(compiler_context *ctx, midgard_block *block) { - mir_foreach_instr_in_block(block, alu) { - /* Other instructions cannot inline constants */ - if (alu->type != TAG_ALU_4) continue; - if (alu->compact_branch) continue; + mir_foreach_instr_in_block(block, alu) { + /* Other instructions cannot inline constants */ + if (alu->type != TAG_ALU_4) + continue; + if (alu->compact_branch) + continue; - /* If there is already a constant here, we can do nothing */ - if (alu->has_constants) continue; + /* If there is already a constant here, we can do nothing */ + if (alu->has_constants) + continue; - CONDITIONAL_ATTACH(0); + CONDITIONAL_ATTACH(0); - if (!alu->has_constants) { - CONDITIONAL_ATTACH(1) - } else if (!alu->inline_constant) { - /* Corner case: _two_ vec4 constants, for instance with a - * csel. For this case, we can only use a constant - * register for one, we'll have to emit a move for the - * other. */ + if (!alu->has_constants) { + CONDITIONAL_ATTACH(1) + } else if (!alu->inline_constant) { + /* Corner case: _two_ vec4 constants, for instance with a + * csel. For this case, we can only use a constant + * register for one, we'll have to emit a move for the + * other. 
*/ - void *entry = _mesa_hash_table_u64_search(ctx->ssa_constants, alu->src[1] + 1); - unsigned scratch = make_compiler_temp(ctx); + void *entry = + _mesa_hash_table_u64_search(ctx->ssa_constants, alu->src[1] + 1); + unsigned scratch = make_compiler_temp(ctx); - if (entry) { - midgard_instruction ins = v_mov(SSA_FIXED_REGISTER(REGISTER_CONSTANT), scratch); - attach_constants(ctx, &ins, entry, alu->src[1] + 1); + if (entry) { + midgard_instruction ins = + v_mov(SSA_FIXED_REGISTER(REGISTER_CONSTANT), scratch); + attach_constants(ctx, &ins, entry, alu->src[1] + 1); - /* Set the source */ - alu->src[1] = scratch; + /* Set the source */ + alu->src[1] = scratch; - /* Inject us -before- the last instruction which set r31 */ - mir_insert_instruction_before(ctx, mir_prev_op(alu), ins); - } - } - } + /* Inject us -before- the last instruction which set r31 */ + mir_insert_instruction_before(ctx, mir_prev_op(alu), ins); + } + } + } } unsigned max_bitsize_for_alu(midgard_instruction *ins) { - unsigned max_bitsize = 0; - for (int i = 0; i < MIR_SRC_COUNT; i++) { - if (ins->src[i] == ~0) continue; - unsigned src_bitsize = nir_alu_type_get_type_size(ins->src_types[i]); - max_bitsize = MAX2(src_bitsize, max_bitsize); - } - unsigned dst_bitsize = nir_alu_type_get_type_size(ins->dest_type); - max_bitsize = MAX2(dst_bitsize, max_bitsize); + unsigned max_bitsize = 0; + for (int i = 0; i < MIR_SRC_COUNT; i++) { + if (ins->src[i] == ~0) + continue; + unsigned src_bitsize = nir_alu_type_get_type_size(ins->src_types[i]); + max_bitsize = MAX2(src_bitsize, max_bitsize); + } + unsigned dst_bitsize = nir_alu_type_get_type_size(ins->dest_type); + max_bitsize = MAX2(dst_bitsize, max_bitsize); - /* We emulate 8-bit as 16-bit for simplicity of packing */ - max_bitsize = MAX2(max_bitsize, 16); + /* We emulate 8-bit as 16-bit for simplicity of packing */ + max_bitsize = MAX2(max_bitsize, 16); - /* We don't have fp16 LUTs, so we'll want to emit code like: - * - * vlut.fsinr hr0, hr0 - * - * where both input and output are 16-bit but the operation is carried - * out in 32-bit - */ + /* We don't have fp16 LUTs, so we'll want to emit code like: + * + * vlut.fsinr hr0, hr0 + * + * where both input and output are 16-bit but the operation is carried + * out in 32-bit + */ - switch (ins->op) { - case midgard_alu_op_fsqrt: - case midgard_alu_op_frcp: - case midgard_alu_op_frsqrt: - case midgard_alu_op_fsinpi: - case midgard_alu_op_fcospi: - case midgard_alu_op_fexp2: - case midgard_alu_op_flog2: - max_bitsize = MAX2(max_bitsize, 32); - break; + switch (ins->op) { + case midgard_alu_op_fsqrt: + case midgard_alu_op_frcp: + case midgard_alu_op_frsqrt: + case midgard_alu_op_fsinpi: + case midgard_alu_op_fcospi: + case midgard_alu_op_fexp2: + case midgard_alu_op_flog2: + max_bitsize = MAX2(max_bitsize, 32); + break; - default: - break; - } + default: + break; + } - /* High implies computing at a higher bitsize, e.g umul_high of 32-bit - * requires computing at 64-bit */ - if (midgard_is_integer_out_op(ins->op) && ins->outmod == midgard_outmod_keephi) { - max_bitsize *= 2; - assert(max_bitsize <= 64); - } + /* High implies computing at a higher bitsize, e.g umul_high of 32-bit + * requires computing at 64-bit */ + if (midgard_is_integer_out_op(ins->op) && + ins->outmod == midgard_outmod_keephi) { + max_bitsize *= 2; + assert(max_bitsize <= 64); + } - return max_bitsize; + return max_bitsize; } midgard_reg_mode reg_mode_for_bitsize(unsigned bitsize) { - switch (bitsize) { - /* use 16 pipe for 8 since we don't support vec16 yet */ - case 
8: - case 16: - return midgard_reg_mode_16; - case 32: - return midgard_reg_mode_32; - case 64: - return midgard_reg_mode_64; - default: - unreachable("invalid bit size"); - } + switch (bitsize) { + /* use 16 pipe for 8 since we don't support vec16 yet */ + case 8: + case 16: + return midgard_reg_mode_16; + case 32: + return midgard_reg_mode_32; + case 64: + return midgard_reg_mode_64; + default: + unreachable("invalid bit size"); + } } /* Midgard supports two types of constants, embedded constants (128-bit) and @@ -2710,102 +2727,103 @@ reg_mode_for_bitsize(unsigned bitsize) static void embedded_to_inline_constant(compiler_context *ctx, midgard_block *block) { - mir_foreach_instr_in_block(block, ins) { - if (!ins->has_constants) continue; - if (ins->has_inline_constant) continue; + mir_foreach_instr_in_block(block, ins) { + if (!ins->has_constants) + continue; + if (ins->has_inline_constant) + continue; - unsigned max_bitsize = max_bitsize_for_alu(ins); + unsigned max_bitsize = max_bitsize_for_alu(ins); - /* We can inline 32-bit (sometimes) or 16-bit (usually) */ - bool is_16 = max_bitsize == 16; - bool is_32 = max_bitsize == 32; + /* We can inline 32-bit (sometimes) or 16-bit (usually) */ + bool is_16 = max_bitsize == 16; + bool is_32 = max_bitsize == 32; - if (!(is_16 || is_32)) - continue; + if (!(is_16 || is_32)) + continue; - /* src1 cannot be an inline constant due to encoding - * restrictions. So, if possible we try to flip the arguments - * in that case */ + /* src1 cannot be an inline constant due to encoding + * restrictions. So, if possible we try to flip the arguments + * in that case */ - int op = ins->op; + int op = ins->op; - if (ins->src[0] == SSA_FIXED_REGISTER(REGISTER_CONSTANT) && - alu_opcode_props[op].props & OP_COMMUTES) { - mir_flip(ins); - } + if (ins->src[0] == SSA_FIXED_REGISTER(REGISTER_CONSTANT) && + alu_opcode_props[op].props & OP_COMMUTES) { + mir_flip(ins); + } - if (ins->src[1] == SSA_FIXED_REGISTER(REGISTER_CONSTANT)) { - /* Component is from the swizzle. Take a nonzero component */ - assert(ins->mask); - unsigned first_comp = ffs(ins->mask) - 1; - unsigned component = ins->swizzle[1][first_comp]; + if (ins->src[1] == SSA_FIXED_REGISTER(REGISTER_CONSTANT)) { + /* Component is from the swizzle. Take a nonzero component */ + assert(ins->mask); + unsigned first_comp = ffs(ins->mask) - 1; + unsigned component = ins->swizzle[1][first_comp]; - /* Scale constant appropriately, if we can legally */ - int16_t scaled_constant = 0; + /* Scale constant appropriately, if we can legally */ + int16_t scaled_constant = 0; - if (is_16) { - scaled_constant = ins->constants.u16[component]; - } else if (midgard_is_integer_op(op)) { - scaled_constant = ins->constants.u32[component]; + if (is_16) { + scaled_constant = ins->constants.u16[component]; + } else if (midgard_is_integer_op(op)) { + scaled_constant = ins->constants.u32[component]; - /* Constant overflow after resize */ - if (scaled_constant != ins->constants.u32[component]) - continue; - } else { - float original = ins->constants.f32[component]; - scaled_constant = _mesa_float_to_half(original); + /* Constant overflow after resize */ + if (scaled_constant != ins->constants.u32[component]) + continue; + } else { + float original = ins->constants.f32[component]; + scaled_constant = _mesa_float_to_half(original); - /* Check for loss of precision. If this is - * mediump, we don't care, but for a highp - * shader, we need to pay attention. NIR - * doesn't yet tell us which mode we're in! 
- * Practically this prevents most constants - * from being inlined, sadly. */ + /* Check for loss of precision. If this is + * mediump, we don't care, but for a highp + * shader, we need to pay attention. NIR + * doesn't yet tell us which mode we're in! + * Practically this prevents most constants + * from being inlined, sadly. */ - float fp32 = _mesa_half_to_float(scaled_constant); + float fp32 = _mesa_half_to_float(scaled_constant); - if (fp32 != original) - continue; - } + if (fp32 != original) + continue; + } - /* Should've been const folded */ - if (ins->src_abs[1] || ins->src_neg[1]) - continue; + /* Should've been const folded */ + if (ins->src_abs[1] || ins->src_neg[1]) + continue; - /* Make sure that the constant is not itself a vector - * by checking if all accessed values are the same. */ + /* Make sure that the constant is not itself a vector + * by checking if all accessed values are the same. */ - const midgard_constants *cons = &ins->constants; - uint32_t value = is_16 ? cons->u16[component] : cons->u32[component]; + const midgard_constants *cons = &ins->constants; + uint32_t value = is_16 ? cons->u16[component] : cons->u32[component]; - bool is_vector = false; - unsigned mask = effective_writemask(ins->op, ins->mask); + bool is_vector = false; + unsigned mask = effective_writemask(ins->op, ins->mask); - for (unsigned c = 0; c < MIR_VEC_COMPONENTS; ++c) { - /* We only care if this component is actually used */ - if (!(mask & (1 << c))) - continue; + for (unsigned c = 0; c < MIR_VEC_COMPONENTS; ++c) { + /* We only care if this component is actually used */ + if (!(mask & (1 << c))) + continue; - uint32_t test = is_16 ? - cons->u16[ins->swizzle[1][c]] : - cons->u32[ins->swizzle[1][c]]; + uint32_t test = is_16 ? cons->u16[ins->swizzle[1][c]] + : cons->u32[ins->swizzle[1][c]]; - if (test != value) { - is_vector = true; - break; - } - } + if (test != value) { + is_vector = true; + break; + } + } - if (is_vector) - continue; + if (is_vector) + continue; - /* Get rid of the embedded constant */ - ins->has_constants = false; - ins->src[1] = ~0; - ins->has_inline_constant = true; - ins->inline_constant = scaled_constant; - } - } + /* Get rid of the embedded constant */ + ins->has_constants = false; + ins->src[1] = ~0; + ins->has_inline_constant = true; + ins->inline_constant = scaled_constant; + } + } } /* Dead code elimination for branches at the end of a block - only one branch @@ -2814,16 +2832,17 @@ embedded_to_inline_constant(compiler_context *ctx, midgard_block *block) static void midgard_cull_dead_branch(compiler_context *ctx, midgard_block *block) { - bool branched = false; + bool branched = false; - mir_foreach_instr_in_block_safe(block, ins) { - if (!midgard_is_branch_unit(ins->unit)) continue; + mir_foreach_instr_in_block_safe(block, ins) { + if (!midgard_is_branch_unit(ins->unit)) + continue; - if (branched) - mir_remove_instruction(ins); + if (branched) + mir_remove_instruction(ins); - branched = true; - } + branched = true; + } } /* We want to force the invert on AND/OR to the second slot to legalize into @@ -2836,227 +2855,235 @@ midgard_cull_dead_branch(compiler_context *ctx, midgard_block *block) static void midgard_legalize_invert(compiler_context *ctx, midgard_block *block) { - mir_foreach_instr_in_block(block, ins) { - if (ins->type != TAG_ALU_4) continue; + mir_foreach_instr_in_block(block, ins) { + if (ins->type != TAG_ALU_4) + continue; - if (ins->op != midgard_alu_op_iand && - ins->op != midgard_alu_op_ior) continue; + if (ins->op != midgard_alu_op_iand && 
ins->op != midgard_alu_op_ior) + continue; - if (ins->src_invert[1] || !ins->src_invert[0]) continue; + if (ins->src_invert[1] || !ins->src_invert[0]) + continue; - if (ins->has_inline_constant) { - /* ~(#~a) = ~(~#a) = a, so valid, and forces both - * inverts on */ - ins->inline_constant = ~ins->inline_constant; - ins->src_invert[1] = true; - } else { - /* Flip to the right invert order. Note - * has_inline_constant false by assumption on the - * branch, so flipping makes sense. */ - mir_flip(ins); - } - } + if (ins->has_inline_constant) { + /* ~(#~a) = ~(~#a) = a, so valid, and forces both + * inverts on */ + ins->inline_constant = ~ins->inline_constant; + ins->src_invert[1] = true; + } else { + /* Flip to the right invert order. Note + * has_inline_constant false by assumption on the + * branch, so flipping makes sense. */ + mir_flip(ins); + } + } } static unsigned emit_fragment_epilogue(compiler_context *ctx, unsigned rt, unsigned sample_iter) { - /* Loop to ourselves */ - midgard_instruction *br = ctx->writeout_branch[rt][sample_iter]; - struct midgard_instruction ins = v_branch(false, false); - ins.writeout = br->writeout; - ins.branch.target_block = ctx->block_count - 1; - ins.constants.u32[0] = br->constants.u32[0]; - memcpy(&ins.src_types, &br->src_types, sizeof(ins.src_types)); - emit_mir_instruction(ctx, ins); + /* Loop to ourselves */ + midgard_instruction *br = ctx->writeout_branch[rt][sample_iter]; + struct midgard_instruction ins = v_branch(false, false); + ins.writeout = br->writeout; + ins.branch.target_block = ctx->block_count - 1; + ins.constants.u32[0] = br->constants.u32[0]; + memcpy(&ins.src_types, &br->src_types, sizeof(ins.src_types)); + emit_mir_instruction(ctx, ins); - ctx->current_block->epilogue = true; - schedule_barrier(ctx); - return ins.branch.target_block; + ctx->current_block->epilogue = true; + schedule_barrier(ctx); + return ins.branch.target_block; } static midgard_block * emit_block_init(compiler_context *ctx) { - midgard_block *this_block = ctx->after_block; - ctx->after_block = NULL; + midgard_block *this_block = ctx->after_block; + ctx->after_block = NULL; - if (!this_block) - this_block = create_empty_block(ctx); + if (!this_block) + this_block = create_empty_block(ctx); - list_addtail(&this_block->base.link, &ctx->blocks); + list_addtail(&this_block->base.link, &ctx->blocks); - this_block->scheduled = false; - ++ctx->block_count; + this_block->scheduled = false; + ++ctx->block_count; - /* Set up current block */ - list_inithead(&this_block->base.instructions); - ctx->current_block = this_block; + /* Set up current block */ + list_inithead(&this_block->base.instructions); + ctx->current_block = this_block; - return this_block; + return this_block; } static midgard_block * emit_block(compiler_context *ctx, nir_block *block) { - midgard_block *this_block = emit_block_init(ctx); + midgard_block *this_block = emit_block_init(ctx); - nir_foreach_instr(instr, block) { - emit_instr(ctx, instr); - ++ctx->instruction_count; - } + nir_foreach_instr(instr, block) { + emit_instr(ctx, instr); + ++ctx->instruction_count; + } - return this_block; + return this_block; } -static midgard_block *emit_cf_list(struct compiler_context *ctx, struct exec_list *list); +static midgard_block *emit_cf_list(struct compiler_context *ctx, + struct exec_list *list); static void emit_if(struct compiler_context *ctx, nir_if *nif) { - midgard_block *before_block = ctx->current_block; + midgard_block *before_block = ctx->current_block; - /* Speculatively emit the branch, but we can't 
fill it in until later */ - bool inv = false; - EMIT(branch, true, true); - midgard_instruction *then_branch = mir_last_in_block(ctx->current_block); - then_branch->src[0] = mir_get_branch_cond(&nif->condition, &inv); - then_branch->src_types[0] = nir_type_uint32; - then_branch->branch.invert_conditional = !inv; + /* Speculatively emit the branch, but we can't fill it in until later */ + bool inv = false; + EMIT(branch, true, true); + midgard_instruction *then_branch = mir_last_in_block(ctx->current_block); + then_branch->src[0] = mir_get_branch_cond(&nif->condition, &inv); + then_branch->src_types[0] = nir_type_uint32; + then_branch->branch.invert_conditional = !inv; - /* Emit the two subblocks. */ - midgard_block *then_block = emit_cf_list(ctx, &nif->then_list); - midgard_block *end_then_block = ctx->current_block; + /* Emit the two subblocks. */ + midgard_block *then_block = emit_cf_list(ctx, &nif->then_list); + midgard_block *end_then_block = ctx->current_block; - /* Emit a jump from the end of the then block to the end of the else */ - EMIT(branch, false, false); - midgard_instruction *then_exit = mir_last_in_block(ctx->current_block); + /* Emit a jump from the end of the then block to the end of the else */ + EMIT(branch, false, false); + midgard_instruction *then_exit = mir_last_in_block(ctx->current_block); - /* Emit second block, and check if it's empty */ + /* Emit second block, and check if it's empty */ - int else_idx = ctx->block_count; - int count_in = ctx->instruction_count; - midgard_block *else_block = emit_cf_list(ctx, &nif->else_list); - midgard_block *end_else_block = ctx->current_block; - int after_else_idx = ctx->block_count; + int else_idx = ctx->block_count; + int count_in = ctx->instruction_count; + midgard_block *else_block = emit_cf_list(ctx, &nif->else_list); + midgard_block *end_else_block = ctx->current_block; + int after_else_idx = ctx->block_count; - /* Now that we have the subblocks emitted, fix up the branches */ + /* Now that we have the subblocks emitted, fix up the branches */ - assert(then_block); - assert(else_block); + assert(then_block); + assert(else_block); - if (ctx->instruction_count == count_in) { - /* The else block is empty, so don't emit an exit jump */ - mir_remove_instruction(then_exit); - then_branch->branch.target_block = after_else_idx; - } else { - then_branch->branch.target_block = else_idx; - then_exit->branch.target_block = after_else_idx; - } + if (ctx->instruction_count == count_in) { + /* The else block is empty, so don't emit an exit jump */ + mir_remove_instruction(then_exit); + then_branch->branch.target_block = after_else_idx; + } else { + then_branch->branch.target_block = else_idx; + then_exit->branch.target_block = after_else_idx; + } - /* Wire up the successors */ + /* Wire up the successors */ - ctx->after_block = create_empty_block(ctx); + ctx->after_block = create_empty_block(ctx); - pan_block_add_successor(&before_block->base, &then_block->base); - pan_block_add_successor(&before_block->base, &else_block->base); + pan_block_add_successor(&before_block->base, &then_block->base); + pan_block_add_successor(&before_block->base, &else_block->base); - pan_block_add_successor(&end_then_block->base, &ctx->after_block->base); - pan_block_add_successor(&end_else_block->base, &ctx->after_block->base); + pan_block_add_successor(&end_then_block->base, &ctx->after_block->base); + pan_block_add_successor(&end_else_block->base, &ctx->after_block->base); } static void emit_loop(struct compiler_context *ctx, nir_loop *nloop) { - /* 
Remember where we are */ - midgard_block *start_block = ctx->current_block; + /* Remember where we are */ + midgard_block *start_block = ctx->current_block; - /* Allocate a loop number, growing the current inner loop depth */ - int loop_idx = ++ctx->current_loop_depth; + /* Allocate a loop number, growing the current inner loop depth */ + int loop_idx = ++ctx->current_loop_depth; - /* Get index from before the body so we can loop back later */ - int start_idx = ctx->block_count; + /* Get index from before the body so we can loop back later */ + int start_idx = ctx->block_count; - /* Emit the body itself */ - midgard_block *loop_block = emit_cf_list(ctx, &nloop->body); + /* Emit the body itself */ + midgard_block *loop_block = emit_cf_list(ctx, &nloop->body); - /* Branch back to loop back */ - struct midgard_instruction br_back = v_branch(false, false); - br_back.branch.target_block = start_idx; - emit_mir_instruction(ctx, br_back); + /* Branch back to loop back */ + struct midgard_instruction br_back = v_branch(false, false); + br_back.branch.target_block = start_idx; + emit_mir_instruction(ctx, br_back); - /* Mark down that branch in the graph. */ - pan_block_add_successor(&start_block->base, &loop_block->base); - pan_block_add_successor(&ctx->current_block->base, &loop_block->base); + /* Mark down that branch in the graph. */ + pan_block_add_successor(&start_block->base, &loop_block->base); + pan_block_add_successor(&ctx->current_block->base, &loop_block->base); - /* Find the index of the block about to follow us (note: we don't add - * one; blocks are 0-indexed so we get a fencepost problem) */ - int break_block_idx = ctx->block_count; + /* Find the index of the block about to follow us (note: we don't add + * one; blocks are 0-indexed so we get a fencepost problem) */ + int break_block_idx = ctx->block_count; - /* Fix up the break statements we emitted to point to the right place, - * now that we can allocate a block number for them */ - ctx->after_block = create_empty_block(ctx); + /* Fix up the break statements we emitted to point to the right place, + * now that we can allocate a block number for them */ + ctx->after_block = create_empty_block(ctx); - mir_foreach_block_from(ctx, start_block, _block) { - mir_foreach_instr_in_block(((midgard_block *) _block), ins) { - if (ins->type != TAG_ALU_4) continue; - if (!ins->compact_branch) continue; + mir_foreach_block_from(ctx, start_block, _block) { + mir_foreach_instr_in_block(((midgard_block *)_block), ins) { + if (ins->type != TAG_ALU_4) + continue; + if (!ins->compact_branch) + continue; - /* We found a branch -- check the type to see if we need to do anything */ - if (ins->branch.target_type != TARGET_BREAK) continue; + /* We found a branch -- check the type to see if we need to do anything + */ + if (ins->branch.target_type != TARGET_BREAK) + continue; - /* It's a break! Check if it's our break */ - if (ins->branch.target_break != loop_idx) continue; + /* It's a break! Check if it's our break */ + if (ins->branch.target_break != loop_idx) + continue; - /* Okay, cool, we're breaking out of this loop. - * Rewrite from a break to a goto */ + /* Okay, cool, we're breaking out of this loop. 
+ * Rewrite from a break to a goto */ - ins->branch.target_type = TARGET_GOTO; - ins->branch.target_block = break_block_idx; + ins->branch.target_type = TARGET_GOTO; + ins->branch.target_block = break_block_idx; - pan_block_add_successor(_block, &ctx->after_block->base); - } - } + pan_block_add_successor(_block, &ctx->after_block->base); + } + } - /* Now that we've finished emitting the loop, free up the depth again - * so we play nice with recursion amid nested loops */ - --ctx->current_loop_depth; + /* Now that we've finished emitting the loop, free up the depth again + * so we play nice with recursion amid nested loops */ + --ctx->current_loop_depth; - /* Dump loop stats */ - ++ctx->loop_count; + /* Dump loop stats */ + ++ctx->loop_count; } static midgard_block * emit_cf_list(struct compiler_context *ctx, struct exec_list *list) { - midgard_block *start_block = NULL; + midgard_block *start_block = NULL; - foreach_list_typed(nir_cf_node, node, node, list) { - switch (node->type) { - case nir_cf_node_block: { - midgard_block *block = emit_block(ctx, nir_cf_node_as_block(node)); + foreach_list_typed(nir_cf_node, node, node, list) { + switch (node->type) { + case nir_cf_node_block: { + midgard_block *block = emit_block(ctx, nir_cf_node_as_block(node)); - if (!start_block) - start_block = block; + if (!start_block) + start_block = block; - break; - } + break; + } - case nir_cf_node_if: - emit_if(ctx, nir_cf_node_as_if(node)); - break; + case nir_cf_node_if: + emit_if(ctx, nir_cf_node_as_if(node)); + break; - case nir_cf_node_loop: - emit_loop(ctx, nir_cf_node_as_loop(node)); - break; + case nir_cf_node_loop: + emit_loop(ctx, nir_cf_node_as_loop(node)); + break; - case nir_cf_node_function: - assert(0); - break; - } - } + case nir_cf_node_function: + assert(0); + break; + } + } - return start_block; + return start_block; } /* Due to lookahead, we need to report the first tag executed in the command @@ -3066,22 +3093,22 @@ emit_cf_list(struct compiler_context *ctx, struct exec_list *list) unsigned midgard_get_first_tag_from_block(compiler_context *ctx, unsigned block_idx) { - midgard_block *initial_block = mir_get_block(ctx, block_idx); + midgard_block *initial_block = mir_get_block(ctx, block_idx); - mir_foreach_block_from(ctx, initial_block, _v) { - midgard_block *v = (midgard_block *) _v; - if (v->quadword_count) { - midgard_bundle *initial_bundle = - util_dynarray_element(&v->bundles, midgard_bundle, 0); + mir_foreach_block_from(ctx, initial_block, _v) { + midgard_block *v = (midgard_block *)_v; + if (v->quadword_count) { + midgard_bundle *initial_bundle = + util_dynarray_element(&v->bundles, midgard_bundle, 0); - return initial_bundle->tag; - } - } + return initial_bundle->tag; + } + } - /* Default to a tag 1 which will break from the shader, in case we jump - * to the exit block (i.e. `return` in a compute shader) */ + /* Default to a tag 1 which will break from the shader, in case we jump + * to the exit block (i.e. 
`return` in a compute shader) */ - return 1; + return 1; } /* For each fragment writeout instruction, generate a writeout loop to @@ -3090,41 +3117,42 @@ midgard_get_first_tag_from_block(compiler_context *ctx, unsigned block_idx) static void mir_add_writeout_loops(compiler_context *ctx) { - for (unsigned rt = 0; rt < ARRAY_SIZE(ctx->writeout_branch); ++rt) { - for (unsigned s = 0; s < MIDGARD_MAX_SAMPLE_ITER; ++s) { - midgard_instruction *br = ctx->writeout_branch[rt][s]; - if (!br) continue; + for (unsigned rt = 0; rt < ARRAY_SIZE(ctx->writeout_branch); ++rt) { + for (unsigned s = 0; s < MIDGARD_MAX_SAMPLE_ITER; ++s) { + midgard_instruction *br = ctx->writeout_branch[rt][s]; + if (!br) + continue; - unsigned popped = br->branch.target_block; - pan_block_add_successor(&(mir_get_block(ctx, popped - 1)->base), - &ctx->current_block->base); - br->branch.target_block = emit_fragment_epilogue(ctx, rt, s); - br->branch.target_type = TARGET_GOTO; + unsigned popped = br->branch.target_block; + pan_block_add_successor(&(mir_get_block(ctx, popped - 1)->base), + &ctx->current_block->base); + br->branch.target_block = emit_fragment_epilogue(ctx, rt, s); + br->branch.target_type = TARGET_GOTO; - /* If we have more RTs, we'll need to restore back after our - * loop terminates */ - midgard_instruction *next_br = NULL; + /* If we have more RTs, we'll need to restore back after our + * loop terminates */ + midgard_instruction *next_br = NULL; - if ((s + 1) < MIDGARD_MAX_SAMPLE_ITER) - next_br = ctx->writeout_branch[rt][s + 1]; + if ((s + 1) < MIDGARD_MAX_SAMPLE_ITER) + next_br = ctx->writeout_branch[rt][s + 1]; - if (!next_br && (rt + 1) < ARRAY_SIZE(ctx->writeout_branch)) - next_br = ctx->writeout_branch[rt + 1][0]; + if (!next_br && (rt + 1) < ARRAY_SIZE(ctx->writeout_branch)) + next_br = ctx->writeout_branch[rt + 1][0]; - if (next_br) { - midgard_instruction uncond = v_branch(false, false); - uncond.branch.target_block = popped; - uncond.branch.target_type = TARGET_GOTO; - emit_mir_instruction(ctx, uncond); - pan_block_add_successor(&ctx->current_block->base, - &(mir_get_block(ctx, popped)->base)); - schedule_barrier(ctx); - } else { - /* We're last, so we can terminate here */ - br->last_writeout = true; - } - } - } + if (next_br) { + midgard_instruction uncond = v_branch(false, false); + uncond.branch.target_block = popped; + uncond.branch.target_type = TARGET_GOTO; + emit_mir_instruction(ctx, uncond); + pan_block_add_successor(&ctx->current_block->base, + &(mir_get_block(ctx, popped)->base)); + schedule_barrier(ctx); + } else { + /* We're last, so we can terminate here */ + br->last_writeout = true; + } + } + } } void @@ -3133,281 +3161,279 @@ midgard_compile_shader_nir(nir_shader *nir, struct util_dynarray *binary, struct pan_shader_info *info) { - midgard_debug = debug_get_option_midgard_debug(); + midgard_debug = debug_get_option_midgard_debug(); - /* TODO: Bound against what? */ - compiler_context *ctx = rzalloc(NULL, compiler_context); - ctx->sysval_to_id = panfrost_init_sysvals(&info->sysvals, - inputs->fixed_sysval_layout, - ctx); + /* TODO: Bound against what? 
*/ + compiler_context *ctx = rzalloc(NULL, compiler_context); + ctx->sysval_to_id = + panfrost_init_sysvals(&info->sysvals, inputs->fixed_sysval_layout, ctx); - ctx->inputs = inputs; - ctx->nir = nir; - ctx->info = info; - ctx->stage = nir->info.stage; + ctx->inputs = inputs; + ctx->nir = nir; + ctx->info = info; + ctx->stage = nir->info.stage; - if (inputs->is_blend) { - unsigned nr_samples = MAX2(inputs->blend.nr_samples, 1); - const struct util_format_description *desc = - util_format_description(inputs->rt_formats[inputs->blend.rt]); + if (inputs->is_blend) { + unsigned nr_samples = MAX2(inputs->blend.nr_samples, 1); + const struct util_format_description *desc = + util_format_description(inputs->rt_formats[inputs->blend.rt]); - /* We have to split writeout in 128 bit chunks */ - ctx->blend_sample_iterations = - DIV_ROUND_UP(desc->block.bits * nr_samples, 128); - } - ctx->blend_input = ~0; - ctx->blend_src1 = ~0; - ctx->quirks = midgard_get_quirks(inputs->gpu_id); + /* We have to split writeout in 128 bit chunks */ + ctx->blend_sample_iterations = + DIV_ROUND_UP(desc->block.bits * nr_samples, 128); + } + ctx->blend_input = ~0; + ctx->blend_src1 = ~0; + ctx->quirks = midgard_get_quirks(inputs->gpu_id); - /* Initialize at a global (not block) level hash tables */ + /* Initialize at a global (not block) level hash tables */ - ctx->ssa_constants = _mesa_hash_table_u64_create(ctx); + ctx->ssa_constants = _mesa_hash_table_u64_create(ctx); - /* Lower gl_Position pre-optimisation, but after lowering vars to ssa - * (so we don't accidentally duplicate the epilogue since mesa/st has - * messed with our I/O quite a bit already) */ + /* Lower gl_Position pre-optimisation, but after lowering vars to ssa + * (so we don't accidentally duplicate the epilogue since mesa/st has + * messed with our I/O quite a bit already) */ - NIR_PASS_V(nir, nir_lower_vars_to_ssa); + NIR_PASS_V(nir, nir_lower_vars_to_ssa); - if (ctx->stage == MESA_SHADER_VERTEX) { - NIR_PASS_V(nir, nir_lower_viewport_transform); - NIR_PASS_V(nir, nir_lower_point_size, 1.0, 0.0); - } + if (ctx->stage == MESA_SHADER_VERTEX) { + NIR_PASS_V(nir, nir_lower_viewport_transform); + NIR_PASS_V(nir, nir_lower_point_size, 1.0, 0.0); + } - NIR_PASS_V(nir, nir_lower_var_copies); - NIR_PASS_V(nir, nir_lower_vars_to_ssa); - NIR_PASS_V(nir, nir_split_var_copies); - NIR_PASS_V(nir, nir_lower_var_copies); - NIR_PASS_V(nir, nir_lower_global_vars_to_local); - NIR_PASS_V(nir, nir_lower_var_copies); - NIR_PASS_V(nir, nir_lower_vars_to_ssa); + NIR_PASS_V(nir, nir_lower_var_copies); + NIR_PASS_V(nir, nir_lower_vars_to_ssa); + NIR_PASS_V(nir, nir_split_var_copies); + NIR_PASS_V(nir, nir_lower_var_copies); + NIR_PASS_V(nir, nir_lower_global_vars_to_local); + NIR_PASS_V(nir, nir_lower_var_copies); + NIR_PASS_V(nir, nir_lower_vars_to_ssa); - NIR_PASS_V(nir, pan_lower_framebuffer, - inputs->rt_formats, inputs->raw_fmt_mask, - inputs->is_blend, ctx->quirks & MIDGARD_BROKEN_BLEND_LOADS); + NIR_PASS_V(nir, pan_lower_framebuffer, inputs->rt_formats, + inputs->raw_fmt_mask, inputs->is_blend, + ctx->quirks & MIDGARD_BROKEN_BLEND_LOADS); - NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, - glsl_type_size, 0); + NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, + glsl_type_size, 0); - if (ctx->stage == MESA_SHADER_VERTEX) { - /* nir_lower[_explicit]_io is lazy and emits mul+add chains even - * for offsets it could figure out are constant. Do some - * constant folding before pan_nir_lower_store_component below. 
- */ - NIR_PASS_V(nir, nir_opt_constant_folding); - NIR_PASS_V(nir, pan_nir_lower_store_component); - } + if (ctx->stage == MESA_SHADER_VERTEX) { + /* nir_lower[_explicit]_io is lazy and emits mul+add chains even + * for offsets it could figure out are constant. Do some + * constant folding before pan_nir_lower_store_component below. + */ + NIR_PASS_V(nir, nir_opt_constant_folding); + NIR_PASS_V(nir, pan_nir_lower_store_component); + } - NIR_PASS_V(nir, nir_lower_ssbo); - NIR_PASS_V(nir, pan_nir_lower_zs_store); + NIR_PASS_V(nir, nir_lower_ssbo); + NIR_PASS_V(nir, pan_nir_lower_zs_store); - NIR_PASS_V(nir, pan_nir_lower_64bit_intrin); + NIR_PASS_V(nir, pan_nir_lower_64bit_intrin); - NIR_PASS_V(nir, midgard_nir_lower_global_load); + NIR_PASS_V(nir, midgard_nir_lower_global_load); - /* Collect varyings after lowering I/O */ - pan_nir_collect_varyings(nir, info); + /* Collect varyings after lowering I/O */ + pan_nir_collect_varyings(nir, info); - /* Optimisation passes */ + /* Optimisation passes */ - optimise_nir(nir, ctx->quirks, inputs->is_blend, inputs->is_blit); + optimise_nir(nir, ctx->quirks, inputs->is_blend, inputs->is_blit); - bool skip_internal = nir->info.internal; - skip_internal &= !(midgard_debug & MIDGARD_DBG_INTERNAL); + bool skip_internal = nir->info.internal; + skip_internal &= !(midgard_debug & MIDGARD_DBG_INTERNAL); + + if (midgard_debug & MIDGARD_DBG_SHADERS && !skip_internal) + nir_print_shader(nir, stdout); + + info->tls_size = nir->scratch_size; - if (midgard_debug & MIDGARD_DBG_SHADERS && !skip_internal) - nir_print_shader(nir, stdout); + nir_foreach_function(func, nir) { + if (!func->impl) + continue; - info->tls_size = nir->scratch_size; + list_inithead(&ctx->blocks); + ctx->block_count = 0; + ctx->func = func; + ctx->already_emitted = + calloc(BITSET_WORDS(func->impl->ssa_alloc), sizeof(BITSET_WORD)); + + if (nir->info.outputs_read && !inputs->is_blend) { + emit_block_init(ctx); - nir_foreach_function(func, nir) { - if (!func->impl) - continue; + struct midgard_instruction wait = v_branch(false, false); + wait.branch.target_type = TARGET_TILEBUF_WAIT; - list_inithead(&ctx->blocks); - ctx->block_count = 0; - ctx->func = func; - ctx->already_emitted = calloc(BITSET_WORDS(func->impl->ssa_alloc), sizeof(BITSET_WORD)); + emit_mir_instruction(ctx, wait); + + ++ctx->instruction_count; + } - if (nir->info.outputs_read && !inputs->is_blend) { - emit_block_init(ctx); + emit_cf_list(ctx, &func->impl->body); + free(ctx->already_emitted); + break; /* TODO: Multi-function shaders */ + } - struct midgard_instruction wait = v_branch(false, false); - wait.branch.target_type = TARGET_TILEBUF_WAIT; + /* Per-block lowering before opts */ - emit_mir_instruction(ctx, wait); + mir_foreach_block(ctx, _block) { + midgard_block *block = (midgard_block *)_block; + inline_alu_constants(ctx, block); + embedded_to_inline_constant(ctx, block); + } + /* MIR-level optimizations */ - ++ctx->instruction_count; - } + bool progress = false; - emit_cf_list(ctx, &func->impl->body); - free(ctx->already_emitted); - break; /* TODO: Multi-function shaders */ - } + do { + progress = false; + progress |= midgard_opt_dead_code_eliminate(ctx); - /* Per-block lowering before opts */ + mir_foreach_block(ctx, _block) { + midgard_block *block = (midgard_block *)_block; + progress |= midgard_opt_copy_prop(ctx, block); + progress |= midgard_opt_combine_projection(ctx, block); + progress |= midgard_opt_varying_projection(ctx, block); + } + } while (progress); - mir_foreach_block(ctx, _block) { - midgard_block *block 
= (midgard_block *) _block; - inline_alu_constants(ctx, block); - embedded_to_inline_constant(ctx, block); - } - /* MIR-level optimizations */ + mir_foreach_block(ctx, _block) { + midgard_block *block = (midgard_block *)_block; + midgard_lower_derivatives(ctx, block); + midgard_legalize_invert(ctx, block); + midgard_cull_dead_branch(ctx, block); + } - bool progress = false; + if (ctx->stage == MESA_SHADER_FRAGMENT) + mir_add_writeout_loops(ctx); - do { - progress = false; - progress |= midgard_opt_dead_code_eliminate(ctx); + /* Analyze now that the code is known but before scheduling creates + * pipeline registers which are harder to track */ + mir_analyze_helper_requirements(ctx); - mir_foreach_block(ctx, _block) { - midgard_block *block = (midgard_block *) _block; - progress |= midgard_opt_copy_prop(ctx, block); - progress |= midgard_opt_combine_projection(ctx, block); - progress |= midgard_opt_varying_projection(ctx, block); - } - } while (progress); + if (midgard_debug & MIDGARD_DBG_SHADERS && !skip_internal) + mir_print_shader(ctx); - mir_foreach_block(ctx, _block) { - midgard_block *block = (midgard_block *) _block; - midgard_lower_derivatives(ctx, block); - midgard_legalize_invert(ctx, block); - midgard_cull_dead_branch(ctx, block); - } + /* Schedule! */ + midgard_schedule_program(ctx); + mir_ra(ctx); - if (ctx->stage == MESA_SHADER_FRAGMENT) - mir_add_writeout_loops(ctx); + if (midgard_debug & MIDGARD_DBG_SHADERS && !skip_internal) + mir_print_shader(ctx); - /* Analyze now that the code is known but before scheduling creates - * pipeline registers which are harder to track */ - mir_analyze_helper_requirements(ctx); + /* Analyze after scheduling since this is order-dependent */ + mir_analyze_helper_terminate(ctx); - if (midgard_debug & MIDGARD_DBG_SHADERS && !skip_internal) - mir_print_shader(ctx); + /* Emit flat binary from the instruction arrays. Iterate each block in + * sequence. Save instruction boundaries such that lookahead tags can + * be assigned easily */ - /* Schedule! */ - midgard_schedule_program(ctx); - mir_ra(ctx); + /* Cache _all_ bundles in source order for lookahead across failed branches */ - if (midgard_debug & MIDGARD_DBG_SHADERS && !skip_internal) - mir_print_shader(ctx); + int bundle_count = 0; + mir_foreach_block(ctx, _block) { + midgard_block *block = (midgard_block *)_block; + bundle_count += block->bundles.size / sizeof(midgard_bundle); + } + midgard_bundle **source_order_bundles = + malloc(sizeof(midgard_bundle *) * bundle_count); + int bundle_idx = 0; + mir_foreach_block(ctx, _block) { + midgard_block *block = (midgard_block *)_block; + util_dynarray_foreach(&block->bundles, midgard_bundle, bundle) { + source_order_bundles[bundle_idx++] = bundle; + } + } - /* Analyze after scheduling since this is order-dependent */ - mir_analyze_helper_terminate(ctx); + int current_bundle = 0; - /* Emit flat binary from the instruction arrays. Iterate each block in - * sequence. Save instruction boundaries such that lookahead tags can - * be assigned easily */ + /* Midgard prefetches instruction types, so during emission we + * need to lookahead. Unless this is the last instruction, in + * which we return 1. 
*/ - /* Cache _all_ bundles in source order for lookahead across failed branches */ + mir_foreach_block(ctx, _block) { + midgard_block *block = (midgard_block *)_block; + mir_foreach_bundle_in_block(block, bundle) { + int lookahead = 1; - int bundle_count = 0; - mir_foreach_block(ctx, _block) { - midgard_block *block = (midgard_block *) _block; - bundle_count += block->bundles.size / sizeof(midgard_bundle); - } - midgard_bundle **source_order_bundles = malloc(sizeof(midgard_bundle *) * bundle_count); - int bundle_idx = 0; - mir_foreach_block(ctx, _block) { - midgard_block *block = (midgard_block *) _block; - util_dynarray_foreach(&block->bundles, midgard_bundle, bundle) { - source_order_bundles[bundle_idx++] = bundle; - } - } + if (!bundle->last_writeout && (current_bundle + 1 < bundle_count)) + lookahead = source_order_bundles[current_bundle + 1]->tag; - int current_bundle = 0; + emit_binary_bundle(ctx, block, bundle, binary, lookahead); + ++current_bundle; + } - /* Midgard prefetches instruction types, so during emission we - * need to lookahead. Unless this is the last instruction, in - * which we return 1. */ + /* TODO: Free deeper */ + // util_dynarray_fini(&block->instructions); + } - mir_foreach_block(ctx, _block) { - midgard_block *block = (midgard_block *) _block; - mir_foreach_bundle_in_block(block, bundle) { - int lookahead = 1; + free(source_order_bundles); - if (!bundle->last_writeout && (current_bundle + 1 < bundle_count)) - lookahead = source_order_bundles[current_bundle + 1]->tag; + /* Report the very first tag executed */ + info->midgard.first_tag = midgard_get_first_tag_from_block(ctx, 0); - emit_binary_bundle(ctx, block, bundle, binary, lookahead); - ++current_bundle; - } + info->ubo_mask = ctx->ubo_mask & ((1 << ctx->nir->info.num_ubos) - 1); - /* TODO: Free deeper */ - //util_dynarray_fini(&block->instructions); - } + if (midgard_debug & MIDGARD_DBG_SHADERS && !skip_internal) { + disassemble_midgard(stdout, binary->data, binary->size, inputs->gpu_id, + midgard_debug & MIDGARD_DBG_VERBOSE); + fflush(stdout); + } - free(source_order_bundles); - - /* Report the very first tag executed */ - info->midgard.first_tag = midgard_get_first_tag_from_block(ctx, 0); - - info->ubo_mask = ctx->ubo_mask & ((1 << ctx->nir->info.num_ubos) - 1); + /* A shader ending on a 16MB boundary causes INSTR_INVALID_PC faults, + * workaround by adding some padding to the end of the shader. (The + * kernel makes sure shader BOs can't cross 16MB boundaries.) */ + if (binary->size) + memset(util_dynarray_grow(binary, uint8_t, 16), 0, 16); + + if ((midgard_debug & MIDGARD_DBG_SHADERDB || inputs->debug) && + !nir->info.internal) { + unsigned nr_bundles = 0, nr_ins = 0; + + /* Count instructions and bundles */ - if (midgard_debug & MIDGARD_DBG_SHADERS && !skip_internal) { - disassemble_midgard(stdout, binary->data, - binary->size, inputs->gpu_id, - midgard_debug & MIDGARD_DBG_VERBOSE); - fflush(stdout); - } - - /* A shader ending on a 16MB boundary causes INSTR_INVALID_PC faults, - * workaround by adding some padding to the end of the shader. (The - * kernel makes sure shader BOs can't cross 16MB boundaries.) 
*/ - if (binary->size) - memset(util_dynarray_grow(binary, uint8_t, 16), 0, 16); - - if ((midgard_debug & MIDGARD_DBG_SHADERDB || inputs->debug) && - !nir->info.internal) { - unsigned nr_bundles = 0, nr_ins = 0; - - /* Count instructions and bundles */ - - mir_foreach_block(ctx, _block) { - midgard_block *block = (midgard_block *) _block; - nr_bundles += util_dynarray_num_elements( - &block->bundles, midgard_bundle); + mir_foreach_block(ctx, _block) { + midgard_block *block = (midgard_block *)_block; + nr_bundles += + util_dynarray_num_elements(&block->bundles, midgard_bundle); - mir_foreach_bundle_in_block(block, bun) - nr_ins += bun->instruction_count; - } + mir_foreach_bundle_in_block(block, bun) + nr_ins += bun->instruction_count; + } - /* Calculate thread count. There are certain cutoffs by - * register count for thread count */ + /* Calculate thread count. There are certain cutoffs by + * register count for thread count */ - unsigned nr_registers = info->work_reg_count; + unsigned nr_registers = info->work_reg_count; - unsigned nr_threads = - (nr_registers <= 4) ? 4 : - (nr_registers <= 8) ? 2 : - 1; + unsigned nr_threads = (nr_registers <= 4) ? 4 + : (nr_registers <= 8) ? 2 + : 1; - char *shaderdb = NULL; + char *shaderdb = NULL; - /* Dump stats */ + /* Dump stats */ - asprintf(&shaderdb, "%s shader: " - "%u inst, %u bundles, %u quadwords, " - "%u registers, %u threads, %u loops, " - "%u:%u spills:fills", - ctx->inputs->is_blend ? "PAN_SHADER_BLEND" : - gl_shader_stage_name(ctx->stage), - nr_ins, nr_bundles, ctx->quadword_count, - nr_registers, nr_threads, - ctx->loop_count, - ctx->spills, ctx->fills); + asprintf(&shaderdb, + "%s shader: " + "%u inst, %u bundles, %u quadwords, " + "%u registers, %u threads, %u loops, " + "%u:%u spills:fills", + ctx->inputs->is_blend ? "PAN_SHADER_BLEND" + : gl_shader_stage_name(ctx->stage), + nr_ins, nr_bundles, ctx->quadword_count, nr_registers, + nr_threads, ctx->loop_count, ctx->spills, ctx->fills); - if (midgard_debug & MIDGARD_DBG_SHADERDB) - fprintf(stderr, "SHADER-DB: %s\n", shaderdb); + if (midgard_debug & MIDGARD_DBG_SHADERDB) + fprintf(stderr, "SHADER-DB: %s\n", shaderdb); - if (inputs->debug) - util_debug_message(inputs->debug, SHADER_INFO, "%s", shaderdb); - - free(shaderdb); - } + if (inputs->debug) + util_debug_message(inputs->debug, SHADER_INFO, "%s", shaderdb); - _mesa_hash_table_u64_destroy(ctx->ssa_constants); - _mesa_hash_table_u64_destroy(ctx->sysval_to_id); + free(shaderdb); + } - ralloc_free(ctx); + _mesa_hash_table_u64_destroy(ctx->ssa_constants); + _mesa_hash_table_u64_destroy(ctx->sysval_to_id); + + ralloc_free(ctx); } diff --git a/src/panfrost/midgard/midgard_compile.h b/src/panfrost/midgard/midgard_compile.h index bced01dde9c..1b88eb11339 100644 --- a/src/panfrost/midgard/midgard_compile.h +++ b/src/panfrost/midgard/midgard_compile.h @@ -26,81 +26,81 @@ #define __MIDGARD_H_ #include "compiler/nir/nir.h" -#include "util/u_dynarray.h" #include "panfrost/util/pan_ir.h" +#include "util/u_dynarray.h" -void -midgard_compile_shader_nir(nir_shader *nir, - const struct panfrost_compile_inputs *inputs, - struct util_dynarray *binary, - struct pan_shader_info *info); +void midgard_compile_shader_nir(nir_shader *nir, + const struct panfrost_compile_inputs *inputs, + struct util_dynarray *binary, + struct pan_shader_info *info); /* NIR options are shared between the standalone compiler and the online * compiler. Defining it here is the simplest, though maybe not the Right * solution. 
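/* Worked example of the register-count cutoffs used for the shader-db thread
 * count above: 4 or fewer work registers run 4 threads, 5 to 8 run 2 threads,
 * and anything above 8 runs a single thread. Hypothetical helper restating
 * the ternary chain, for illustration only. */
static unsigned
midgard_threads_from_work_regs(unsigned nr_registers)
{
   return (nr_registers <= 4) ? 4 : (nr_registers <= 8) ? 2 : 1;
}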
*/ static const nir_shader_compiler_options midgard_nir_options = { - .lower_ffma16 = true, - .lower_ffma32 = true, - .lower_ffma64 = true, - .lower_scmp = true, - .lower_flrp16 = true, - .lower_flrp32 = true, - .lower_flrp64 = true, - .lower_ffract = true, - .lower_fmod = true, - .lower_fdiv = true, - .lower_isign = true, - .lower_fpow = true, - .lower_find_lsb = true, - .lower_ifind_msb = true, - .lower_fdph = true, - .lower_uadd_carry = true, - .lower_usub_borrow = true, + .lower_ffma16 = true, + .lower_ffma32 = true, + .lower_ffma64 = true, + .lower_scmp = true, + .lower_flrp16 = true, + .lower_flrp32 = true, + .lower_flrp64 = true, + .lower_ffract = true, + .lower_fmod = true, + .lower_fdiv = true, + .lower_isign = true, + .lower_fpow = true, + .lower_find_lsb = true, + .lower_ifind_msb = true, + .lower_fdph = true, + .lower_uadd_carry = true, + .lower_usub_borrow = true, - /* TODO: We have native ops to help here, which we'll want to look into - * eventually */ - .lower_fsign = true, + /* TODO: We have native ops to help here, which we'll want to look into + * eventually */ + .lower_fsign = true, - .lower_bit_count = true, - .lower_bitfield_reverse = true, - .lower_bitfield_insert_to_shifts = true, - .lower_bitfield_extract_to_shifts = true, - .lower_extract_byte = true, - .lower_extract_word = true, - .lower_insert_byte = true, - .lower_insert_word = true, - .lower_rotate = true, + .lower_bit_count = true, + .lower_bitfield_reverse = true, + .lower_bitfield_insert_to_shifts = true, + .lower_bitfield_extract_to_shifts = true, + .lower_extract_byte = true, + .lower_extract_word = true, + .lower_insert_byte = true, + .lower_insert_word = true, + .lower_rotate = true, - .lower_pack_half_2x16 = true, - .lower_pack_unorm_2x16 = true, - .lower_pack_snorm_2x16 = true, - .lower_pack_unorm_4x8 = true, - .lower_pack_snorm_4x8 = true, - .lower_unpack_half_2x16 = true, - .lower_unpack_unorm_2x16 = true, - .lower_unpack_snorm_2x16 = true, - .lower_unpack_unorm_4x8 = true, - .lower_unpack_snorm_4x8 = true, - .lower_pack_split = true, - .lower_pack_64_2x32_split = true, - .lower_unpack_64_2x32_split = true, - .lower_int64_options = nir_lower_imul_2x32_64, + .lower_pack_half_2x16 = true, + .lower_pack_unorm_2x16 = true, + .lower_pack_snorm_2x16 = true, + .lower_pack_unorm_4x8 = true, + .lower_pack_snorm_4x8 = true, + .lower_unpack_half_2x16 = true, + .lower_unpack_unorm_2x16 = true, + .lower_unpack_snorm_2x16 = true, + .lower_unpack_unorm_4x8 = true, + .lower_unpack_snorm_4x8 = true, + .lower_pack_split = true, + .lower_pack_64_2x32_split = true, + .lower_unpack_64_2x32_split = true, + .lower_int64_options = nir_lower_imul_2x32_64, - .lower_doubles_options = nir_lower_dmod, + .lower_doubles_options = nir_lower_dmod, - .lower_uniforms_to_ubo = true, - .has_fsub = true, - .has_isub = true, - .vectorize_io = true, - .use_interpolated_input_intrinsics = true, + .lower_uniforms_to_ubo = true, + .has_fsub = true, + .has_isub = true, + .vectorize_io = true, + .use_interpolated_input_intrinsics = true, - .vertex_id_zero_based = true, - .has_cs_global_id = true, - .lower_cs_local_index_to_id = true, - .max_unroll_iterations = 32, - .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp), - .force_indirect_unrolling_sampler = true, + .vertex_id_zero_based = true, + .has_cs_global_id = true, + .lower_cs_local_index_to_id = true, + .max_unroll_iterations = 32, + .force_indirect_unrolling = + (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp), + 
.force_indirect_unrolling_sampler = true, }; #endif diff --git a/src/panfrost/midgard/midgard_derivatives.c b/src/panfrost/midgard/midgard_derivatives.c index 5ad2e378a83..d243a00bdd2 100644 --- a/src/panfrost/midgard/midgard_derivatives.c +++ b/src/panfrost/midgard/midgard_derivatives.c @@ -53,20 +53,20 @@ static unsigned mir_derivative_mode(nir_op op) { - switch (op) { - case nir_op_fddx: - case nir_op_fddx_fine: - case nir_op_fddx_coarse: - return TEXTURE_DFDX; + switch (op) { + case nir_op_fddx: + case nir_op_fddx_fine: + case nir_op_fddx_coarse: + return TEXTURE_DFDX; - case nir_op_fddy: - case nir_op_fddy_fine: - case nir_op_fddy_coarse: - return TEXTURE_DFDY; + case nir_op_fddy: + case nir_op_fddy_fine: + case nir_op_fddy_coarse: + return TEXTURE_DFDY; - default: - unreachable("Invalid derivative op"); - } + default: + unreachable("Invalid derivative op"); + } } /* Returns true if a texturing op computes derivatives either explicitly or @@ -75,91 +75,105 @@ mir_derivative_mode(nir_op op) bool mir_op_computes_derivatives(gl_shader_stage stage, unsigned op) { - /* Only fragment shaders may compute derivatives, but the sense of - * "normal" changes in vertex shaders on certain GPUs */ + /* Only fragment shaders may compute derivatives, but the sense of + * "normal" changes in vertex shaders on certain GPUs */ - if (op == midgard_tex_op_normal && stage != MESA_SHADER_FRAGMENT) - return false; + if (op == midgard_tex_op_normal && stage != MESA_SHADER_FRAGMENT) + return false; - switch (op) { - case midgard_tex_op_normal: - case midgard_tex_op_derivative: - assert(stage == MESA_SHADER_FRAGMENT); - return true; - default: - return false; - } + switch (op) { + case midgard_tex_op_normal: + case midgard_tex_op_derivative: + assert(stage == MESA_SHADER_FRAGMENT); + return true; + default: + return false; + } } void midgard_emit_derivatives(compiler_context *ctx, nir_alu_instr *instr) { - /* Create texture instructions */ + /* Create texture instructions */ - unsigned nr_components = nir_dest_num_components(instr->dest.dest); + unsigned nr_components = nir_dest_num_components(instr->dest.dest); - midgard_instruction ins = { - .type = TAG_TEXTURE_4, - .mask = mask_of(nr_components), - .dest = nir_dest_index(&instr->dest.dest), - .dest_type = nir_type_float32, - .src = { ~0, nir_src_index(ctx, &instr->src[0].src), ~0, ~0, }, - .swizzle = SWIZZLE_IDENTITY_4, - .src_types = { nir_type_float32, nir_type_float32, }, - .op = midgard_tex_op_derivative, - .texture = { - .mode = mir_derivative_mode(instr->op), - .format = 2, - .in_reg_full = 1, - .out_full = 1, - .sampler_type = MALI_SAMPLER_FLOAT, - }, - }; + midgard_instruction ins = { + .type = TAG_TEXTURE_4, + .mask = mask_of(nr_components), + .dest = nir_dest_index(&instr->dest.dest), + .dest_type = nir_type_float32, + .src = + { + ~0, + nir_src_index(ctx, &instr->src[0].src), + ~0, + ~0, + }, + .swizzle = SWIZZLE_IDENTITY_4, + .src_types = + { + nir_type_float32, + nir_type_float32, + }, + .op = midgard_tex_op_derivative, + .texture = + { + .mode = mir_derivative_mode(instr->op), + .format = 2, + .in_reg_full = 1, + .out_full = 1, + .sampler_type = MALI_SAMPLER_FLOAT, + }, + }; - if (!instr->dest.dest.is_ssa) - ins.mask &= instr->dest.write_mask; + if (!instr->dest.dest.is_ssa) + ins.mask &= instr->dest.write_mask; - emit_mir_instruction(ctx, ins); + emit_mir_instruction(ctx, ins); } void midgard_lower_derivatives(compiler_context *ctx, midgard_block *block) { - mir_foreach_instr_in_block_safe(block, ins) { - if (ins->type != TAG_TEXTURE_4) 
continue; - if (ins->op != midgard_tex_op_derivative) continue; + mir_foreach_instr_in_block_safe(block, ins) { + if (ins->type != TAG_TEXTURE_4) + continue; + if (ins->op != midgard_tex_op_derivative) + continue; - /* Check if we need to split */ + /* Check if we need to split */ - bool upper = ins->mask & 0b1100; - bool lower = ins->mask & 0b0011; + bool upper = ins->mask & 0b1100; + bool lower = ins->mask & 0b0011; - if (!(upper && lower)) continue; + if (!(upper && lower)) + continue; - /* Duplicate for dedicated upper instruction */ + /* Duplicate for dedicated upper instruction */ - midgard_instruction dup; - memcpy(&dup, ins, sizeof(dup)); + midgard_instruction dup; + memcpy(&dup, ins, sizeof(dup)); - /* Fixup masks. Make original just lower and dupe just upper */ + /* Fixup masks. Make original just lower and dupe just upper */ - ins->mask &= 0b0011; - dup.mask &= 0b1100; + ins->mask &= 0b0011; + dup.mask &= 0b1100; - /* Fixup swizzles */ - dup.swizzle[0][0] = dup.swizzle[0][1] = dup.swizzle[0][2] = COMPONENT_X; - dup.swizzle[0][3] = COMPONENT_Y; + /* Fixup swizzles */ + dup.swizzle[0][0] = dup.swizzle[0][1] = dup.swizzle[0][2] = COMPONENT_X; + dup.swizzle[0][3] = COMPONENT_Y; - dup.swizzle[1][0] = COMPONENT_Z; - dup.swizzle[1][1] = dup.swizzle[1][2] = dup.swizzle[1][3] = COMPONENT_W; + dup.swizzle[1][0] = COMPONENT_Z; + dup.swizzle[1][1] = dup.swizzle[1][2] = dup.swizzle[1][3] = COMPONENT_W; - /* Insert the new instruction */ - mir_insert_instruction_before(ctx, mir_next_op(ins), dup); + /* Insert the new instruction */ + mir_insert_instruction_before(ctx, mir_next_op(ins), dup); - /* We'll need both instructions to write to the same index, so - * rewrite to use a register */ + /* We'll need both instructions to write to the same index, so + * rewrite to use a register */ - unsigned new = make_compiler_temp_reg(ctx); - mir_rewrite_index(ctx, ins->dest, new); - } + unsigned new = make_compiler_temp_reg(ctx); + mir_rewrite_index(ctx, ins->dest, new); + } } diff --git a/src/panfrost/midgard/midgard_emit.c b/src/panfrost/midgard/midgard_emit.c index 92c6dd11dab..7839760ba5e 100644 --- a/src/panfrost/midgard/midgard_emit.c +++ b/src/panfrost/midgard/midgard_emit.c @@ -29,63 +29,65 @@ static midgard_int_mod mir_get_imod(bool shift, nir_alu_type T, bool half, bool scalar) { - if (!half) { - assert(!shift); - /* Doesn't matter, src mods are only used when expanding */ - return midgard_int_sign_extend; - } + if (!half) { + assert(!shift); + /* Doesn't matter, src mods are only used when expanding */ + return midgard_int_sign_extend; + } - if (shift) - return midgard_int_left_shift; + if (shift) + return midgard_int_left_shift; - if (nir_alu_type_get_base_type(T) == nir_type_int) - return midgard_int_sign_extend; - else - return midgard_int_zero_extend; + if (nir_alu_type_get_base_type(T) == nir_type_int) + return midgard_int_sign_extend; + else + return midgard_int_zero_extend; } void midgard_pack_ubo_index_imm(midgard_load_store_word *word, unsigned index) { - word->arg_comp = index & 0x3; - word->arg_reg = (index >> 2) & 0x7; - word->bitsize_toggle = (index >> 5) & 0x1; - word->index_format = (index >> 6) & 0x3; + word->arg_comp = index & 0x3; + word->arg_reg = (index >> 2) & 0x7; + word->bitsize_toggle = (index >> 5) & 0x1; + word->index_format = (index >> 6) & 0x3; } -void midgard_pack_varying_params(midgard_load_store_word *word, midgard_varying_params p) +void +midgard_pack_varying_params(midgard_load_store_word *word, + midgard_varying_params p) { - /* Currently these parameters are 
not supported. */ - assert(p.direct_sample_pos_x == 0 && p.direct_sample_pos_y == 0); + /* Currently these parameters are not supported. */ + assert(p.direct_sample_pos_x == 0 && p.direct_sample_pos_y == 0); - unsigned u; - memcpy(&u, &p, sizeof(p)); + unsigned u; + memcpy(&u, &p, sizeof(p)); - word->signed_offset |= u & 0x1FF; + word->signed_offset |= u & 0x1FF; } -midgard_varying_params midgard_unpack_varying_params(midgard_load_store_word word) +midgard_varying_params +midgard_unpack_varying_params(midgard_load_store_word word) { - unsigned params = word.signed_offset & 0x1FF; + unsigned params = word.signed_offset & 0x1FF; - midgard_varying_params p; - memcpy(&p, ¶ms, sizeof(p)); + midgard_varying_params p; + memcpy(&p, ¶ms, sizeof(p)); - return p; + return p; } unsigned mir_pack_mod(midgard_instruction *ins, unsigned i, bool scalar) { - bool integer = midgard_is_integer_op(ins->op); - unsigned base_size = max_bitsize_for_alu(ins); - unsigned sz = nir_alu_type_get_type_size(ins->src_types[i]); - bool half = (sz == (base_size >> 1)); + bool integer = midgard_is_integer_op(ins->op); + unsigned base_size = max_bitsize_for_alu(ins); + unsigned sz = nir_alu_type_get_type_size(ins->src_types[i]); + bool half = (sz == (base_size >> 1)); - return integer ? - mir_get_imod(ins->src_shift[i], ins->src_types[i], half, scalar) : - ((ins->src_abs[i] << 0) | - ((ins->src_neg[i] << 1))); + return integer + ? mir_get_imod(ins->src_shift[i], ins->src_types[i], half, scalar) + : ((ins->src_abs[i] << 0) | ((ins->src_neg[i] << 1))); } /* Midgard IR only knows vector ALU types, but we sometimes need to actually @@ -95,75 +97,76 @@ mir_pack_mod(midgard_instruction *ins, unsigned i, bool scalar) static int component_from_mask(unsigned mask) { - for (int c = 0; c < 8; ++c) { - if (mask & (1 << c)) - return c; - } + for (int c = 0; c < 8; ++c) { + if (mask & (1 << c)) + return c; + } - assert(0); - return 0; + assert(0); + return 0; } static unsigned mir_pack_scalar_source(unsigned mod, bool is_full, unsigned component) { - midgard_scalar_alu_src s = { - .mod = mod, - .full = is_full, - .component = component << (is_full ? 1 : 0), - }; + midgard_scalar_alu_src s = { + .mod = mod, + .full = is_full, + .component = component << (is_full ? 
1 : 0), + }; - unsigned o; - memcpy(&o, &s, sizeof(s)); + unsigned o; + memcpy(&o, &s, sizeof(s)); - return o & ((1 << 6) - 1); + return o & ((1 << 6) - 1); } static midgard_scalar_alu vector_to_scalar_alu(midgard_vector_alu v, midgard_instruction *ins) { - bool is_full = nir_alu_type_get_type_size(ins->dest_type) == 32; + bool is_full = nir_alu_type_get_type_size(ins->dest_type) == 32; - bool half_0 = nir_alu_type_get_type_size(ins->src_types[0]) == 16; - bool half_1 = nir_alu_type_get_type_size(ins->src_types[1]) == 16; - unsigned comp = component_from_mask(ins->mask); + bool half_0 = nir_alu_type_get_type_size(ins->src_types[0]) == 16; + bool half_1 = nir_alu_type_get_type_size(ins->src_types[1]) == 16; + unsigned comp = component_from_mask(ins->mask); - unsigned packed_src[2] = { - mir_pack_scalar_source(mir_pack_mod(ins, 0, true), !half_0, ins->swizzle[0][comp]), - mir_pack_scalar_source(mir_pack_mod(ins, 1, true), !half_1, ins->swizzle[1][comp]) - }; + unsigned packed_src[2] = { + mir_pack_scalar_source(mir_pack_mod(ins, 0, true), !half_0, + ins->swizzle[0][comp]), + mir_pack_scalar_source(mir_pack_mod(ins, 1, true), !half_1, + ins->swizzle[1][comp])}; - /* The output component is from the mask */ - midgard_scalar_alu s = { - .op = v.op, - .src1 = packed_src[0], - .src2 = packed_src[1], - .outmod = v.outmod, - .output_full = is_full, - .output_component = comp, - }; + /* The output component is from the mask */ + midgard_scalar_alu s = { + .op = v.op, + .src1 = packed_src[0], + .src2 = packed_src[1], + .outmod = v.outmod, + .output_full = is_full, + .output_component = comp, + }; - /* Full components are physically spaced out */ - if (is_full) { - assert(s.output_component < 4); - s.output_component <<= 1; - } + /* Full components are physically spaced out */ + if (is_full) { + assert(s.output_component < 4); + s.output_component <<= 1; + } - /* Inline constant is passed along rather than trying to extract it - * from v */ + /* Inline constant is passed along rather than trying to extract it + * from v */ - if (ins->has_inline_constant) { - uint16_t imm = 0; - int lower_11 = ins->inline_constant & ((1 << 12) - 1); - imm |= (lower_11 >> 9) & 3; - imm |= (lower_11 >> 6) & 4; - imm |= (lower_11 >> 2) & 0x38; - imm |= (lower_11 & 63) << 6; + if (ins->has_inline_constant) { + uint16_t imm = 0; + int lower_11 = ins->inline_constant & ((1 << 12) - 1); + imm |= (lower_11 >> 9) & 3; + imm |= (lower_11 >> 6) & 4; + imm |= (lower_11 >> 2) & 0x38; + imm |= (lower_11 & 63) << 6; - s.src2 = imm; - } + s.src2 = imm; + } - return s; + return s; } /* 64-bit swizzles are super easy since there are 2 components of 2 components @@ -176,238 +179,230 @@ vector_to_scalar_alu(midgard_vector_alu v, midgard_instruction *ins) * with rep. Pretty nifty, huh? */ static unsigned -mir_pack_swizzle_64(unsigned *swizzle, unsigned max_component, - bool expand_high) +mir_pack_swizzle_64(unsigned *swizzle, unsigned max_component, bool expand_high) { - unsigned packed = 0; - unsigned base = expand_high ? 2 : 0; + unsigned packed = 0; + unsigned base = expand_high ? 2 : 0; - for (unsigned i = base; i < base + 2; ++i) { - assert(swizzle[i] <= max_component); + for (unsigned i = base; i < base + 2; ++i) { + assert(swizzle[i] <= max_component); - unsigned a = (swizzle[i] & 1) ? - (COMPONENT_W << 2) | COMPONENT_Z : - (COMPONENT_Y << 2) | COMPONENT_X; + unsigned a = (swizzle[i] & 1) ? 
(COMPONENT_W << 2) | COMPONENT_Z + : (COMPONENT_Y << 2) | COMPONENT_X; - if (i & 1) - packed |= a << 4; - else - packed |= a; - } + if (i & 1) + packed |= a << 4; + else + packed |= a; + } - return packed; + return packed; } static void mir_pack_mask_alu(midgard_instruction *ins, midgard_vector_alu *alu) { - unsigned effective = ins->mask; + unsigned effective = ins->mask; - /* If we have a destination override, we need to figure out whether to - * override to the lower or upper half, shifting the effective mask in - * the latter, so AAAA.... becomes AAAA */ + /* If we have a destination override, we need to figure out whether to + * override to the lower or upper half, shifting the effective mask in + * the latter, so AAAA.... becomes AAAA */ - unsigned inst_size = max_bitsize_for_alu(ins); - signed upper_shift = mir_upper_override(ins, inst_size); + unsigned inst_size = max_bitsize_for_alu(ins); + signed upper_shift = mir_upper_override(ins, inst_size); - if (upper_shift >= 0) { - effective >>= upper_shift; - alu->shrink_mode = upper_shift ? - midgard_shrink_mode_upper : - midgard_shrink_mode_lower; - } else { - alu->shrink_mode = midgard_shrink_mode_none; - } + if (upper_shift >= 0) { + effective >>= upper_shift; + alu->shrink_mode = + upper_shift ? midgard_shrink_mode_upper : midgard_shrink_mode_lower; + } else { + alu->shrink_mode = midgard_shrink_mode_none; + } - if (inst_size == 32) - alu->mask = expand_writemask(effective, 2); - else if (inst_size == 64) - alu->mask = expand_writemask(effective, 1); - else - alu->mask = effective; + if (inst_size == 32) + alu->mask = expand_writemask(effective, 2); + else if (inst_size == 64) + alu->mask = expand_writemask(effective, 1); + else + alu->mask = effective; } static unsigned -mir_pack_swizzle(unsigned mask, unsigned *swizzle, - unsigned sz, unsigned base_size, - bool op_channeled, midgard_src_expand_mode *expand_mode) +mir_pack_swizzle(unsigned mask, unsigned *swizzle, unsigned sz, + unsigned base_size, bool op_channeled, + midgard_src_expand_mode *expand_mode) { - unsigned packed = 0; + unsigned packed = 0; - *expand_mode = midgard_src_passthrough; + *expand_mode = midgard_src_passthrough; - midgard_reg_mode reg_mode = reg_mode_for_bitsize(base_size); + midgard_reg_mode reg_mode = reg_mode_for_bitsize(base_size); - if (reg_mode == midgard_reg_mode_64) { - assert(sz == 64 || sz == 32); - unsigned components = (sz == 32) ? 4 : 2; + if (reg_mode == midgard_reg_mode_64) { + assert(sz == 64 || sz == 32); + unsigned components = (sz == 32) ? 4 : 2; - packed = mir_pack_swizzle_64(swizzle, components, - mask & 0xc); + packed = mir_pack_swizzle_64(swizzle, components, mask & 0xc); - if (sz == 32) { - ASSERTED bool dontcare = true; - bool hi = false; + if (sz == 32) { + ASSERTED bool dontcare = true; + bool hi = false; - assert(util_bitcount(mask) <= 2); + assert(util_bitcount(mask) <= 2); - u_foreach_bit(i, mask) { - bool hi_i = swizzle[i] >= COMPONENT_Z; + u_foreach_bit(i, mask) { + bool hi_i = swizzle[i] >= COMPONENT_Z; - /* We can't mix halves */ - assert(dontcare || (hi == hi_i)); - hi = hi_i; - dontcare = false; - } + /* We can't mix halves */ + assert(dontcare || (hi == hi_i)); + hi = hi_i; + dontcare = false; + } - *expand_mode = hi ? midgard_src_expand_high : - midgard_src_expand_low; - } else if (sz < 32) { - unreachable("Cannot encode 8/16 swizzle in 64-bit"); - } - } else { - /* For 32-bit, swizzle packing is stupid-simple. For 16-bit, - * the strategy is to check whether the nibble we're on is - * upper or lower. 
We need all components to be on the same - * "side"; that much is enforced by the ISA and should have - * been lowered. TODO: 8-bit packing. TODO: vec8 */ + *expand_mode = hi ? midgard_src_expand_high : midgard_src_expand_low; + } else if (sz < 32) { + unreachable("Cannot encode 8/16 swizzle in 64-bit"); + } + } else { + /* For 32-bit, swizzle packing is stupid-simple. For 16-bit, + * the strategy is to check whether the nibble we're on is + * upper or lower. We need all components to be on the same + * "side"; that much is enforced by the ISA and should have + * been lowered. TODO: 8-bit packing. TODO: vec8 */ - unsigned first = mask ? ffs(mask) - 1 : 0; - bool upper = swizzle[first] > 3; + unsigned first = mask ? ffs(mask) - 1 : 0; + bool upper = swizzle[first] > 3; - if (upper && mask) - assert(sz <= 16); + if (upper && mask) + assert(sz <= 16); - bool dest_up = !op_channeled && (first >= 4); + bool dest_up = !op_channeled && (first >= 4); - for (unsigned c = (dest_up ? 4 : 0); c < (dest_up ? 8 : 4); ++c) { - unsigned v = swizzle[c]; + for (unsigned c = (dest_up ? 4 : 0); c < (dest_up ? 8 : 4); ++c) { + unsigned v = swizzle[c]; - ASSERTED bool t_upper = v > (sz == 8 ? 7 : 3); + ASSERTED bool t_upper = v > (sz == 8 ? 7 : 3); - /* Ensure we're doing something sane */ + /* Ensure we're doing something sane */ - if (mask & (1 << c)) { - assert(t_upper == upper); - assert(v <= (sz == 8 ? 15 : 7)); - } + if (mask & (1 << c)) { + assert(t_upper == upper); + assert(v <= (sz == 8 ? 15 : 7)); + } - /* Use the non upper part */ - v &= 0x3; + /* Use the non upper part */ + v &= 0x3; - packed |= v << (2 * (c % 4)); - } + packed |= v << (2 * (c % 4)); + } + /* Replicate for now.. should really pick a side for + * dot products */ - /* Replicate for now.. should really pick a side for - * dot products */ + if (reg_mode == midgard_reg_mode_16 && sz == 16) { + *expand_mode = upper ? midgard_src_rep_high : midgard_src_rep_low; + } else if (reg_mode == midgard_reg_mode_16 && sz == 8) { + if (base_size == 16) { + *expand_mode = + upper ? midgard_src_expand_high : midgard_src_expand_low; + } else if (upper) { + *expand_mode = midgard_src_swap; + } + } else if (reg_mode == midgard_reg_mode_32 && sz == 16) { + *expand_mode = + upper ? midgard_src_expand_high : midgard_src_expand_low; + } else if (reg_mode == midgard_reg_mode_8) { + unreachable("Unhandled reg mode"); + } + } - if (reg_mode == midgard_reg_mode_16 && sz == 16) { - *expand_mode = upper ? midgard_src_rep_high : - midgard_src_rep_low; - } else if (reg_mode == midgard_reg_mode_16 && sz == 8) { - if (base_size == 16) { - *expand_mode = upper ? midgard_src_expand_high : - midgard_src_expand_low; - } else if (upper) { - *expand_mode = midgard_src_swap; - } - } else if (reg_mode == midgard_reg_mode_32 && sz == 16) { - *expand_mode = upper ? 
midgard_src_expand_high : - midgard_src_expand_low; - } else if (reg_mode == midgard_reg_mode_8) { - unreachable("Unhandled reg mode"); - } - } - - return packed; + return packed; } static void mir_pack_vector_srcs(midgard_instruction *ins, midgard_vector_alu *alu) { - bool channeled = GET_CHANNEL_COUNT(alu_opcode_props[ins->op].props); + bool channeled = GET_CHANNEL_COUNT(alu_opcode_props[ins->op].props); - unsigned base_size = max_bitsize_for_alu(ins); + unsigned base_size = max_bitsize_for_alu(ins); - for (unsigned i = 0; i < 2; ++i) { - if (ins->has_inline_constant && (i == 1)) - continue; + for (unsigned i = 0; i < 2; ++i) { + if (ins->has_inline_constant && (i == 1)) + continue; - if (ins->src[i] == ~0) - continue; + if (ins->src[i] == ~0) + continue; - unsigned sz = nir_alu_type_get_type_size(ins->src_types[i]); - assert((sz == base_size) || (sz == base_size / 2)); + unsigned sz = nir_alu_type_get_type_size(ins->src_types[i]); + assert((sz == base_size) || (sz == base_size / 2)); - midgard_src_expand_mode expand_mode = midgard_src_passthrough; - unsigned swizzle = mir_pack_swizzle(ins->mask, ins->swizzle[i], - sz, base_size, channeled, - &expand_mode); + midgard_src_expand_mode expand_mode = midgard_src_passthrough; + unsigned swizzle = mir_pack_swizzle(ins->mask, ins->swizzle[i], sz, + base_size, channeled, &expand_mode); - midgard_vector_alu_src pack = { - .mod = mir_pack_mod(ins, i, false), - .expand_mode = expand_mode, - .swizzle = swizzle, - }; + midgard_vector_alu_src pack = { + .mod = mir_pack_mod(ins, i, false), + .expand_mode = expand_mode, + .swizzle = swizzle, + }; - unsigned p = vector_alu_srco_unsigned(pack); - - if (i == 0) - alu->src1 = p; - else - alu->src2 = p; - } + unsigned p = vector_alu_srco_unsigned(pack); + + if (i == 0) + alu->src1 = p; + else + alu->src2 = p; + } } static void mir_pack_swizzle_ldst(midgard_instruction *ins) { - unsigned compsz = OP_IS_STORE(ins->op) ? - nir_alu_type_get_type_size(ins->src_types[0]) : - nir_alu_type_get_type_size(ins->dest_type); - unsigned maxcomps = 128 / compsz; - unsigned step = DIV_ROUND_UP(32, compsz); + unsigned compsz = OP_IS_STORE(ins->op) + ? nir_alu_type_get_type_size(ins->src_types[0]) + : nir_alu_type_get_type_size(ins->dest_type); + unsigned maxcomps = 128 / compsz; + unsigned step = DIV_ROUND_UP(32, compsz); - for (unsigned c = 0; c < maxcomps; c += step) { - unsigned v = ins->swizzle[0][c]; + for (unsigned c = 0; c < maxcomps; c += step) { + unsigned v = ins->swizzle[0][c]; - /* Make sure the component index doesn't exceed the maximum - * number of components. */ - assert(v <= maxcomps); + /* Make sure the component index doesn't exceed the maximum + * number of components. 
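/* Worked example for the arithmetic above, assuming a 16-bit component size:
 * compsz = 16 gives maxcomps = 128 / 16 = 8 and step = DIV_ROUND_UP(32, 16) = 2,
 * so the loop visits c = 0, 2, 4, 6 and packs (v / 2) as 2-bit fields at bit
 * offsets 0, 2, 4 and 6 of load_store.swizzle. Hypothetical sketch of that
 * 16-bit case only: */
static unsigned
ldst_swizzle_packed_16bit(const unsigned swizzle[8])
{
   unsigned packed = 0;

   for (unsigned c = 0; c < 8; c += 2)
      packed |= (swizzle[c] / 2) << (2 * (c / 2));

   return packed;
}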
*/ + assert(v <= maxcomps); - if (compsz <= 32) - ins->load_store.swizzle |= (v / step) << (2 * (c / step)); - else - ins->load_store.swizzle |= ((v / step) << (4 * c)) | - (((v / step) + 1) << ((4 * c) + 2)); - } + if (compsz <= 32) + ins->load_store.swizzle |= (v / step) << (2 * (c / step)); + else + ins->load_store.swizzle |= + ((v / step) << (4 * c)) | (((v / step) + 1) << ((4 * c) + 2)); + } - /* TODO: arg_1/2 */ + /* TODO: arg_1/2 */ } static void mir_pack_swizzle_tex(midgard_instruction *ins) { - for (unsigned i = 0; i < 2; ++i) { - unsigned packed = 0; + for (unsigned i = 0; i < 2; ++i) { + unsigned packed = 0; - for (unsigned c = 0; c < 4; ++c) { - unsigned v = ins->swizzle[i][c]; + for (unsigned c = 0; c < 4; ++c) { + unsigned v = ins->swizzle[i][c]; - /* Check vec4 */ - assert(v <= 3); + /* Check vec4 */ + assert(v <= 3); - packed |= v << (2 * c); - } + packed |= v << (2 * c); + } - if (i == 0) - ins->texture.swizzle = packed; - else - ins->texture.in_reg_swizzle = packed; - } + if (i == 0) + ins->texture.swizzle = packed; + else + ins->texture.in_reg_swizzle = packed; + } - /* TODO: bias component */ + /* TODO: bias component */ } /* @@ -419,43 +414,45 @@ static bool mir_can_run_ooo(midgard_block *block, midgard_bundle *bundle, unsigned dependency) { - /* Don't read out of bounds */ - if (bundle >= (midgard_bundle *) ((char *) block->bundles.data + block->bundles.size)) - return false; + /* Don't read out of bounds */ + if (bundle >= + (midgard_bundle *)((char *)block->bundles.data + block->bundles.size)) + return false; - /* Texture ops can't execute with other texture ops */ - if (!IS_ALU(bundle->tag) && bundle->tag != TAG_LOAD_STORE_4) - return false; + /* Texture ops can't execute with other texture ops */ + if (!IS_ALU(bundle->tag) && bundle->tag != TAG_LOAD_STORE_4) + return false; - for (unsigned i = 0; i < bundle->instruction_count; ++i) { - midgard_instruction *ins = bundle->instructions[i]; + for (unsigned i = 0; i < bundle->instruction_count; ++i) { + midgard_instruction *ins = bundle->instructions[i]; - /* No branches, jumps, or discards */ - if (ins->compact_branch) - return false; + /* No branches, jumps, or discards */ + if (ins->compact_branch) + return false; - /* No read-after-write data dependencies */ - mir_foreach_src(ins, s) { - if (ins->src[s] == dependency) - return false; - } - } + /* No read-after-write data dependencies */ + mir_foreach_src(ins, s) { + if (ins->src[s] == dependency) + return false; + } + } - /* Otherwise, we're okay */ - return true; + /* Otherwise, we're okay */ + return true; } static void -mir_pack_tex_ooo(midgard_block *block, midgard_bundle *bundle, midgard_instruction *ins) +mir_pack_tex_ooo(midgard_block *block, midgard_bundle *bundle, + midgard_instruction *ins) { - unsigned count = 0; + unsigned count = 0; - for (count = 0; count < 15; ++count) { - if (!mir_can_run_ooo(block, bundle + count + 1, ins->dest)) - break; - } + for (count = 0; count < 15; ++count) { + if (!mir_can_run_ooo(block, bundle + count + 1, ins->dest)) + break; + } - ins->texture.out_of_order = count; + ins->texture.out_of_order = count; } /* Load store masks are 4-bits. Load/store ops pack for that. 
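/* Note on the out-of-order packing just above: texture.out_of_order appears to
 * hold how many of the following bundles (at most 15) are ALU or load/store
 * bundles with no branches and no reads of the texture destination, presumably
 * so they can keep issuing while the texture operation is in flight. A
 * while-loop restatement of mir_pack_tex_ooo, for illustration only: */
static unsigned
texture_ooo_window(midgard_block *block, midgard_bundle *bundle, unsigned dest)
{
   unsigned count = 0;

   while (count < 15 && mir_can_run_ooo(block, bundle + count + 1, dest))
      count++;

   return count;
}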
@@ -466,113 +463,110 @@ mir_pack_tex_ooo(midgard_block *block, midgard_bundle *bundle, midgard_instructi */ static unsigned -midgard_pack_common_store_mask(midgard_instruction *ins) { - ASSERTED unsigned comp_sz = nir_alu_type_get_type_size(ins->src_types[0]); - unsigned bytemask = mir_bytemask(ins); - unsigned packed = 0; +midgard_pack_common_store_mask(midgard_instruction *ins) +{ + ASSERTED unsigned comp_sz = nir_alu_type_get_type_size(ins->src_types[0]); + unsigned bytemask = mir_bytemask(ins); + unsigned packed = 0; - switch (ins->op) { - case midgard_op_st_u8: - return mir_bytemask(ins) & 1; - case midgard_op_st_u16: - return mir_bytemask(ins) & 3; - case midgard_op_st_32: - return mir_bytemask(ins); - case midgard_op_st_64: - assert(comp_sz >= 16); - for (unsigned i = 0; i < 4; i++) { - if (bytemask & (3 << (i * 2))) - packed |= 1 << i; - } - return packed; - case midgard_op_st_128: - assert(comp_sz >= 32); - for (unsigned i = 0; i < 4; i++) { - if (bytemask & (0xf << (i * 4))) - packed |= 1 << i; - } - return packed; - default: - unreachable("unexpected ldst opcode"); - } + switch (ins->op) { + case midgard_op_st_u8: + return mir_bytemask(ins) & 1; + case midgard_op_st_u16: + return mir_bytemask(ins) & 3; + case midgard_op_st_32: + return mir_bytemask(ins); + case midgard_op_st_64: + assert(comp_sz >= 16); + for (unsigned i = 0; i < 4; i++) { + if (bytemask & (3 << (i * 2))) + packed |= 1 << i; + } + return packed; + case midgard_op_st_128: + assert(comp_sz >= 32); + for (unsigned i = 0; i < 4; i++) { + if (bytemask & (0xf << (i * 4))) + packed |= 1 << i; + } + return packed; + default: + unreachable("unexpected ldst opcode"); + } } static void mir_pack_ldst_mask(midgard_instruction *ins) { - unsigned sz = nir_alu_type_get_type_size(ins->dest_type); - unsigned packed = ins->mask; + unsigned sz = nir_alu_type_get_type_size(ins->dest_type); + unsigned packed = ins->mask; - if (OP_IS_COMMON_STORE(ins->op)) { - packed = midgard_pack_common_store_mask(ins); - } else { - if (sz == 64) { - packed = ((ins->mask & 0x2) ? (0x8 | 0x4) : 0) | - ((ins->mask & 0x1) ? (0x2 | 0x1) : 0); - } else if (sz < 32) { - unsigned comps_per_32b = 32 / sz; + if (OP_IS_COMMON_STORE(ins->op)) { + packed = midgard_pack_common_store_mask(ins); + } else { + if (sz == 64) { + packed = ((ins->mask & 0x2) ? (0x8 | 0x4) : 0) | + ((ins->mask & 0x1) ? 
(0x2 | 0x1) : 0); + } else if (sz < 32) { + unsigned comps_per_32b = 32 / sz; - packed = 0; + packed = 0; - for (unsigned i = 0; i < 4; ++i) { - unsigned submask = (ins->mask >> (i * comps_per_32b)) & - BITFIELD_MASK(comps_per_32b); + for (unsigned i = 0; i < 4; ++i) { + unsigned submask = (ins->mask >> (i * comps_per_32b)) & + BITFIELD_MASK(comps_per_32b); - /* Make sure we're duplicated */ - assert(submask == 0 || submask == BITFIELD_MASK(comps_per_32b)); - packed |= (submask != 0) << i; - } - } else { - assert(sz == 32); - } - } + /* Make sure we're duplicated */ + assert(submask == 0 || submask == BITFIELD_MASK(comps_per_32b)); + packed |= (submask != 0) << i; + } + } else { + assert(sz == 32); + } + } - ins->load_store.mask = packed; + ins->load_store.mask = packed; } static void mir_lower_inverts(midgard_instruction *ins) { - bool inv[3] = { - ins->src_invert[0], - ins->src_invert[1], - ins->src_invert[2] - }; + bool inv[3] = {ins->src_invert[0], ins->src_invert[1], ins->src_invert[2]}; - switch (ins->op) { - case midgard_alu_op_iand: - /* a & ~b = iandnot(a, b) */ - /* ~a & ~b = ~(a | b) = inor(a, b) */ + switch (ins->op) { + case midgard_alu_op_iand: + /* a & ~b = iandnot(a, b) */ + /* ~a & ~b = ~(a | b) = inor(a, b) */ - if (inv[0] && inv[1]) - ins->op = midgard_alu_op_inor; - else if (inv[1]) - ins->op = midgard_alu_op_iandnot; + if (inv[0] && inv[1]) + ins->op = midgard_alu_op_inor; + else if (inv[1]) + ins->op = midgard_alu_op_iandnot; - break; - case midgard_alu_op_ior: - /* a | ~b = iornot(a, b) */ - /* ~a | ~b = ~(a & b) = inand(a, b) */ + break; + case midgard_alu_op_ior: + /* a | ~b = iornot(a, b) */ + /* ~a | ~b = ~(a & b) = inand(a, b) */ - if (inv[0] && inv[1]) - ins->op = midgard_alu_op_inand; - else if (inv[1]) - ins->op = midgard_alu_op_iornot; + if (inv[0] && inv[1]) + ins->op = midgard_alu_op_inand; + else if (inv[1]) + ins->op = midgard_alu_op_iornot; - break; + break; - case midgard_alu_op_ixor: - /* ~a ^ b = a ^ ~b = ~(a ^ b) = inxor(a, b) */ - /* ~a ^ ~b = a ^ b */ + case midgard_alu_op_ixor: + /* ~a ^ b = a ^ ~b = ~(a ^ b) = inxor(a, b) */ + /* ~a ^ ~b = a ^ b */ - if (inv[0] ^ inv[1]) - ins->op = midgard_alu_op_inxor; + if (inv[0] ^ inv[1]) + ins->op = midgard_alu_op_inxor; - break; + break; - default: - break; - } + default: + break; + } } /* Opcodes with ROUNDS are the base (rte/0) type so we can just add */ @@ -580,329 +574,316 @@ mir_lower_inverts(midgard_instruction *ins) static void mir_lower_roundmode(midgard_instruction *ins) { - if (alu_opcode_props[ins->op].props & MIDGARD_ROUNDS) { - assert(ins->roundmode <= 0x3); - ins->op += ins->roundmode; - } + if (alu_opcode_props[ins->op].props & MIDGARD_ROUNDS) { + assert(ins->roundmode <= 0x3); + ins->op += ins->roundmode; + } } static midgard_load_store_word load_store_from_instr(midgard_instruction *ins) { - midgard_load_store_word ldst = ins->load_store; - ldst.op = ins->op; + midgard_load_store_word ldst = ins->load_store; + ldst.op = ins->op; - if (OP_IS_STORE(ldst.op)) { - ldst.reg = SSA_REG_FROM_FIXED(ins->src[0]) & 1; - } else { - ldst.reg = SSA_REG_FROM_FIXED(ins->dest); - } + if (OP_IS_STORE(ldst.op)) { + ldst.reg = SSA_REG_FROM_FIXED(ins->src[0]) & 1; + } else { + ldst.reg = SSA_REG_FROM_FIXED(ins->dest); + } - /* Atomic opcode swizzles have a special meaning: - * - The first two bits say which component of the implicit register should be used - * - The next two bits say if the implicit register is r26 or r27 */ - if (OP_IS_ATOMIC(ins->op)) { - ldst.swizzle = 0; - ldst.swizzle |= 
ins->swizzle[3][0] & 3; - ldst.swizzle |= (SSA_REG_FROM_FIXED(ins->src[3]) & 1 ? 1 : 0) << 2; - } + /* Atomic opcode swizzles have a special meaning: + * - The first two bits say which component of the implicit register should + * be used + * - The next two bits say if the implicit register is r26 or r27 */ + if (OP_IS_ATOMIC(ins->op)) { + ldst.swizzle = 0; + ldst.swizzle |= ins->swizzle[3][0] & 3; + ldst.swizzle |= (SSA_REG_FROM_FIXED(ins->src[3]) & 1 ? 1 : 0) << 2; + } - if (ins->src[1] != ~0) { - ldst.arg_reg = SSA_REG_FROM_FIXED(ins->src[1]) - REGISTER_LDST_BASE; - unsigned sz = nir_alu_type_get_type_size(ins->src_types[1]); - ldst.arg_comp = midgard_ldst_comp(ldst.arg_reg, ins->swizzle[1][0], sz); - } + if (ins->src[1] != ~0) { + ldst.arg_reg = SSA_REG_FROM_FIXED(ins->src[1]) - REGISTER_LDST_BASE; + unsigned sz = nir_alu_type_get_type_size(ins->src_types[1]); + ldst.arg_comp = midgard_ldst_comp(ldst.arg_reg, ins->swizzle[1][0], sz); + } - if (ins->src[2] != ~0) { - ldst.index_reg = SSA_REG_FROM_FIXED(ins->src[2]) - REGISTER_LDST_BASE; - unsigned sz = nir_alu_type_get_type_size(ins->src_types[2]); - ldst.index_comp = midgard_ldst_comp(ldst.index_reg, ins->swizzle[2][0], sz); - } + if (ins->src[2] != ~0) { + ldst.index_reg = SSA_REG_FROM_FIXED(ins->src[2]) - REGISTER_LDST_BASE; + unsigned sz = nir_alu_type_get_type_size(ins->src_types[2]); + ldst.index_comp = + midgard_ldst_comp(ldst.index_reg, ins->swizzle[2][0], sz); + } - return ldst; + return ldst; } static midgard_texture_word texture_word_from_instr(midgard_instruction *ins) { - midgard_texture_word tex = ins->texture; - tex.op = ins->op; + midgard_texture_word tex = ins->texture; + tex.op = ins->op; - unsigned src1 = ins->src[1] == ~0 ? REGISTER_UNUSED : SSA_REG_FROM_FIXED(ins->src[1]); - tex.in_reg_select = src1 & 1; + unsigned src1 = + ins->src[1] == ~0 ? REGISTER_UNUSED : SSA_REG_FROM_FIXED(ins->src[1]); + tex.in_reg_select = src1 & 1; - unsigned dest = ins->dest == ~0 ? REGISTER_UNUSED : SSA_REG_FROM_FIXED(ins->dest); - tex.out_reg_select = dest & 1; + unsigned dest = + ins->dest == ~0 ? 
REGISTER_UNUSED : SSA_REG_FROM_FIXED(ins->dest); + tex.out_reg_select = dest & 1; - if (ins->src[2] != ~0) { - midgard_tex_register_select sel = { - .select = SSA_REG_FROM_FIXED(ins->src[2]) & 1, - .full = 1, - .component = ins->swizzle[2][0], - }; - uint8_t packed; - memcpy(&packed, &sel, sizeof(packed)); - tex.bias = packed; - } + if (ins->src[2] != ~0) { + midgard_tex_register_select sel = { + .select = SSA_REG_FROM_FIXED(ins->src[2]) & 1, + .full = 1, + .component = ins->swizzle[2][0], + }; + uint8_t packed; + memcpy(&packed, &sel, sizeof(packed)); + tex.bias = packed; + } - if (ins->src[3] != ~0) { - unsigned x = ins->swizzle[3][0]; - unsigned y = x + 1; - unsigned z = x + 2; + if (ins->src[3] != ~0) { + unsigned x = ins->swizzle[3][0]; + unsigned y = x + 1; + unsigned z = x + 2; - /* Check range, TODO: half-registers */ - assert(z < 4); + /* Check range, TODO: half-registers */ + assert(z < 4); - unsigned offset_reg = SSA_REG_FROM_FIXED(ins->src[3]); - tex.offset = - (1) | /* full */ - (offset_reg & 1) << 1 | /* select */ - (0 << 2) | /* upper */ - (x << 3) | /* swizzle */ - (y << 5) | /* swizzle */ - (z << 7); /* swizzle */ - } + unsigned offset_reg = SSA_REG_FROM_FIXED(ins->src[3]); + tex.offset = (1) | /* full */ + (offset_reg & 1) << 1 | /* select */ + (0 << 2) | /* upper */ + (x << 3) | /* swizzle */ + (y << 5) | /* swizzle */ + (z << 7); /* swizzle */ + } - return tex; + return tex; } static midgard_vector_alu vector_alu_from_instr(midgard_instruction *ins) { - midgard_vector_alu alu = { - .op = ins->op, - .outmod = ins->outmod, - .reg_mode = reg_mode_for_bitsize(max_bitsize_for_alu(ins)), - }; + midgard_vector_alu alu = { + .op = ins->op, + .outmod = ins->outmod, + .reg_mode = reg_mode_for_bitsize(max_bitsize_for_alu(ins)), + }; - if (ins->has_inline_constant) { - /* Encode inline 16-bit constant. See disassembler for - * where the algorithm is from */ + if (ins->has_inline_constant) { + /* Encode inline 16-bit constant. See disassembler for + * where the algorithm is from */ - int lower_11 = ins->inline_constant & ((1 << 12) - 1); - uint16_t imm = ((lower_11 >> 8) & 0x7) | - ((lower_11 & 0xFF) << 3); + int lower_11 = ins->inline_constant & ((1 << 12) - 1); + uint16_t imm = ((lower_11 >> 8) & 0x7) | ((lower_11 & 0xFF) << 3); - alu.src2 = imm << 2; - } + alu.src2 = imm << 2; + } - return alu; + return alu; } static midgard_branch_extended -midgard_create_branch_extended( midgard_condition cond, - midgard_jmp_writeout_op op, - unsigned dest_tag, - signed quadword_offset) +midgard_create_branch_extended(midgard_condition cond, + midgard_jmp_writeout_op op, unsigned dest_tag, + signed quadword_offset) { - /* The condition code is actually a LUT describing a function to - * combine multiple condition codes. However, we only support a single - * condition code at the moment, so we just duplicate over a bunch of - * times. */ + /* The condition code is actually a LUT describing a function to + * combine multiple condition codes. However, we only support a single + * condition code at the moment, so we just duplicate over a bunch of + * times. 
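/* Equivalent loop form of the shift chain that follows: the 2-bit condition
 * code is replicated into all eight slots of the 16-bit LUT field.
 * Hypothetical helper, for illustration only. */
static uint16_t
duplicate_condition(midgard_condition cond)
{
   uint16_t lut = 0;

   for (unsigned i = 0; i < 8; ++i)
      lut |= (uint16_t)cond << (2 * i);

   return lut;
}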
*/ - uint16_t duplicated_cond = - (cond << 14) | - (cond << 12) | - (cond << 10) | - (cond << 8) | - (cond << 6) | - (cond << 4) | - (cond << 2) | - (cond << 0); + uint16_t duplicated_cond = (cond << 14) | (cond << 12) | (cond << 10) | + (cond << 8) | (cond << 6) | (cond << 4) | + (cond << 2) | (cond << 0); - midgard_branch_extended branch = { - .op = op, - .dest_tag = dest_tag, - .offset = quadword_offset, - .cond = duplicated_cond, - }; + midgard_branch_extended branch = { + .op = op, + .dest_tag = dest_tag, + .offset = quadword_offset, + .cond = duplicated_cond, + }; - return branch; + return branch; } static void -emit_branch(midgard_instruction *ins, - compiler_context *ctx, - midgard_block *block, - midgard_bundle *bundle, +emit_branch(midgard_instruction *ins, compiler_context *ctx, + midgard_block *block, midgard_bundle *bundle, struct util_dynarray *emission) { - /* Parse some basic branch info */ - bool is_compact = ins->unit == ALU_ENAB_BR_COMPACT; - bool is_conditional = ins->branch.conditional; - bool is_inverted = ins->branch.invert_conditional; - bool is_discard = ins->branch.target_type == TARGET_DISCARD; - bool is_tilebuf_wait = ins->branch.target_type == TARGET_TILEBUF_WAIT; - bool is_special = is_discard || is_tilebuf_wait; - bool is_writeout = ins->writeout; + /* Parse some basic branch info */ + bool is_compact = ins->unit == ALU_ENAB_BR_COMPACT; + bool is_conditional = ins->branch.conditional; + bool is_inverted = ins->branch.invert_conditional; + bool is_discard = ins->branch.target_type == TARGET_DISCARD; + bool is_tilebuf_wait = ins->branch.target_type == TARGET_TILEBUF_WAIT; + bool is_special = is_discard || is_tilebuf_wait; + bool is_writeout = ins->writeout; - /* Determine the block we're jumping to */ - int target_number = ins->branch.target_block; + /* Determine the block we're jumping to */ + int target_number = ins->branch.target_block; - /* Report the destination tag */ - int dest_tag = is_discard ? 0 : - is_tilebuf_wait ? bundle->tag : - midgard_get_first_tag_from_block(ctx, target_number); + /* Report the destination tag */ + int dest_tag = is_discard ? 0 + : is_tilebuf_wait + ? 
bundle->tag + : midgard_get_first_tag_from_block(ctx, target_number); - /* Count up the number of quadwords we're - * jumping over = number of quadwords until - * (br_block_idx, target_number) */ + /* Count up the number of quadwords we're + * jumping over = number of quadwords until + * (br_block_idx, target_number) */ - int quadword_offset = 0; + int quadword_offset = 0; - if (is_discard) { - /* Fixed encoding, not actually an offset */ - quadword_offset = 0x2; - } else if (is_tilebuf_wait) { - quadword_offset = -1; - } else if (target_number > block->base.name) { - /* Jump forward */ + if (is_discard) { + /* Fixed encoding, not actually an offset */ + quadword_offset = 0x2; + } else if (is_tilebuf_wait) { + quadword_offset = -1; + } else if (target_number > block->base.name) { + /* Jump forward */ - for (int idx = block->base.name+1; idx < target_number; ++idx) { - midgard_block *blk = mir_get_block(ctx, idx); - assert(blk); + for (int idx = block->base.name + 1; idx < target_number; ++idx) { + midgard_block *blk = mir_get_block(ctx, idx); + assert(blk); - quadword_offset += blk->quadword_count; - } - } else { - /* Jump backwards */ + quadword_offset += blk->quadword_count; + } + } else { + /* Jump backwards */ - for (int idx = block->base.name; idx >= target_number; --idx) { - midgard_block *blk = mir_get_block(ctx, idx); - assert(blk); + for (int idx = block->base.name; idx >= target_number; --idx) { + midgard_block *blk = mir_get_block(ctx, idx); + assert(blk); - quadword_offset -= blk->quadword_count; - } - } + quadword_offset -= blk->quadword_count; + } + } - /* Unconditional extended branches (far jumps) - * have issues, so we always use a conditional - * branch, setting the condition to always for - * unconditional. For compact unconditional - * branches, cond isn't used so it doesn't - * matter what we pick. */ + /* Unconditional extended branches (far jumps) + * have issues, so we always use a conditional + * branch, setting the condition to always for + * unconditional. For compact unconditional + * branches, cond isn't used so it doesn't + * matter what we pick. */ - midgard_condition cond = - !is_conditional ? midgard_condition_always : - is_inverted ? midgard_condition_false : - midgard_condition_true; + midgard_condition cond = !is_conditional ? midgard_condition_always + : is_inverted ? midgard_condition_false + : midgard_condition_true; - midgard_jmp_writeout_op op = - is_discard ? midgard_jmp_writeout_op_discard : - is_tilebuf_wait ? midgard_jmp_writeout_op_tilebuffer_pending : - is_writeout ? midgard_jmp_writeout_op_writeout : - (is_compact && !is_conditional) ? - midgard_jmp_writeout_op_branch_uncond : - midgard_jmp_writeout_op_branch_cond; + midgard_jmp_writeout_op op = + is_discard ? midgard_jmp_writeout_op_discard + : is_tilebuf_wait ? midgard_jmp_writeout_op_tilebuffer_pending + : is_writeout ? midgard_jmp_writeout_op_writeout + : (is_compact && !is_conditional) ? 
midgard_jmp_writeout_op_branch_uncond + : midgard_jmp_writeout_op_branch_cond; - if (is_compact) { - unsigned size = sizeof(midgard_branch_cond); + if (is_compact) { + unsigned size = sizeof(midgard_branch_cond); - if (is_conditional || is_special) { - midgard_branch_cond branch = { - .op = op, - .dest_tag = dest_tag, - .offset = quadword_offset, - .cond = cond, - }; - memcpy(util_dynarray_grow_bytes(emission, size, 1), &branch, size); - } else { - assert(op == midgard_jmp_writeout_op_branch_uncond); - midgard_branch_uncond branch = { - .op = op, - .dest_tag = dest_tag, - .offset = quadword_offset, - .call_mode = midgard_call_mode_default, - }; - assert(branch.offset == quadword_offset); - memcpy(util_dynarray_grow_bytes(emission, size, 1), &branch, size); - } - } else { /* `ins->compact_branch`, misnomer */ - unsigned size = sizeof(midgard_branch_extended); + if (is_conditional || is_special) { + midgard_branch_cond branch = { + .op = op, + .dest_tag = dest_tag, + .offset = quadword_offset, + .cond = cond, + }; + memcpy(util_dynarray_grow_bytes(emission, size, 1), &branch, size); + } else { + assert(op == midgard_jmp_writeout_op_branch_uncond); + midgard_branch_uncond branch = { + .op = op, + .dest_tag = dest_tag, + .offset = quadword_offset, + .call_mode = midgard_call_mode_default, + }; + assert(branch.offset == quadword_offset); + memcpy(util_dynarray_grow_bytes(emission, size, 1), &branch, size); + } + } else { /* `ins->compact_branch`, misnomer */ + unsigned size = sizeof(midgard_branch_extended); - midgard_branch_extended branch = - midgard_create_branch_extended( - cond, op, - dest_tag, - quadword_offset); + midgard_branch_extended branch = + midgard_create_branch_extended(cond, op, dest_tag, quadword_offset); - memcpy(util_dynarray_grow_bytes(emission, size, 1), &branch, size); - } + memcpy(util_dynarray_grow_bytes(emission, size, 1), &branch, size); + } } static void -emit_alu_bundle(compiler_context *ctx, - midgard_block *block, - midgard_bundle *bundle, - struct util_dynarray *emission, +emit_alu_bundle(compiler_context *ctx, midgard_block *block, + midgard_bundle *bundle, struct util_dynarray *emission, unsigned lookahead) { - /* Emit the control word */ - util_dynarray_append(emission, uint32_t, bundle->control | lookahead); + /* Emit the control word */ + util_dynarray_append(emission, uint32_t, bundle->control | lookahead); - /* Next up, emit register words */ - for (unsigned i = 0; i < bundle->instruction_count; ++i) { - midgard_instruction *ins = bundle->instructions[i]; + /* Next up, emit register words */ + for (unsigned i = 0; i < bundle->instruction_count; ++i) { + midgard_instruction *ins = bundle->instructions[i]; - /* Check if this instruction has registers */ - if (ins->compact_branch) continue; + /* Check if this instruction has registers */ + if (ins->compact_branch) + continue; - unsigned src2_reg = REGISTER_UNUSED; - if (ins->has_inline_constant) - src2_reg = ins->inline_constant >> 11; - else if (ins->src[1] != ~0) - src2_reg = SSA_REG_FROM_FIXED(ins->src[1]); + unsigned src2_reg = REGISTER_UNUSED; + if (ins->has_inline_constant) + src2_reg = ins->inline_constant >> 11; + else if (ins->src[1] != ~0) + src2_reg = SSA_REG_FROM_FIXED(ins->src[1]); - /* Otherwise, just emit the registers */ - uint16_t reg_word = 0; - midgard_reg_info registers = { - .src1_reg = (ins->src[0] == ~0 ? - REGISTER_UNUSED : - SSA_REG_FROM_FIXED(ins->src[0])), - .src2_reg = src2_reg, - .src2_imm = ins->has_inline_constant, - .out_reg = (ins->dest == ~0 ? 
- REGISTER_UNUSED : - SSA_REG_FROM_FIXED(ins->dest)), - }; - memcpy(®_word, ®isters, sizeof(uint16_t)); - util_dynarray_append(emission, uint16_t, reg_word); - } + /* Otherwise, just emit the registers */ + uint16_t reg_word = 0; + midgard_reg_info registers = { + .src1_reg = (ins->src[0] == ~0 ? REGISTER_UNUSED + : SSA_REG_FROM_FIXED(ins->src[0])), + .src2_reg = src2_reg, + .src2_imm = ins->has_inline_constant, + .out_reg = + (ins->dest == ~0 ? REGISTER_UNUSED : SSA_REG_FROM_FIXED(ins->dest)), + }; + memcpy(®_word, ®isters, sizeof(uint16_t)); + util_dynarray_append(emission, uint16_t, reg_word); + } - /* Now, we emit the body itself */ - for (unsigned i = 0; i < bundle->instruction_count; ++i) { - midgard_instruction *ins = bundle->instructions[i]; + /* Now, we emit the body itself */ + for (unsigned i = 0; i < bundle->instruction_count; ++i) { + midgard_instruction *ins = bundle->instructions[i]; - if (!ins->compact_branch) { - mir_lower_inverts(ins); - mir_lower_roundmode(ins); - } + if (!ins->compact_branch) { + mir_lower_inverts(ins); + mir_lower_roundmode(ins); + } - if (midgard_is_branch_unit(ins->unit)) { - emit_branch(ins, ctx, block, bundle, emission); - } else if (ins->unit & UNITS_ANY_VECTOR) { - midgard_vector_alu source = vector_alu_from_instr(ins); - mir_pack_mask_alu(ins, &source); - mir_pack_vector_srcs(ins, &source); - unsigned size = sizeof(source); - memcpy(util_dynarray_grow_bytes(emission, size, 1), &source, size); - } else { - midgard_scalar_alu source = vector_to_scalar_alu(vector_alu_from_instr(ins), ins); - unsigned size = sizeof(source); - memcpy(util_dynarray_grow_bytes(emission, size, 1), &source, size); - } - } + if (midgard_is_branch_unit(ins->unit)) { + emit_branch(ins, ctx, block, bundle, emission); + } else if (ins->unit & UNITS_ANY_VECTOR) { + midgard_vector_alu source = vector_alu_from_instr(ins); + mir_pack_mask_alu(ins, &source); + mir_pack_vector_srcs(ins, &source); + unsigned size = sizeof(source); + memcpy(util_dynarray_grow_bytes(emission, size, 1), &source, size); + } else { + midgard_scalar_alu source = + vector_to_scalar_alu(vector_alu_from_instr(ins), ins); + unsigned size = sizeof(source); + memcpy(util_dynarray_grow_bytes(emission, size, 1), &source, size); + } + } - /* Emit padding (all zero) */ - if (bundle->padding) { - memset(util_dynarray_grow_bytes(emission, bundle->padding, 1), - 0, bundle->padding); - } + /* Emit padding (all zero) */ + if (bundle->padding) { + memset(util_dynarray_grow_bytes(emission, bundle->padding, 1), 0, + bundle->padding); + } - /* Tack on constants */ + /* Tack on constants */ - if (bundle->has_embedded_constants) - util_dynarray_append(emission, midgard_constants, bundle->constants); + if (bundle->has_embedded_constants) + util_dynarray_append(emission, midgard_constants, bundle->constants); } /* Shift applied to the immediate used as an offset. 
Probably this is papering @@ -912,158 +893,153 @@ emit_alu_bundle(compiler_context *ctx, static void mir_ldst_pack_offset(midgard_instruction *ins, int offset) { - /* These opcodes don't support offsets */ - assert(!OP_IS_REG2REG_LDST(ins->op) || - ins->op == midgard_op_lea || - ins->op == midgard_op_lea_image); + /* These opcodes don't support offsets */ + assert(!OP_IS_REG2REG_LDST(ins->op) || ins->op == midgard_op_lea || + ins->op == midgard_op_lea_image); - if (OP_IS_UBO_READ(ins->op)) - ins->load_store.signed_offset |= PACK_LDST_UBO_OFS(offset); - else if (OP_IS_IMAGE(ins->op)) - ins->load_store.signed_offset |= PACK_LDST_ATTRIB_OFS(offset); - else if (OP_IS_SPECIAL(ins->op)) - ins->load_store.signed_offset |= PACK_LDST_SELECTOR_OFS(offset); - else - ins->load_store.signed_offset |= PACK_LDST_MEM_OFS(offset); + if (OP_IS_UBO_READ(ins->op)) + ins->load_store.signed_offset |= PACK_LDST_UBO_OFS(offset); + else if (OP_IS_IMAGE(ins->op)) + ins->load_store.signed_offset |= PACK_LDST_ATTRIB_OFS(offset); + else if (OP_IS_SPECIAL(ins->op)) + ins->load_store.signed_offset |= PACK_LDST_SELECTOR_OFS(offset); + else + ins->load_store.signed_offset |= PACK_LDST_MEM_OFS(offset); } static enum mali_sampler_type -midgard_sampler_type(nir_alu_type t) { - switch (nir_alu_type_get_base_type(t)) - { - case nir_type_float: - return MALI_SAMPLER_FLOAT; - case nir_type_int: - return MALI_SAMPLER_SIGNED; - case nir_type_uint: - return MALI_SAMPLER_UNSIGNED; - default: - unreachable("Unknown sampler type"); - } +midgard_sampler_type(nir_alu_type t) +{ + switch (nir_alu_type_get_base_type(t)) { + case nir_type_float: + return MALI_SAMPLER_FLOAT; + case nir_type_int: + return MALI_SAMPLER_SIGNED; + case nir_type_uint: + return MALI_SAMPLER_UNSIGNED; + default: + unreachable("Unknown sampler type"); + } } /* After everything is scheduled, emit whole bundles at a time */ void -emit_binary_bundle(compiler_context *ctx, - midgard_block *block, - midgard_bundle *bundle, - struct util_dynarray *emission, +emit_binary_bundle(compiler_context *ctx, midgard_block *block, + midgard_bundle *bundle, struct util_dynarray *emission, int next_tag) { - int lookahead = next_tag << 4; + int lookahead = next_tag << 4; - switch (bundle->tag) { - case TAG_ALU_4: - case TAG_ALU_8: - case TAG_ALU_12: - case TAG_ALU_16: - case TAG_ALU_4 + 4: - case TAG_ALU_8 + 4: - case TAG_ALU_12 + 4: - case TAG_ALU_16 + 4: - emit_alu_bundle(ctx, block, bundle, emission, lookahead); - break; + switch (bundle->tag) { + case TAG_ALU_4: + case TAG_ALU_8: + case TAG_ALU_12: + case TAG_ALU_16: + case TAG_ALU_4 + 4: + case TAG_ALU_8 + 4: + case TAG_ALU_12 + 4: + case TAG_ALU_16 + 4: + emit_alu_bundle(ctx, block, bundle, emission, lookahead); + break; - case TAG_LOAD_STORE_4: { - /* One or two composing instructions */ + case TAG_LOAD_STORE_4: { + /* One or two composing instructions */ - uint64_t current64, next64 = LDST_NOP; + uint64_t current64, next64 = LDST_NOP; - /* Copy masks */ + /* Copy masks */ - for (unsigned i = 0; i < bundle->instruction_count; ++i) { - midgard_instruction *ins = bundle->instructions[i]; - mir_pack_ldst_mask(ins); + for (unsigned i = 0; i < bundle->instruction_count; ++i) { + midgard_instruction *ins = bundle->instructions[i]; + mir_pack_ldst_mask(ins); - /* Atomic ops don't use this swizzle the same way as other ops */ - if (!OP_IS_ATOMIC(ins->op)) - mir_pack_swizzle_ldst(ins); + /* Atomic ops don't use this swizzle the same way as other ops */ + if (!OP_IS_ATOMIC(ins->op)) + mir_pack_swizzle_ldst(ins); - /* Apply a constant 
offset */ - unsigned offset = ins->constants.u32[0]; - if (offset) - mir_ldst_pack_offset(ins, offset); - } + /* Apply a constant offset */ + unsigned offset = ins->constants.u32[0]; + if (offset) + mir_ldst_pack_offset(ins, offset); + } - midgard_load_store_word ldst0 = - load_store_from_instr(bundle->instructions[0]); - memcpy(¤t64, &ldst0, sizeof(current64)); + midgard_load_store_word ldst0 = + load_store_from_instr(bundle->instructions[0]); + memcpy(¤t64, &ldst0, sizeof(current64)); - if (bundle->instruction_count == 2) { - midgard_load_store_word ldst1 = - load_store_from_instr(bundle->instructions[1]); - memcpy(&next64, &ldst1, sizeof(next64)); - } + if (bundle->instruction_count == 2) { + midgard_load_store_word ldst1 = + load_store_from_instr(bundle->instructions[1]); + memcpy(&next64, &ldst1, sizeof(next64)); + } - midgard_load_store instruction = { - .type = bundle->tag, - .next_type = next_tag, - .word1 = current64, - .word2 = next64, - }; + midgard_load_store instruction = { + .type = bundle->tag, + .next_type = next_tag, + .word1 = current64, + .word2 = next64, + }; - util_dynarray_append(emission, midgard_load_store, instruction); + util_dynarray_append(emission, midgard_load_store, instruction); - break; - } + break; + } - case TAG_TEXTURE_4: - case TAG_TEXTURE_4_VTX: - case TAG_TEXTURE_4_BARRIER: { - /* Texture instructions are easy, since there is no pipelining - * nor VLIW to worry about. We may need to set .cont/.last - * flags. */ + case TAG_TEXTURE_4: + case TAG_TEXTURE_4_VTX: + case TAG_TEXTURE_4_BARRIER: { + /* Texture instructions are easy, since there is no pipelining + * nor VLIW to worry about. We may need to set .cont/.last + * flags. */ - midgard_instruction *ins = bundle->instructions[0]; + midgard_instruction *ins = bundle->instructions[0]; - ins->texture.type = bundle->tag; - ins->texture.next_type = next_tag; - ins->texture.exec = MIDGARD_PARTIAL_EXECUTION_NONE; /* default */ + ins->texture.type = bundle->tag; + ins->texture.next_type = next_tag; + ins->texture.exec = MIDGARD_PARTIAL_EXECUTION_NONE; /* default */ - /* Nothing else to pack for barriers */ - if (ins->op == midgard_tex_op_barrier) { - ins->texture.op = ins->op; - util_dynarray_append(emission, midgard_texture_word, ins->texture); - return; - } + /* Nothing else to pack for barriers */ + if (ins->op == midgard_tex_op_barrier) { + ins->texture.op = ins->op; + util_dynarray_append(emission, midgard_texture_word, ins->texture); + return; + } - signed override = mir_upper_override(ins, 32); + signed override = mir_upper_override(ins, 32); - ins->texture.mask = override > 0 ? - ins->mask >> override : - ins->mask; + ins->texture.mask = override > 0 ? 
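
A load/store bundle carries at most two instruction words, and the second slot is filled with a NOP when only one instruction was scheduled. A rough standalone illustration of that pairing; the struct layout and the NOP value are invented for the example.

#include <stdint.h>
#include <stdio.h>

#define TOY_LDST_NOP 0x3ull /* placeholder NOP encoding, not the real one */

struct toy_ldst_bundle {
   uint8_t tag;      /* this bundle's tag */
   uint8_t next_tag; /* tag of the following bundle, for the lookahead */
   uint64_t word1;
   uint64_t word2;
};

/* Pack up to two already-encoded words into one bundle, NOP-filling slot 2. */
static struct toy_ldst_bundle
pack_ldst_bundle(uint8_t tag, uint8_t next_tag, const uint64_t *words,
                 unsigned count)
{
   struct toy_ldst_bundle b = {
      .tag = tag,
      .next_tag = next_tag,
      .word1 = words[0],
      .word2 = (count == 2) ? words[1] : TOY_LDST_NOP,
   };
   return b;
}

int
main(void)
{
   uint64_t words[1] = {0xABCDull};
   struct toy_ldst_bundle b = pack_ldst_bundle(5, 8, words, 1);
   printf("word2 is NOP: %d\n", b.word2 == TOY_LDST_NOP);
   return 0;
}
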
ins->mask >> override : ins->mask; - mir_pack_swizzle_tex(ins); + mir_pack_swizzle_tex(ins); - if (!(ctx->quirks & MIDGARD_NO_OOO)) - mir_pack_tex_ooo(block, bundle, ins); + if (!(ctx->quirks & MIDGARD_NO_OOO)) + mir_pack_tex_ooo(block, bundle, ins); - unsigned osz = nir_alu_type_get_type_size(ins->dest_type); - unsigned isz = nir_alu_type_get_type_size(ins->src_types[1]); + unsigned osz = nir_alu_type_get_type_size(ins->dest_type); + unsigned isz = nir_alu_type_get_type_size(ins->src_types[1]); - assert(osz == 32 || osz == 16); - assert(isz == 32 || isz == 16); + assert(osz == 32 || osz == 16); + assert(isz == 32 || isz == 16); - ins->texture.out_full = (osz == 32); - ins->texture.out_upper = override > 0; - ins->texture.in_reg_full = (isz == 32); - ins->texture.sampler_type = midgard_sampler_type(ins->dest_type); - ins->texture.outmod = ins->outmod; + ins->texture.out_full = (osz == 32); + ins->texture.out_upper = override > 0; + ins->texture.in_reg_full = (isz == 32); + ins->texture.sampler_type = midgard_sampler_type(ins->dest_type); + ins->texture.outmod = ins->outmod; - if (mir_op_computes_derivatives(ctx->stage, ins->op)) { - if (ins->helper_terminate) - ins->texture.exec = MIDGARD_PARTIAL_EXECUTION_KILL; - else if (!ins->helper_execute) - ins->texture.exec = MIDGARD_PARTIAL_EXECUTION_SKIP; - } + if (mir_op_computes_derivatives(ctx->stage, ins->op)) { + if (ins->helper_terminate) + ins->texture.exec = MIDGARD_PARTIAL_EXECUTION_KILL; + else if (!ins->helper_execute) + ins->texture.exec = MIDGARD_PARTIAL_EXECUTION_SKIP; + } - midgard_texture_word texture = texture_word_from_instr(ins); - util_dynarray_append(emission, midgard_texture_word, texture); - break; - } + midgard_texture_word texture = texture_word_from_instr(ins); + util_dynarray_append(emission, midgard_texture_word, texture); + break; + } - default: - unreachable("Unknown midgard instruction type\n"); - } + default: + unreachable("Unknown midgard instruction type\n"); + } } diff --git a/src/panfrost/midgard/midgard_errata_lod.c b/src/panfrost/midgard/midgard_errata_lod.c index 395d8c1b388..d4f7eb203af 100644 --- a/src/panfrost/midgard/midgard_errata_lod.c +++ b/src/panfrost/midgard/midgard_errata_lod.c @@ -35,57 +35,55 @@ bool midgard_nir_lod_errata(nir_shader *shader); static bool nir_lod_errata_instr(nir_builder *b, nir_instr *instr, void *data) { - if (instr->type != nir_instr_type_tex) - return false; + if (instr->type != nir_instr_type_tex) + return false; - nir_tex_instr *tex = nir_instr_as_tex(instr); - b->cursor = nir_before_instr(instr); + nir_tex_instr *tex = nir_instr_as_tex(instr); + b->cursor = nir_before_instr(instr); - /* The errata only applies to textureLod ("TEXGRD") */ - if (tex->op != nir_texop_txl) - return false; + /* The errata only applies to textureLod ("TEXGRD") */ + if (tex->op != nir_texop_txl) + return false; - /* Let's grab the sampler parameters */ - nir_intrinsic_instr *l = nir_intrinsic_instr_create(b->shader, - nir_intrinsic_load_sampler_lod_parameters_pan); - l->num_components = 3; - nir_ssa_dest_init(&l->instr, &l->dest, 3, 32, NULL); + /* Let's grab the sampler parameters */ + nir_intrinsic_instr *l = nir_intrinsic_instr_create( + b->shader, nir_intrinsic_load_sampler_lod_parameters_pan); + l->num_components = 3; + nir_ssa_dest_init(&l->instr, &l->dest, 3, 32, NULL); - /* TODO: Indirect samplers, separate sampler objects XXX */ - nir_src idx = nir_src_for_ssa(nir_imm_int(b, tex->texture_index)); - nir_src_copy(&l->src[0], &idx, &l->instr); + /* TODO: Indirect samplers, separate sampler 
objects XXX */ + nir_src idx = nir_src_for_ssa(nir_imm_int(b, tex->texture_index)); + nir_src_copy(&l->src[0], &idx, &l->instr); - nir_builder_instr_insert(b, &l->instr); - nir_ssa_def *params = &l->dest.ssa; + nir_builder_instr_insert(b, &l->instr); + nir_ssa_def *params = &l->dest.ssa; - /* Extract the individual components */ - nir_ssa_def *min_lod = nir_channel(b, params, 0); - nir_ssa_def *max_lod = nir_channel(b, params, 1); - nir_ssa_def *lod_bias = nir_channel(b, params, 2); + /* Extract the individual components */ + nir_ssa_def *min_lod = nir_channel(b, params, 0); + nir_ssa_def *max_lod = nir_channel(b, params, 1); + nir_ssa_def *lod_bias = nir_channel(b, params, 2); - /* Rewrite the LOD with bias/clamps. Order sensitive. */ - for (unsigned i = 0; i < tex->num_srcs; i++) { - if (tex->src[i].src_type != nir_tex_src_lod) - continue; + /* Rewrite the LOD with bias/clamps. Order sensitive. */ + for (unsigned i = 0; i < tex->num_srcs; i++) { + if (tex->src[i].src_type != nir_tex_src_lod) + continue; - nir_ssa_def *lod = nir_ssa_for_src(b, tex->src[i].src, 1); + nir_ssa_def *lod = nir_ssa_for_src(b, tex->src[i].src, 1); - nir_ssa_def *biased = nir_fadd(b, lod, lod_bias); - nir_ssa_def *clamped = nir_fmin(b, - nir_fmax(b, biased, min_lod), max_lod); + nir_ssa_def *biased = nir_fadd(b, lod, lod_bias); + nir_ssa_def *clamped = nir_fmin(b, nir_fmax(b, biased, min_lod), max_lod); - nir_instr_rewrite_src(&tex->instr, &tex->src[i].src, - nir_src_for_ssa(clamped)); - } + nir_instr_rewrite_src(&tex->instr, &tex->src[i].src, + nir_src_for_ssa(clamped)); + } - return true; + return true; } bool midgard_nir_lod_errata(nir_shader *shader) { - return nir_shader_instructions_pass(shader, - nir_lod_errata_instr, - nir_metadata_block_index | nir_metadata_dominance, - NULL); + return nir_shader_instructions_pass( + shader, nir_lod_errata_instr, + nir_metadata_block_index | nir_metadata_dominance, NULL); } diff --git a/src/panfrost/midgard/midgard_helper_invocations.c b/src/panfrost/midgard/midgard_helper_invocations.c index 407de6676b9..0321c2b4ba1 100644 --- a/src/panfrost/midgard/midgard_helper_invocations.c +++ b/src/panfrost/midgard/midgard_helper_invocations.c @@ -66,182 +66,188 @@ static bool mir_block_uses_helpers(gl_shader_stage stage, midgard_block *block) { - mir_foreach_instr_in_block(block, ins) { - if (ins->type != TAG_TEXTURE_4) continue; - if (mir_op_computes_derivatives(stage, ins->op)) - return true; - } + mir_foreach_instr_in_block(block, ins) { + if (ins->type != TAG_TEXTURE_4) + continue; + if (mir_op_computes_derivatives(stage, ins->op)) + return true; + } - return false; + return false; } static bool mir_block_terminates_helpers(midgard_block *block) { - /* Can't terminate if there are no helpers */ - if (!block->helpers_in) - return false; + /* Can't terminate if there are no helpers */ + if (!block->helpers_in) + return false; - /* Can't terminate if a successor needs helpers */ - pan_foreach_successor((&block->base), succ) { - if (((midgard_block *) succ)->helpers_in) - return false; - } + /* Can't terminate if a successor needs helpers */ + pan_foreach_successor((&block->base), succ) { + if (((midgard_block *)succ)->helpers_in) + return false; + } - /* Otherwise we terminate */ - return true; + /* Otherwise we terminate */ + return true; } void mir_analyze_helper_terminate(compiler_context *ctx) { - /* Set blocks as directly requiring helpers, and if they do add them to - * the worklist to propagate to their predecessors */ + /* Set blocks as directly requiring helpers, and if 
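
The workaround above rewrites each explicit LOD as clamp(lod + bias, min_lod, max_lod) using the sampler parameters it just loaded. The same arithmetic in plain C, assuming the three parameters are already available as floats:

#include <math.h>
#include <stdio.h>

/* Mirror the fadd/fmax/fmin sequence emitted by the pass. */
static float
apply_lod_bias_clamp(float lod, float min_lod, float max_lod, float lod_bias)
{
   float biased = lod + lod_bias;
   return fminf(fmaxf(biased, min_lod), max_lod);
}

int
main(void)
{
   /* An LOD of 7.5 with a +1.0 bias clamped to [0, 8] ends up at 8. */
   printf("%.1f\n", apply_lod_bias_clamp(7.5f, 0.0f, 8.0f, 1.0f));
   return 0;
}
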
they do add them to + * the worklist to propagate to their predecessors */ - struct set *worklist = _mesa_set_create(NULL, - _mesa_hash_pointer, - _mesa_key_pointer_equal); + struct set *worklist = + _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); - struct set *visited = _mesa_set_create(NULL, - _mesa_hash_pointer, - _mesa_key_pointer_equal); + struct set *visited = + _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); - mir_foreach_block(ctx, _block) { - midgard_block *block = (midgard_block *) _block; - block->helpers_in |= mir_block_uses_helpers(ctx->stage, block); + mir_foreach_block(ctx, _block) { + midgard_block *block = (midgard_block *)_block; + block->helpers_in |= mir_block_uses_helpers(ctx->stage, block); - if (block->helpers_in) - _mesa_set_add(worklist, _block); - } + if (block->helpers_in) + _mesa_set_add(worklist, _block); + } - /* Next, propagate back. Since there are a finite number of blocks, the - * worklist (a subset of all the blocks) is finite. Since a block can - * only be added to the worklist if it is not on the visited list and - * the visited list - also a subset of the blocks - grows every - * iteration, the algorithm must terminate. */ + /* Next, propagate back. Since there are a finite number of blocks, the + * worklist (a subset of all the blocks) is finite. Since a block can + * only be added to the worklist if it is not on the visited list and + * the visited list - also a subset of the blocks - grows every + * iteration, the algorithm must terminate. */ - struct set_entry *cur; + struct set_entry *cur; - while((cur = _mesa_set_next_entry(worklist, NULL)) != NULL) { - /* Pop off a block requiring helpers */ - pan_block *blk = (struct pan_block *) cur->key; - _mesa_set_remove(worklist, cur); + while ((cur = _mesa_set_next_entry(worklist, NULL)) != NULL) { + /* Pop off a block requiring helpers */ + pan_block *blk = (struct pan_block *)cur->key; + _mesa_set_remove(worklist, cur); - /* Its predecessors also require helpers */ - pan_foreach_predecessor(blk, pred) { - if (!_mesa_set_search(visited, pred)) { - ((midgard_block *) pred)->helpers_in = true; - _mesa_set_add(worklist, pred); - } - } - - _mesa_set_add(visited, blk); - } + /* Its predecessors also require helpers */ + pan_foreach_predecessor(blk, pred) { + if (!_mesa_set_search(visited, pred)) { + ((midgard_block *)pred)->helpers_in = true; + _mesa_set_add(worklist, pred); + } + } - _mesa_set_destroy(visited, NULL); - _mesa_set_destroy(worklist, NULL); + _mesa_set_add(visited, blk); + } - /* Finally, set helper_terminate on the last derivative-calculating - * instruction in a block that terminates helpers */ - mir_foreach_block(ctx, _block) { - midgard_block *block = (midgard_block *) _block; + _mesa_set_destroy(visited, NULL); + _mesa_set_destroy(worklist, NULL); - if (!mir_block_terminates_helpers(block)) - continue; + /* Finally, set helper_terminate on the last derivative-calculating + * instruction in a block that terminates helpers */ + mir_foreach_block(ctx, _block) { + midgard_block *block = (midgard_block *)_block; - mir_foreach_instr_in_block_rev(block, ins) { - if (ins->type != TAG_TEXTURE_4) continue; - if (!mir_op_computes_derivatives(ctx->stage, ins->op)) continue; + if (!mir_block_terminates_helpers(block)) + continue; - ins->helper_terminate = true; - break; - } - } + mir_foreach_instr_in_block_rev(block, ins) { + if (ins->type != TAG_TEXTURE_4) + continue; + if (!mir_op_computes_derivatives(ctx->stage, ins->op)) + continue; + + ins->helper_terminate 
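
The analysis above is a standard backward worklist pass: seed the blocks that directly need helper invocations, then repeatedly pop a block and flag its predecessors, with a visited check guaranteeing termination. A small self-contained version over an array-based CFG; the toy_cfg type is illustrative, not the pan_block/_mesa_set machinery.

#include <stdbool.h>
#include <stdio.h>

#define MAX_BLOCKS 8

struct toy_cfg {
   unsigned num_blocks;
   /* preds[b][i] is the i-th predecessor of block b */
   unsigned num_preds[MAX_BLOCKS];
   unsigned preds[MAX_BLOCKS][MAX_BLOCKS];
};

/* Propagate "needs helpers" from each seeded block to all of its transitive
 * predecessors. A block is pushed only when its flag flips from false to
 * true, which can happen at most once per block, so the loop terminates. */
static void
propagate_helpers(const struct toy_cfg *cfg, bool *helpers_in)
{
   bool visited[MAX_BLOCKS] = {false};
   unsigned worklist[MAX_BLOCKS];
   unsigned count = 0;

   for (unsigned b = 0; b < cfg->num_blocks; ++b)
      if (helpers_in[b])
         worklist[count++] = b;

   while (count) {
      unsigned b = worklist[--count];
      visited[b] = true;

      for (unsigned i = 0; i < cfg->num_preds[b]; ++i) {
         unsigned p = cfg->preds[b][i];
         if (!visited[p] && !helpers_in[p]) {
            helpers_in[p] = true;
            worklist[count++] = p;
         }
      }
   }
}

int
main(void)
{
   /* Blocks 0 -> 1 -> 2 form a chain; block 2 needs helpers. */
   struct toy_cfg cfg = {
      .num_blocks = 3,
      .num_preds = {0, 1, 1},
      .preds = {{0}, {0}, {1}},
   };
   bool helpers_in[MAX_BLOCKS] = {false, false, true};

   propagate_helpers(&cfg, helpers_in);
   printf("%d %d %d\n", helpers_in[0], helpers_in[1], helpers_in[2]);
   return 0;
}
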
= true; + break; + } + } } static bool -mir_helper_block_update(BITSET_WORD *deps, pan_block *_block, unsigned temp_count) +mir_helper_block_update(BITSET_WORD *deps, pan_block *_block, + unsigned temp_count) { - bool progress = false; - midgard_block *block = (midgard_block *) _block; + bool progress = false; + midgard_block *block = (midgard_block *)_block; - mir_foreach_instr_in_block_rev(block, ins) { - /* Ensure we write to a helper dependency */ - if (ins->dest >= temp_count || !BITSET_TEST(deps, ins->dest)) - continue; + mir_foreach_instr_in_block_rev(block, ins) { + /* Ensure we write to a helper dependency */ + if (ins->dest >= temp_count || !BITSET_TEST(deps, ins->dest)) + continue; - /* Then add all of our dependencies */ - mir_foreach_src(ins, s) { - if (ins->src[s] >= temp_count) - continue; + /* Then add all of our dependencies */ + mir_foreach_src(ins, s) { + if (ins->src[s] >= temp_count) + continue; - /* Progress if the dependency set changes */ - progress |= !BITSET_TEST(deps, ins->src[s]); - BITSET_SET(deps, ins->src[s]); - } - } + /* Progress if the dependency set changes */ + progress |= !BITSET_TEST(deps, ins->src[s]); + BITSET_SET(deps, ins->src[s]); + } + } - return progress; + return progress; } void mir_analyze_helper_requirements(compiler_context *ctx) { - mir_compute_temp_count(ctx); - unsigned temp_count = ctx->temp_count; - BITSET_WORD *deps = calloc(sizeof(BITSET_WORD), BITSET_WORDS(temp_count)); + mir_compute_temp_count(ctx); + unsigned temp_count = ctx->temp_count; + BITSET_WORD *deps = calloc(sizeof(BITSET_WORD), BITSET_WORDS(temp_count)); - /* Initialize with the sources of instructions consuming - * derivatives */ + /* Initialize with the sources of instructions consuming + * derivatives */ - mir_foreach_instr_global(ctx, ins) { - if (ins->type != TAG_TEXTURE_4) continue; - if (ins->dest >= ctx->temp_count) continue; - if (!mir_op_computes_derivatives(ctx->stage, ins->op)) continue; + mir_foreach_instr_global(ctx, ins) { + if (ins->type != TAG_TEXTURE_4) + continue; + if (ins->dest >= ctx->temp_count) + continue; + if (!mir_op_computes_derivatives(ctx->stage, ins->op)) + continue; - mir_foreach_src(ins, s) { - if (ins->src[s] < temp_count) - BITSET_SET(deps, ins->src[s]); - } - } + mir_foreach_src(ins, s) { + if (ins->src[s] < temp_count) + BITSET_SET(deps, ins->src[s]); + } + } - /* Propagate that up */ + /* Propagate that up */ - struct set *work_list = _mesa_set_create(NULL, - _mesa_hash_pointer, - _mesa_key_pointer_equal); + struct set *work_list = + _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); - struct set *visited = _mesa_set_create(NULL, - _mesa_hash_pointer, - _mesa_key_pointer_equal); + struct set *visited = + _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); - struct set_entry *cur = _mesa_set_add(work_list, pan_exit_block(&ctx->blocks)); + struct set_entry *cur = + _mesa_set_add(work_list, pan_exit_block(&ctx->blocks)); - do { - pan_block *blk = (struct pan_block *) cur->key; - _mesa_set_remove(work_list, cur); + do { + pan_block *blk = (struct pan_block *)cur->key; + _mesa_set_remove(work_list, cur); - bool progress = mir_helper_block_update(deps, blk, temp_count); + bool progress = mir_helper_block_update(deps, blk, temp_count); - if (progress || !_mesa_set_search(visited, blk)) { - pan_foreach_predecessor(blk, pred) - _mesa_set_add(work_list, pred); - } + if (progress || !_mesa_set_search(visited, blk)) { + pan_foreach_predecessor(blk, pred) + _mesa_set_add(work_list, pred); + } - 
_mesa_set_add(visited, blk); - } while((cur = _mesa_set_next_entry(work_list, NULL)) != NULL); + _mesa_set_add(visited, blk); + } while ((cur = _mesa_set_next_entry(work_list, NULL)) != NULL); - _mesa_set_destroy(visited, NULL); - _mesa_set_destroy(work_list, NULL); + _mesa_set_destroy(visited, NULL); + _mesa_set_destroy(work_list, NULL); - /* Set the execute bits */ + /* Set the execute bits */ - mir_foreach_instr_global(ctx, ins) { - if (ins->type != TAG_TEXTURE_4) continue; - if (ins->dest >= ctx->temp_count) continue; + mir_foreach_instr_global(ctx, ins) { + if (ins->type != TAG_TEXTURE_4) + continue; + if (ins->dest >= ctx->temp_count) + continue; - ins->helper_execute = BITSET_TEST(deps, ins->dest); - } + ins->helper_execute = BITSET_TEST(deps, ins->dest); + } - free(deps); + free(deps); } diff --git a/src/panfrost/midgard/midgard_liveness.c b/src/panfrost/midgard/midgard_liveness.c index 77103c9ea01..984c95f1bcd 100644 --- a/src/panfrost/midgard/midgard_liveness.c +++ b/src/panfrost/midgard/midgard_liveness.c @@ -27,36 +27,37 @@ void mir_liveness_ins_update(uint16_t *live, midgard_instruction *ins, unsigned max) { - /* live_in[s] = GEN[s] + (live_out[s] - KILL[s]) */ + /* live_in[s] = GEN[s] + (live_out[s] - KILL[s]) */ - pan_liveness_kill(live, ins->dest, max, mir_bytemask(ins)); + pan_liveness_kill(live, ins->dest, max, mir_bytemask(ins)); - mir_foreach_src(ins, src) { - unsigned node = ins->src[src]; - unsigned bytemask = mir_bytemask_of_read_components(ins, node); + mir_foreach_src(ins, src) { + unsigned node = ins->src[src]; + unsigned bytemask = mir_bytemask_of_read_components(ins, node); - pan_liveness_gen(live, node, max, bytemask); - } + pan_liveness_gen(live, node, max, bytemask); + } } static void mir_liveness_ins_update_wrap(uint16_t *live, void *ins, unsigned max) { - mir_liveness_ins_update(live, (midgard_instruction *) ins, max); + mir_liveness_ins_update(live, (midgard_instruction *)ins, max); } void mir_compute_liveness(compiler_context *ctx) { - /* If we already have fresh liveness, nothing to do */ - if (ctx->metadata & MIDGARD_METADATA_LIVENESS) - return; + /* If we already have fresh liveness, nothing to do */ + if (ctx->metadata & MIDGARD_METADATA_LIVENESS) + return; - mir_compute_temp_count(ctx); - pan_compute_liveness(&ctx->blocks, ctx->temp_count, mir_liveness_ins_update_wrap); + mir_compute_temp_count(ctx); + pan_compute_liveness(&ctx->blocks, ctx->temp_count, + mir_liveness_ins_update_wrap); - /* Liveness is now valid */ - ctx->metadata |= MIDGARD_METADATA_LIVENESS; + /* Liveness is now valid */ + ctx->metadata |= MIDGARD_METADATA_LIVENESS; } /* Once liveness data is no longer valid, call this */ @@ -64,32 +65,33 @@ mir_compute_liveness(compiler_context *ctx) void mir_invalidate_liveness(compiler_context *ctx) { - /* If we didn't already compute liveness, there's nothing to do */ - if (!(ctx->metadata & MIDGARD_METADATA_LIVENESS)) - return; + /* If we didn't already compute liveness, there's nothing to do */ + if (!(ctx->metadata & MIDGARD_METADATA_LIVENESS)) + return; - pan_free_liveness(&ctx->blocks); + pan_free_liveness(&ctx->blocks); - /* It's now invalid regardless */ - ctx->metadata &= ~MIDGARD_METADATA_LIVENESS; + /* It's now invalid regardless */ + ctx->metadata &= ~MIDGARD_METADATA_LIVENESS; } bool -mir_is_live_after(compiler_context *ctx, midgard_block *block, midgard_instruction *start, int src) +mir_is_live_after(compiler_context *ctx, midgard_block *block, + midgard_instruction *start, int src) { - mir_compute_liveness(ctx); + 
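
Per instruction, liveness follows the transfer function quoted above, live_in = GEN + (live_out - KILL): the destination's written bytes are killed first, then every source's read bytes are marked live. With one uint16_t byte mask per temporary that is two bit operations; a minimal sketch with toy helper names rather than the pan_liveness_* functions:

#include <stdint.h>
#include <stdio.h>

/* live[t] holds a byte mask of temporary t's live bytes. */

static void
liveness_kill(uint16_t *live, unsigned dest, uint16_t write_bytemask)
{
   live[dest] &= ~write_bytemask; /* KILL: bytes written here die */
}

static void
liveness_gen(uint16_t *live, unsigned src, uint16_t read_bytemask)
{
   live[src] |= read_bytemask;    /* GEN: bytes read here become live */
}

int
main(void)
{
   uint16_t live[2] = {0};

   /* Walking backwards over "t0 = add t1, t1", where t0's low 4 bytes are
    * written and t1's low 8 bytes are read: */
   liveness_kill(live, 0, 0x000F);
   liveness_gen(live, 1, 0x00FF);

   printf("t0: %#x, t1: %#x\n", live[0], live[1]);
   return 0;
}
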
mir_compute_liveness(ctx); - /* Check whether we're live in the successors */ + /* Check whether we're live in the successors */ - if (pan_liveness_get(block->base.live_out, src, ctx->temp_count)) - return true; + if (pan_liveness_get(block->base.live_out, src, ctx->temp_count)) + return true; - /* Check the rest of the block for liveness */ + /* Check the rest of the block for liveness */ - mir_foreach_instr_in_block_from(block, ins, mir_next_op(start)) { - if (mir_has_arg(ins, src)) - return true; - } + mir_foreach_instr_in_block_from(block, ins, mir_next_op(start)) { + if (mir_has_arg(ins, src)) + return true; + } - return false; + return false; } diff --git a/src/panfrost/midgard/midgard_nir_lower_helper_writes.c b/src/panfrost/midgard/midgard_nir_lower_helper_writes.c index 51c4b7db5b6..de63a79b954 100644 --- a/src/panfrost/midgard/midgard_nir_lower_helper_writes.c +++ b/src/panfrost/midgard/midgard_nir_lower_helper_writes.c @@ -29,65 +29,63 @@ static bool nir_lower_helper_writes(nir_builder *b, nir_instr *instr, UNUSED void *data) { - if (instr->type != nir_instr_type_intrinsic) - return false; + if (instr->type != nir_instr_type_intrinsic) + return false; - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - switch (intr->intrinsic) { - case nir_intrinsic_global_atomic_add: - case nir_intrinsic_global_atomic_and: - case nir_intrinsic_global_atomic_comp_swap: - case nir_intrinsic_global_atomic_exchange: - case nir_intrinsic_global_atomic_fadd: - case nir_intrinsic_global_atomic_fcomp_swap: - case nir_intrinsic_global_atomic_fmax: - case nir_intrinsic_global_atomic_fmin: - case nir_intrinsic_global_atomic_imax: - case nir_intrinsic_global_atomic_imin: - case nir_intrinsic_global_atomic_or: - case nir_intrinsic_global_atomic_umax: - case nir_intrinsic_global_atomic_umin: - case nir_intrinsic_global_atomic_xor: - case nir_intrinsic_image_atomic_add: - case nir_intrinsic_image_atomic_and: - case nir_intrinsic_image_atomic_comp_swap: - case nir_intrinsic_image_atomic_dec_wrap: - case nir_intrinsic_image_atomic_exchange: - case nir_intrinsic_image_atomic_fadd: - case nir_intrinsic_image_atomic_imax: - case nir_intrinsic_image_atomic_imin: - case nir_intrinsic_image_atomic_inc_wrap: - case nir_intrinsic_image_atomic_or: - case nir_intrinsic_image_atomic_umax: - case nir_intrinsic_image_atomic_umin: - case nir_intrinsic_image_atomic_xor: - case nir_intrinsic_image_store: - case nir_intrinsic_store_global: - break; - default: - return false; - } + switch (intr->intrinsic) { + case nir_intrinsic_global_atomic_add: + case nir_intrinsic_global_atomic_and: + case nir_intrinsic_global_atomic_comp_swap: + case nir_intrinsic_global_atomic_exchange: + case nir_intrinsic_global_atomic_fadd: + case nir_intrinsic_global_atomic_fcomp_swap: + case nir_intrinsic_global_atomic_fmax: + case nir_intrinsic_global_atomic_fmin: + case nir_intrinsic_global_atomic_imax: + case nir_intrinsic_global_atomic_imin: + case nir_intrinsic_global_atomic_or: + case nir_intrinsic_global_atomic_umax: + case nir_intrinsic_global_atomic_umin: + case nir_intrinsic_global_atomic_xor: + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_image_atomic_dec_wrap: + case nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_image_atomic_fadd: + case nir_intrinsic_image_atomic_imax: + case nir_intrinsic_image_atomic_imin: + case nir_intrinsic_image_atomic_inc_wrap: + case 
nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_atomic_umax: + case nir_intrinsic_image_atomic_umin: + case nir_intrinsic_image_atomic_xor: + case nir_intrinsic_image_store: + case nir_intrinsic_store_global: + break; + default: + return false; + } - b->cursor = nir_before_instr(instr); + b->cursor = nir_before_instr(instr); - nir_ssa_def *helper = nir_load_helper_invocation(b, 1); - nir_push_if(b, nir_inot(b, helper)); - nir_instr_remove(instr); - nir_builder_instr_insert(b, instr); - nir_pop_if(b, NULL); + nir_ssa_def *helper = nir_load_helper_invocation(b, 1); + nir_push_if(b, nir_inot(b, helper)); + nir_instr_remove(instr); + nir_builder_instr_insert(b, instr); + nir_pop_if(b, NULL); - return true; + return true; } bool midgard_nir_lower_helper_writes(nir_shader *shader) { - if (shader->info.stage != MESA_SHADER_FRAGMENT) - return false; + if (shader->info.stage != MESA_SHADER_FRAGMENT) + return false; - return nir_shader_instructions_pass(shader, - nir_lower_helper_writes, - nir_metadata_none, - NULL); + return nir_shader_instructions_pass(shader, nir_lower_helper_writes, + nir_metadata_none, NULL); } diff --git a/src/panfrost/midgard/midgard_nir_lower_image_bitsize.c b/src/panfrost/midgard/midgard_nir_lower_image_bitsize.c index f82f6e2ae35..69c18b9be0c 100644 --- a/src/panfrost/midgard/midgard_nir_lower_image_bitsize.c +++ b/src/panfrost/midgard/midgard_nir_lower_image_bitsize.c @@ -31,50 +31,48 @@ static bool nir_lower_image_bitsize(nir_builder *b, nir_instr *instr, UNUSED void *data) { - if (instr->type != nir_instr_type_intrinsic) - return false; + if (instr->type != nir_instr_type_intrinsic) + return false; - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - switch (intr->intrinsic) { - case nir_intrinsic_image_load: - case nir_intrinsic_image_store: - case nir_intrinsic_image_atomic_add: - case nir_intrinsic_image_atomic_and: - case nir_intrinsic_image_atomic_comp_swap: - case nir_intrinsic_image_atomic_exchange: - case nir_intrinsic_image_atomic_imax: - case nir_intrinsic_image_atomic_imin: - case nir_intrinsic_image_atomic_or: - case nir_intrinsic_image_atomic_umax: - case nir_intrinsic_image_atomic_umin: - case nir_intrinsic_image_atomic_xor: - break; - default: - return false; - } + switch (intr->intrinsic) { + case nir_intrinsic_image_load: + case nir_intrinsic_image_store: + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_image_atomic_imax: + case nir_intrinsic_image_atomic_imin: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_atomic_umax: + case nir_intrinsic_image_atomic_umin: + case nir_intrinsic_image_atomic_xor: + break; + default: + return false; + } - if (nir_src_bit_size(intr->src[1]) == 16) - return false; + if (nir_src_bit_size(intr->src[1]) == 16) + return false; - b->cursor = nir_before_instr(instr); + b->cursor = nir_before_instr(instr); - nir_ssa_def *coord = - nir_ssa_for_src(b, intr->src[1], - nir_src_num_components(intr->src[1])); + nir_ssa_def *coord = + nir_ssa_for_src(b, intr->src[1], nir_src_num_components(intr->src[1])); - nir_ssa_def *coord16 = nir_u2u16(b, coord); + nir_ssa_def *coord16 = nir_u2u16(b, coord); - nir_instr_rewrite_src(instr, &intr->src[1], nir_src_for_ssa(coord16)); + nir_instr_rewrite_src(instr, &intr->src[1], nir_src_for_ssa(coord16)); - return true; + return true; } bool 
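
The lowering above makes stores and atomics safe to execute in helper invocations by branching on the negated helper flag. The control flow it produces is equivalent to the plain-C guard below; is_helper_invocation and store_global are illustrative stand-ins for the NIR intrinsics, not real API.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for a side-effecting intrinsic such as a global store. */
static void
store_global(uint32_t *addr, uint32_t value)
{
   *addr = value;
}

/* What the lowered shader effectively executes: helper invocations skip the
 * write, so derivative-only lanes never touch memory. */
static void
guarded_store(bool is_helper_invocation, uint32_t *addr, uint32_t value)
{
   if (!is_helper_invocation)
      store_global(addr, value);
}

int
main(void)
{
   uint32_t mem = 0;
   guarded_store(true, &mem, 7);  /* helper lane: no write */
   guarded_store(false, &mem, 7); /* real lane: writes */
   printf("%u\n", mem);
   return 0;
}
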
midgard_nir_lower_image_bitsize(nir_shader *shader) { - return nir_shader_instructions_pass(shader, - nir_lower_image_bitsize, - nir_metadata_block_index | nir_metadata_dominance, - NULL); + return nir_shader_instructions_pass( + shader, nir_lower_image_bitsize, + nir_metadata_block_index | nir_metadata_dominance, NULL); } diff --git a/src/panfrost/midgard/midgard_ops.h b/src/panfrost/midgard/midgard_ops.h index 2a6e2c3bc32..a77e5e5089e 100644 --- a/src/panfrost/midgard/midgard_ops.h +++ b/src/panfrost/midgard/midgard_ops.h @@ -32,9 +32,9 @@ extern struct mir_ldst_op_props load_store_opcode_props[256]; extern struct mir_tex_op_props tex_opcode_props[16]; extern struct mir_tag_props midgard_tag_props[16]; -#define OP_IS_ATOMIC(op) (load_store_opcode_props[op].props & LDST_ATOMIC) +#define OP_IS_ATOMIC(op) (load_store_opcode_props[op].props & LDST_ATOMIC) #define OP_USES_ATTRIB(op) (load_store_opcode_props[op].props & LDST_ATTRIB) -#define OP_IS_STORE(op) (load_store_opcode_props[op].props & LDST_STORE) +#define OP_IS_STORE(op) (load_store_opcode_props[op].props & LDST_STORE) #define OP_HAS_ADDRESS(op) (load_store_opcode_props[op].props & LDST_ADDRESS) /* Is this opcode that of an integer (regardless of signedness)? Instruction @@ -43,38 +43,38 @@ extern struct mir_tag_props midgard_tag_props[16]; static inline bool midgard_is_integer_op(int op) { - return (op >= 0x40 && op <= 0x7E) || (op >= 0xA0 && op <= 0xC1); + return (op >= 0x40 && op <= 0x7E) || (op >= 0xA0 && op <= 0xC1); } static inline bool midgard_is_unsigned_op(int op) { - assert(midgard_is_integer_op(op)); + assert(midgard_is_integer_op(op)); - switch (op) { - case midgard_alu_op_uaddsat: - case midgard_alu_op_usubsat: - case midgard_alu_op_uwmul: - case midgard_alu_op_umin: - case midgard_alu_op_umax: - case midgard_alu_op_uavg: - case midgard_alu_op_uravg: - case midgard_alu_op_ushlsat: - case midgard_alu_op_uabsdiff: - case midgard_alu_op_ult: - case midgard_alu_op_ule: - case midgard_alu_op_uball_lt: - case midgard_alu_op_uball_lte: - case midgard_alu_op_ubany_lt: - case midgard_alu_op_ubany_lte: - case midgard_alu_op_u2f_rte: - case midgard_alu_op_u2f_rtz: - case midgard_alu_op_u2f_rtn: - case midgard_alu_op_u2f_rtp: - return true; - default: - return false; - } + switch (op) { + case midgard_alu_op_uaddsat: + case midgard_alu_op_usubsat: + case midgard_alu_op_uwmul: + case midgard_alu_op_umin: + case midgard_alu_op_umax: + case midgard_alu_op_uavg: + case midgard_alu_op_uravg: + case midgard_alu_op_ushlsat: + case midgard_alu_op_uabsdiff: + case midgard_alu_op_ult: + case midgard_alu_op_ule: + case midgard_alu_op_uball_lt: + case midgard_alu_op_uball_lte: + case midgard_alu_op_ubany_lt: + case midgard_alu_op_ubany_lte: + case midgard_alu_op_u2f_rte: + case midgard_alu_op_u2f_rtz: + case midgard_alu_op_u2f_rtn: + case midgard_alu_op_u2f_rtp: + return true; + default: + return false; + } } /* Does this opcode *write* an integer? 
Same as is_integer_op, unless it's a @@ -83,10 +83,10 @@ midgard_is_unsigned_op(int op) static inline bool midgard_is_integer_out_op(int op) { - bool is_int = midgard_is_integer_op(op); - bool is_conversion = alu_opcode_props[op].props & OP_TYPE_CONVERT; + bool is_int = midgard_is_integer_op(op); + bool is_conversion = alu_opcode_props[op].props & OP_TYPE_CONVERT; - return is_int ^ is_conversion; + return is_int ^ is_conversion; } /* Determines effective writemask, taking quirks and expansion into account */ @@ -94,17 +94,17 @@ midgard_is_integer_out_op(int op) static inline unsigned effective_writemask(midgard_alu_op op, unsigned existing_mask) { - /* Channel count is off-by-one to fit in two-bits (0 channel makes no - * sense) */ + /* Channel count is off-by-one to fit in two-bits (0 channel makes no + * sense) */ - unsigned channel_count = GET_CHANNEL_COUNT(alu_opcode_props[op].props); + unsigned channel_count = GET_CHANNEL_COUNT(alu_opcode_props[op].props); - /* If there is a fixed channel count, construct the appropriate mask */ + /* If there is a fixed channel count, construct the appropriate mask */ - if (channel_count) - return (1 << channel_count) - 1; + if (channel_count) + return (1 << channel_count) - 1; - return existing_mask; + return existing_mask; }; #endif diff --git a/src/panfrost/midgard/midgard_opt_copy_prop.c b/src/panfrost/midgard/midgard_opt_copy_prop.c index 667440aea19..7fa3fa35675 100644 --- a/src/panfrost/midgard/midgard_opt_copy_prop.c +++ b/src/panfrost/midgard/midgard_opt_copy_prop.c @@ -28,70 +28,78 @@ bool midgard_opt_copy_prop(compiler_context *ctx, midgard_block *block) { - bool progress = false; + bool progress = false; - mir_foreach_instr_in_block_safe(block, ins) { - if (ins->type != TAG_ALU_4) continue; - if (!OP_IS_MOVE(ins->op)) continue; - if (ins->is_pack) continue; + mir_foreach_instr_in_block_safe(block, ins) { + if (ins->type != TAG_ALU_4) + continue; + if (!OP_IS_MOVE(ins->op)) + continue; + if (ins->is_pack) + continue; - unsigned from = ins->src[1]; - unsigned to = ins->dest; + unsigned from = ins->src[1]; + unsigned to = ins->dest; - /* We only work on pure SSA */ + /* We only work on pure SSA */ - if (to & PAN_IS_REG) continue; - if (from & PAN_IS_REG) continue; + if (to & PAN_IS_REG) + continue; + if (from & PAN_IS_REG) + continue; - /* Constant propagation is not handled here, either */ - if (ins->has_inline_constant) continue; - if (ins->has_constants) continue; + /* Constant propagation is not handled here, either */ + if (ins->has_inline_constant) + continue; + if (ins->has_constants) + continue; - /* Modifier propagation is not handled here */ - if (mir_nontrivial_mod(ins, 1, false)) continue; - if (mir_nontrivial_outmod(ins)) continue; + /* Modifier propagation is not handled here */ + if (mir_nontrivial_mod(ins, 1, false)) + continue; + if (mir_nontrivial_outmod(ins)) + continue; - /* Shortened arguments (bias for textures, extra load/store - * arguments, etc.) do not get a swizzle, only a start - * component and even that is restricted. Fragment writeout - * doesn't even get that much */ + /* Shortened arguments (bias for textures, extra load/store + * arguments, etc.) do not get a swizzle, only a start + * component and even that is restricted. 
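
effective_writemask() above encodes a simple rule: an op with a fixed channel count always writes exactly the low channels, while every other op keeps the caller's mask. A standalone restatement of that rule, skipping the off-by-one bitfield packing of the real opcode properties:

#include <stdio.h>

/* Toy op properties: channel_count == 0 means "no fixed count". */
struct toy_op_props {
   unsigned channel_count;
};

static unsigned
toy_effective_writemask(struct toy_op_props props, unsigned existing_mask)
{
   if (props.channel_count)
      return (1u << props.channel_count) - 1; /* low N channels only */

   return existing_mask;
}

int
main(void)
{
   struct toy_op_props fixed_one = {.channel_count = 1};   /* always scalar */
   struct toy_op_props unrestricted = {.channel_count = 0}; /* honours mask */

   printf("%#x %#x\n",
          toy_effective_writemask(fixed_one, 0xF),    /* 0x1 */
          toy_effective_writemask(unrestricted, 0x6)); /* 0x6 */
   return 0;
}
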
Fragment writeout + * doesn't even get that much */ - bool skip = false; + bool skip = false; - mir_foreach_instr_global(ctx, q) { - bool is_tex = q->type == TAG_TEXTURE_4; - bool is_ldst = q->type == TAG_LOAD_STORE_4; - bool is_branch = q->compact_branch; + mir_foreach_instr_global(ctx, q) { + bool is_tex = q->type == TAG_TEXTURE_4; + bool is_ldst = q->type == TAG_LOAD_STORE_4; + bool is_branch = q->compact_branch; - if (!(is_tex || is_ldst || is_branch)) continue; + if (!(is_tex || is_ldst || is_branch)) + continue; - /* For textures, we get a real swizzle for the - * coordinate and the content. For stores, we get one. - * For loads, we get none. */ + /* For textures, we get a real swizzle for the + * coordinate and the content. For stores, we get one. + * For loads, we get none. */ - unsigned start = - is_tex ? 2 : - OP_IS_STORE(q->op) ? 1 : 0; + unsigned start = is_tex ? 2 : OP_IS_STORE(q->op) ? 1 : 0; - mir_foreach_src(q, s) { - if ((s >= start) && q->src[s] == to) { - skip = true; - break; - } - } - } + mir_foreach_src(q, s) { + if ((s >= start) && q->src[s] == to) { + skip = true; + break; + } + } + } - if (skip) - continue; + if (skip) + continue; - if (ctx->blend_src1 == to) - ctx->blend_src1 = from; + if (ctx->blend_src1 == to) + ctx->blend_src1 = from; - /* We're clear -- rewrite, composing the swizzle */ - mir_rewrite_index_src_swizzle(ctx, to, from, ins->swizzle[1]); - mir_remove_instruction(ins); - progress |= true; - } + /* We're clear -- rewrite, composing the swizzle */ + mir_rewrite_index_src_swizzle(ctx, to, from, ins->swizzle[1]); + mir_remove_instruction(ins); + progress |= true; + } - return progress; + return progress; } diff --git a/src/panfrost/midgard/midgard_opt_dce.c b/src/panfrost/midgard/midgard_opt_dce.c index f08972f1107..50a1f0a912d 100644 --- a/src/panfrost/midgard/midgard_opt_dce.c +++ b/src/panfrost/midgard/midgard_opt_dce.c @@ -22,8 +22,8 @@ * SOFTWARE. */ -#include "compiler.h" #include "util/u_memory.h" +#include "compiler.h" #include "midgard_ops.h" /* SIMD-aware dead code elimination. 
Perform liveness analysis step-by-step, @@ -33,97 +33,100 @@ static bool can_cull_mask(compiler_context *ctx, midgard_instruction *ins) { - if (ins->dest >= ctx->temp_count) - return false; + if (ins->dest >= ctx->temp_count) + return false; - if (ins->dest == ctx->blend_src1) - return false; + if (ins->dest == ctx->blend_src1) + return false; - if (ins->type == TAG_LOAD_STORE_4) - if (load_store_opcode_props[ins->op].props & LDST_SPECIAL_MASK) - return false; + if (ins->type == TAG_LOAD_STORE_4) + if (load_store_opcode_props[ins->op].props & LDST_SPECIAL_MASK) + return false; - return true; + return true; } static bool can_dce(midgard_instruction *ins) { - if (ins->mask) - return false; + if (ins->mask) + return false; - if (ins->compact_branch) - return false; + if (ins->compact_branch) + return false; - if (ins->type == TAG_LOAD_STORE_4) - if (load_store_opcode_props[ins->op].props & LDST_SIDE_FX) - return false; + if (ins->type == TAG_LOAD_STORE_4) + if (load_store_opcode_props[ins->op].props & LDST_SIDE_FX) + return false; - if (ins->type == TAG_TEXTURE_4) - if (ins->op == midgard_tex_op_barrier) - return false; + if (ins->type == TAG_TEXTURE_4) + if (ins->op == midgard_tex_op_barrier) + return false; - return true; + return true; } static bool -midgard_opt_dead_code_eliminate_block(compiler_context *ctx, midgard_block *block) +midgard_opt_dead_code_eliminate_block(compiler_context *ctx, + midgard_block *block) { - bool progress = false; + bool progress = false; - uint16_t *live = mem_dup(block->base.live_out, ctx->temp_count * sizeof(uint16_t)); + uint16_t *live = + mem_dup(block->base.live_out, ctx->temp_count * sizeof(uint16_t)); - mir_foreach_instr_in_block_rev(block, ins) { - if (can_cull_mask(ctx, ins)) { - unsigned type_size = nir_alu_type_get_type_size(ins->dest_type); - unsigned round_size = type_size; - unsigned oldmask = ins->mask; + mir_foreach_instr_in_block_rev(block, ins) { + if (can_cull_mask(ctx, ins)) { + unsigned type_size = nir_alu_type_get_type_size(ins->dest_type); + unsigned round_size = type_size; + unsigned oldmask = ins->mask; - /* Make sure we're packable */ - if (type_size < 32 && ins->type == TAG_LOAD_STORE_4) - round_size = 32; + /* Make sure we're packable */ + if (type_size < 32 && ins->type == TAG_LOAD_STORE_4) + round_size = 32; - unsigned rounded = mir_round_bytemask_up(live[ins->dest], round_size); - unsigned cmask = mir_from_bytemask(rounded, type_size); + unsigned rounded = mir_round_bytemask_up(live[ins->dest], round_size); + unsigned cmask = mir_from_bytemask(rounded, type_size); - ins->mask &= cmask; - progress |= (ins->mask != oldmask); - } + ins->mask &= cmask; + progress |= (ins->mask != oldmask); + } - mir_liveness_ins_update(live, ins, ctx->temp_count); - } + mir_liveness_ins_update(live, ins, ctx->temp_count); + } - mir_foreach_instr_in_block_safe(block, ins) { - if (can_dce(ins)) { - mir_remove_instruction(ins); - progress = true; - } - } + mir_foreach_instr_in_block_safe(block, ins) { + if (can_dce(ins)) { + mir_remove_instruction(ins); + progress = true; + } + } - free(live); + free(live); - return progress; + return progress; } bool midgard_opt_dead_code_eliminate(compiler_context *ctx) { - /* We track liveness. In fact, it's ok if we assume more things are - * live than they actually are, that just reduces the effectiveness of - * this iterations lightly. And DCE has the effect of strictly reducing - * liveness, so we can run DCE across all blocks while only computing - * liveness at the beginning. */ + /* We track liveness. 
In fact, it's ok if we assume more things are + * live than they actually are, that just reduces the effectiveness of + * this iterations lightly. And DCE has the effect of strictly reducing + * liveness, so we can run DCE across all blocks while only computing + * liveness at the beginning. */ - mir_invalidate_liveness(ctx); - mir_compute_liveness(ctx); + mir_invalidate_liveness(ctx); + mir_compute_liveness(ctx); - bool progress = false; + bool progress = false; - mir_foreach_block(ctx, block) { - progress |= midgard_opt_dead_code_eliminate_block(ctx, (midgard_block *) block); - } + mir_foreach_block(ctx, block) { + progress |= + midgard_opt_dead_code_eliminate_block(ctx, (midgard_block *)block); + } - return progress; + return progress; } /* Removes dead moves, that is, moves with a destination overwritten before @@ -133,36 +136,39 @@ midgard_opt_dead_code_eliminate(compiler_context *ctx) bool midgard_opt_dead_move_eliminate(compiler_context *ctx, midgard_block *block) { - bool progress = false; + bool progress = false; - mir_foreach_instr_in_block_safe(block, ins) { - if (ins->type != TAG_ALU_4) continue; - if (ins->compact_branch) continue; - if (!OP_IS_MOVE(ins->op)) continue; + mir_foreach_instr_in_block_safe(block, ins) { + if (ins->type != TAG_ALU_4) + continue; + if (ins->compact_branch) + continue; + if (!OP_IS_MOVE(ins->op)) + continue; - /* Check if it's overwritten in this block before being read */ - bool overwritten = false; + /* Check if it's overwritten in this block before being read */ + bool overwritten = false; - mir_foreach_instr_in_block_from(block, q, mir_next_op(ins)) { - /* Check if used */ - if (mir_has_arg(q, ins->dest)) - break; + mir_foreach_instr_in_block_from(block, q, mir_next_op(ins)) { + /* Check if used */ + if (mir_has_arg(q, ins->dest)) + break; - /* Check if overwritten */ - if (q->dest == ins->dest) { - /* Special case to vec4; component tracking is - * harder */ + /* Check if overwritten */ + if (q->dest == ins->dest) { + /* Special case to vec4; component tracking is + * harder */ - overwritten = (q->mask == 0xF); - break; - } - } + overwritten = (q->mask == 0xF); + break; + } + } - if (overwritten) { - mir_remove_instruction(ins); - progress = true; - } - } + if (overwritten) { + mir_remove_instruction(ins); + progress = true; + } + } - return progress; + return progress; } diff --git a/src/panfrost/midgard/midgard_opt_perspective.c b/src/panfrost/midgard/midgard_opt_perspective.c index c0f8ba83a1a..1e131992a0e 100644 --- a/src/panfrost/midgard/midgard_opt_perspective.c +++ b/src/panfrost/midgard/midgard_opt_perspective.c @@ -40,160 +40,190 @@ static bool is_swizzle_0(unsigned *swizzle) { - for (unsigned c = 0; c < MIR_VEC_COMPONENTS; ++c) - if (swizzle[c]) - return false; + for (unsigned c = 0; c < MIR_VEC_COMPONENTS; ++c) + if (swizzle[c]) + return false; - return true; + return true; } bool midgard_opt_combine_projection(compiler_context *ctx, midgard_block *block) { - bool progress = false; + bool progress = false; - mir_foreach_instr_in_block_safe(block, ins) { - /* First search for fmul */ - if (ins->type != TAG_ALU_4) continue; - if (ins->op != midgard_alu_op_fmul) continue; + mir_foreach_instr_in_block_safe(block, ins) { + /* First search for fmul */ + if (ins->type != TAG_ALU_4) + continue; + if (ins->op != midgard_alu_op_fmul) + continue; - /* TODO: Flip */ + /* TODO: Flip */ - /* Check the swizzles */ - - if (!mir_is_simple_swizzle(ins->swizzle[0], ins->mask)) continue; - if (!is_swizzle_0(ins->swizzle[1])) continue; + /* Check the 
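
The per-block DCE walks instructions in reverse, narrows each write mask to the components still live, and then removes instructions whose mask reaches zero and that have no side effects. The core of that narrowing in isolation, using component masks instead of byte masks for brevity:

#include <stdbool.h>
#include <stdio.h>

struct toy_ins {
   unsigned dest;
   unsigned mask;        /* components written */
   bool has_side_effects;
};

/* Narrow the write mask to what is still live; report whether the
 * instruction became dead. */
static bool
narrow_and_check_dead(struct toy_ins *ins, const unsigned *live)
{
   ins->mask &= live[ins->dest];
   return ins->mask == 0 && !ins->has_side_effects;
}

int
main(void)
{
   unsigned live[2] = {0x3, 0x0}; /* only t0.xy is live, t1 is dead */

   struct toy_ins a = {.dest = 0, .mask = 0xF};
   struct toy_ins b = {.dest = 1, .mask = 0xF};

   printf("a dead: %d, a mask: %#x\n", narrow_and_check_dead(&a, live), a.mask);
   printf("b dead: %d\n", narrow_and_check_dead(&b, live));
   return 0;
}
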
swizzles */ - /* Awesome, we're the right form. Now check where src2 is from */ - unsigned frcp = ins->src[1]; - unsigned to = ins->dest; + if (!mir_is_simple_swizzle(ins->swizzle[0], ins->mask)) + continue; + if (!is_swizzle_0(ins->swizzle[1])) + continue; - if (frcp & PAN_IS_REG) continue; - if (to & PAN_IS_REG) continue; + /* Awesome, we're the right form. Now check where src2 is from */ + unsigned frcp = ins->src[1]; + unsigned to = ins->dest; - bool frcp_found = false; - unsigned frcp_component = 0; - unsigned frcp_from = 0; + if (frcp & PAN_IS_REG) + continue; + if (to & PAN_IS_REG) + continue; - mir_foreach_instr_in_block_safe(block, sub) { - if (sub->dest != frcp) continue; + bool frcp_found = false; + unsigned frcp_component = 0; + unsigned frcp_from = 0; - frcp_component = sub->swizzle[0][0]; - frcp_from = sub->src[0]; + mir_foreach_instr_in_block_safe(block, sub) { + if (sub->dest != frcp) + continue; - frcp_found = - (sub->type == TAG_ALU_4) && - (sub->op == midgard_alu_op_frcp); - break; - } + frcp_component = sub->swizzle[0][0]; + frcp_from = sub->src[0]; - if (!frcp_found) continue; - if (frcp_from != ins->src[0]) continue; - if (frcp_component != COMPONENT_W && frcp_component != COMPONENT_Z) continue; - if (!mir_single_use(ctx, frcp)) continue; + frcp_found = + (sub->type == TAG_ALU_4) && (sub->op == midgard_alu_op_frcp); + break; + } - /* Heuristic: check if the frcp is from a single-use varying */ + if (!frcp_found) + continue; + if (frcp_from != ins->src[0]) + continue; + if (frcp_component != COMPONENT_W && frcp_component != COMPONENT_Z) + continue; + if (!mir_single_use(ctx, frcp)) + continue; - bool ok = false; + /* Heuristic: check if the frcp is from a single-use varying */ - /* One for frcp and one for fmul */ - if (mir_use_count(ctx, frcp_from) > 2) continue; + bool ok = false; - mir_foreach_instr_in_block_safe(block, v) { - if (v->dest != frcp_from) continue; - if (v->type != TAG_LOAD_STORE_4) break; - if (!OP_IS_LOAD_VARY_F(v->op)) break; + /* One for frcp and one for fmul */ + if (mir_use_count(ctx, frcp_from) > 2) + continue; - ok = true; - break; - } + mir_foreach_instr_in_block_safe(block, v) { + if (v->dest != frcp_from) + continue; + if (v->type != TAG_LOAD_STORE_4) + break; + if (!OP_IS_LOAD_VARY_F(v->op)) + break; - if (!ok) - continue; + ok = true; + break; + } - /* Nice, we got the form spot on. Let's convert! */ + if (!ok) + continue; - midgard_instruction accel = { - .type = TAG_LOAD_STORE_4, - .mask = ins->mask, - .dest = to, - .dest_type = nir_type_float32, - .src = { frcp_from, ~0, ~0, ~0, }, - .src_types = { nir_type_float32, }, - .swizzle = SWIZZLE_IDENTITY_4, - .op = frcp_component == COMPONENT_W ? - midgard_op_ldst_perspective_div_w : - midgard_op_ldst_perspective_div_z, - .load_store = { - .bitsize_toggle = true, - }, - }; + /* Nice, we got the form spot on. Let's convert! */ - mir_insert_instruction_before(ctx, ins, accel); - mir_remove_instruction(ins); + midgard_instruction accel = { + .type = TAG_LOAD_STORE_4, + .mask = ins->mask, + .dest = to, + .dest_type = nir_type_float32, + .src = + { + frcp_from, + ~0, + ~0, + ~0, + }, + .src_types = + { + nir_type_float32, + }, + .swizzle = SWIZZLE_IDENTITY_4, + .op = frcp_component == COMPONENT_W + ? 
midgard_op_ldst_perspective_div_w + : midgard_op_ldst_perspective_div_z, + .load_store = + { + .bitsize_toggle = true, + }, + }; - progress |= true; - } + mir_insert_instruction_before(ctx, ins, accel); + mir_remove_instruction(ins); - return progress; + progress |= true; + } + + return progress; } bool midgard_opt_varying_projection(compiler_context *ctx, midgard_block *block) { - bool progress = false; + bool progress = false; - mir_foreach_instr_in_block_safe(block, ins) { - /* Search for a projection */ - if (ins->type != TAG_LOAD_STORE_4) continue; - if (!OP_IS_PROJECTION(ins->op)) continue; + mir_foreach_instr_in_block_safe(block, ins) { + /* Search for a projection */ + if (ins->type != TAG_LOAD_STORE_4) + continue; + if (!OP_IS_PROJECTION(ins->op)) + continue; - unsigned vary = ins->src[0]; - unsigned to = ins->dest; + unsigned vary = ins->src[0]; + unsigned to = ins->dest; - if (vary & PAN_IS_REG) continue; - if (to & PAN_IS_REG) continue; - if (!mir_single_use(ctx, vary)) continue; + if (vary & PAN_IS_REG) + continue; + if (to & PAN_IS_REG) + continue; + if (!mir_single_use(ctx, vary)) + continue; - /* Check for a varying source. If we find it, we rewrite */ + /* Check for a varying source. If we find it, we rewrite */ - bool rewritten = false; + bool rewritten = false; - mir_foreach_instr_in_block_safe(block, v) { - if (v->dest != vary) continue; - if (v->type != TAG_LOAD_STORE_4) break; - if (!OP_IS_LOAD_VARY_F(v->op)) break; + mir_foreach_instr_in_block_safe(block, v) { + if (v->dest != vary) + continue; + if (v->type != TAG_LOAD_STORE_4) + break; + if (!OP_IS_LOAD_VARY_F(v->op)) + break; - /* We found it, so rewrite it to project. Grab the - * modifier */ + /* We found it, so rewrite it to project. Grab the + * modifier */ - midgard_varying_params p = - midgard_unpack_varying_params(v->load_store); + midgard_varying_params p = + midgard_unpack_varying_params(v->load_store); - if (p.modifier != midgard_varying_mod_none) - break; + if (p.modifier != midgard_varying_mod_none) + break; - bool projects_w = - ins->op == midgard_op_ldst_perspective_div_w; + bool projects_w = ins->op == midgard_op_ldst_perspective_div_w; - p.modifier = projects_w ? - midgard_varying_mod_perspective_w : - midgard_varying_mod_perspective_z; + p.modifier = projects_w ? 
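
The peephole above fuses a reciprocal into the multiply that consumes it, turning x * (1/w) into a single perspective-divide op that reads w (or z) directly. Numerically the rewrite is the identity sketched below, up to rounding and the hardware's reciprocal approximation; the real pass additionally checks swizzles, single use, and that the source is a varying load.

#include <math.h>
#include <stdio.h>

/* Before: two ALU ops, a reciprocal and a multiply. */
static float
project_mul_rcp(float x, float w)
{
   float rcp = 1.0f / w;
   return x * rcp;
}

/* After: one fused divide, as done by the perspective-division op. */
static float
project_div(float x, float w)
{
   return x / w;
}

int
main(void)
{
   float x = 3.0f, w = 4.0f;
   printf("difference: %g\n", fabsf(project_mul_rcp(x, w) - project_div(x, w)));
   return 0;
}
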
midgard_varying_mod_perspective_w + : midgard_varying_mod_perspective_z; - midgard_pack_varying_params(&v->load_store, p); + midgard_pack_varying_params(&v->load_store, p); - /* Use the new destination */ - v->dest = to; + /* Use the new destination */ + v->dest = to; - rewritten = true; - break; - } + rewritten = true; + break; + } - if (rewritten) - mir_remove_instruction(ins); + if (rewritten) + mir_remove_instruction(ins); - progress |= rewritten; - } + progress |= rewritten; + } - return progress; + return progress; } diff --git a/src/panfrost/midgard/midgard_print.c b/src/panfrost/midgard/midgard_print.c index 06b92e032a5..6fe3746ab34 100644 --- a/src/panfrost/midgard/midgard_print.c +++ b/src/panfrost/midgard/midgard_print.c @@ -39,25 +39,25 @@ static void mir_print_index(int source) { - if (source == ~0) { - printf("_"); - return; - } + if (source == ~0) { + printf("_"); + return; + } - if (source >= SSA_FIXED_MINIMUM) { - /* Specific register */ - int reg = SSA_REG_FROM_FIXED(source); + if (source >= SSA_FIXED_MINIMUM) { + /* Specific register */ + int reg = SSA_REG_FROM_FIXED(source); - /* TODO: Moving threshold */ - if (reg > 16 && reg < 24) - printf("U%d", 23 - reg); - else - printf("R%d", reg); - } else if (source & PAN_IS_REG) { - printf("r%d", source >> 1); - } else { - printf("%d", source >> 1); - } + /* TODO: Moving threshold */ + if (reg > 16 && reg < 24) + printf("U%d", 23 - reg); + else + printf("R%d", reg); + } else if (source & PAN_IS_REG) { + printf("r%d", source >> 1); + } else { + printf("%d", source >> 1); + } } static const char components[16] = "xyzwefghijklmnop"; @@ -65,12 +65,12 @@ static const char components[16] = "xyzwefghijklmnop"; static void mir_print_mask(unsigned mask) { - printf("."); + printf("."); - for (unsigned i = 0; i < 16; ++i) { - if (mask & (1 << i)) - putchar(components[i]); - } + for (unsigned i = 0; i < 16; ++i) { + if (mask & (1 << i)) + putchar(components[i]); + } } /* @@ -81,246 +81,246 @@ mir_print_mask(unsigned mask) static void mir_print_swizzle(unsigned mask, unsigned *swizzle) { - printf("."); + printf("."); - for (unsigned i = 0; i < 16; ++i) { - if (mask & BITFIELD_BIT(i)) { - unsigned C = swizzle[i]; - putchar(components[C]); - } - } + for (unsigned i = 0; i < 16; ++i) { + if (mask & BITFIELD_BIT(i)) { + unsigned C = swizzle[i]; + putchar(components[C]); + } + } } static const char * mir_get_unit(unsigned unit) { - switch (unit) { - case ALU_ENAB_VEC_MUL: - return "vmul"; - case ALU_ENAB_SCAL_ADD: - return "sadd"; - case ALU_ENAB_VEC_ADD: - return "vadd"; - case ALU_ENAB_SCAL_MUL: - return "smul"; - case ALU_ENAB_VEC_LUT: - return "lut"; - case ALU_ENAB_BR_COMPACT: - return "br"; - case ALU_ENAB_BRANCH: - return "brx"; - default: - return "???"; - } + switch (unit) { + case ALU_ENAB_VEC_MUL: + return "vmul"; + case ALU_ENAB_SCAL_ADD: + return "sadd"; + case ALU_ENAB_VEC_ADD: + return "vadd"; + case ALU_ENAB_SCAL_MUL: + return "smul"; + case ALU_ENAB_VEC_LUT: + return "lut"; + case ALU_ENAB_BR_COMPACT: + return "br"; + case ALU_ENAB_BRANCH: + return "brx"; + default: + return "???"; + } } static void mir_print_embedded_constant(midgard_instruction *ins, unsigned src_idx) { - assert(src_idx <= 1); + assert(src_idx <= 1); - unsigned base_size = max_bitsize_for_alu(ins); - unsigned sz = nir_alu_type_get_type_size(ins->src_types[src_idx]); - bool half = (sz == (base_size >> 1)); - unsigned mod = mir_pack_mod(ins, src_idx, false); - unsigned *swizzle = ins->swizzle[src_idx]; - midgard_reg_mode reg_mode = 
reg_mode_for_bitsize(max_bitsize_for_alu(ins)); - unsigned comp_mask = effective_writemask(ins->op, ins->mask); - unsigned num_comp = util_bitcount(comp_mask); - unsigned max_comp = mir_components_for_type(ins->dest_type); - bool first = true; + unsigned base_size = max_bitsize_for_alu(ins); + unsigned sz = nir_alu_type_get_type_size(ins->src_types[src_idx]); + bool half = (sz == (base_size >> 1)); + unsigned mod = mir_pack_mod(ins, src_idx, false); + unsigned *swizzle = ins->swizzle[src_idx]; + midgard_reg_mode reg_mode = reg_mode_for_bitsize(max_bitsize_for_alu(ins)); + unsigned comp_mask = effective_writemask(ins->op, ins->mask); + unsigned num_comp = util_bitcount(comp_mask); + unsigned max_comp = mir_components_for_type(ins->dest_type); + bool first = true; - printf("#"); + printf("#"); - if (num_comp > 1) - printf("vec%d(", num_comp); + if (num_comp > 1) + printf("vec%d(", num_comp); - for (unsigned comp = 0; comp < max_comp; comp++) { - if (!(comp_mask & (1 << comp))) - continue; + for (unsigned comp = 0; comp < max_comp; comp++) { + if (!(comp_mask & (1 << comp))) + continue; - if (first) - first = false; - else - printf(", "); + if (first) + first = false; + else + printf(", "); - mir_print_constant_component(stdout, &ins->constants, - swizzle[comp], reg_mode, - half, mod, ins->op); - } + mir_print_constant_component(stdout, &ins->constants, swizzle[comp], + reg_mode, half, mod, ins->op); + } - if (num_comp > 1) - printf(")"); + if (num_comp > 1) + printf(")"); } static void mir_print_src(midgard_instruction *ins, unsigned c) { - mir_print_index(ins->src[c]); + mir_print_index(ins->src[c]); - if (ins->src[c] != ~0 && ins->src_types[c] != nir_type_invalid) { - pan_print_alu_type(ins->src_types[c], stdout); - mir_print_swizzle(ins->mask, ins->swizzle[c]); - } + if (ins->src[c] != ~0 && ins->src_types[c] != nir_type_invalid) { + pan_print_alu_type(ins->src_types[c], stdout); + mir_print_swizzle(ins->mask, ins->swizzle[c]); + } } void mir_print_instruction(midgard_instruction *ins) { - printf("\t"); + printf("\t"); - if (midgard_is_branch_unit(ins->unit)) { - const char *branch_target_names[] = { - "goto", "break", "continue", "discard" - }; + if (midgard_is_branch_unit(ins->unit)) { + const char *branch_target_names[] = {"goto", "break", "continue", + "discard"}; - printf("%s.", mir_get_unit(ins->unit)); - if (ins->branch.target_type == TARGET_DISCARD) - printf("discard."); - else if (ins->writeout) - printf("write."); - else if (ins->unit == ALU_ENAB_BR_COMPACT && - !ins->branch.conditional) - printf("uncond."); - else - printf("cond."); + printf("%s.", mir_get_unit(ins->unit)); + if (ins->branch.target_type == TARGET_DISCARD) + printf("discard."); + else if (ins->writeout) + printf("write."); + else if (ins->unit == ALU_ENAB_BR_COMPACT && !ins->branch.conditional) + printf("uncond."); + else + printf("cond."); - if (!ins->branch.conditional) - printf("always"); - else if (ins->branch.invert_conditional) - printf("false"); - else - printf("true"); + if (!ins->branch.conditional) + printf("always"); + else if (ins->branch.invert_conditional) + printf("false"); + else + printf("true"); - if (ins->writeout) { - printf(" (c: "); - mir_print_src(ins, 0); - printf(", z: "); - mir_print_src(ins, 2); - printf(", s: "); - mir_print_src(ins, 3); - printf(")"); - } + if (ins->writeout) { + printf(" (c: "); + mir_print_src(ins, 0); + printf(", z: "); + mir_print_src(ins, 2); + printf(", s: "); + mir_print_src(ins, 3); + printf(")"); + } - if (ins->branch.target_type != TARGET_DISCARD) - 
printf(" %s -> block(%d)\n", - ins->branch.target_type < 4 ? - branch_target_names[ins->branch.target_type] : "??", - ins->branch.target_block); + if (ins->branch.target_type != TARGET_DISCARD) + printf(" %s -> block(%d)\n", + ins->branch.target_type < 4 + ? branch_target_names[ins->branch.target_type] + : "??", + ins->branch.target_block); - return; - } + return; + } - switch (ins->type) { - case TAG_ALU_4: { - midgard_alu_op op = ins->op; - const char *name = alu_opcode_props[op].name; + switch (ins->type) { + case TAG_ALU_4: { + midgard_alu_op op = ins->op; + const char *name = alu_opcode_props[op].name; - if (ins->unit) - printf("%s.", mir_get_unit(ins->unit)); + if (ins->unit) + printf("%s.", mir_get_unit(ins->unit)); - printf("%s", name ? name : "??"); + printf("%s", name ? name : "??"); - if (!(midgard_is_integer_out_op(ins->op) && ins->outmod == midgard_outmod_keeplo)) { - mir_print_outmod(stdout, ins->outmod, midgard_is_integer_out_op(ins->op)); - } + if (!(midgard_is_integer_out_op(ins->op) && + ins->outmod == midgard_outmod_keeplo)) { + mir_print_outmod(stdout, ins->outmod, + midgard_is_integer_out_op(ins->op)); + } - break; - } + break; + } - case TAG_LOAD_STORE_4: { - midgard_load_store_op op = ins->op; - const char *name = load_store_opcode_props[op].name; + case TAG_LOAD_STORE_4: { + midgard_load_store_op op = ins->op; + const char *name = load_store_opcode_props[op].name; - assert(name); - printf("%s", name); - break; - } + assert(name); + printf("%s", name); + break; + } - case TAG_TEXTURE_4: { - printf("TEX"); + case TAG_TEXTURE_4: { + printf("TEX"); - if (ins->helper_terminate) - printf(".terminate"); + if (ins->helper_terminate) + printf(".terminate"); - if (ins->helper_execute) - printf(".execute"); + if (ins->helper_execute) + printf(".execute"); - break; - } + break; + } - default: - assert(0); - } + default: + assert(0); + } - if (ins->compact_branch && ins->branch.invert_conditional) - printf(".not"); + if (ins->compact_branch && ins->branch.invert_conditional) + printf(".not"); - printf(" "); - mir_print_index(ins->dest); + printf(" "); + mir_print_index(ins->dest); - if (ins->dest != ~0) { - pan_print_alu_type(ins->dest_type, stdout); - mir_print_mask(ins->mask); - } + if (ins->dest != ~0) { + pan_print_alu_type(ins->dest_type, stdout); + mir_print_mask(ins->mask); + } - printf(", "); + printf(", "); - /* Only ALU can have an embedded constant, r26 as read on load/store is - * something else entirely */ - bool is_alu = ins->type == TAG_ALU_4; - unsigned r_constant = SSA_FIXED_REGISTER(REGISTER_CONSTANT); + /* Only ALU can have an embedded constant, r26 as read on load/store is + * something else entirely */ + bool is_alu = ins->type == TAG_ALU_4; + unsigned r_constant = SSA_FIXED_REGISTER(REGISTER_CONSTANT); - if (is_alu && alu_opcode_props[ins->op].props & QUIRK_FLIPPED_R24) { - /* Moves (indicated by QUIRK_FLIPPED_R24) are 1-src, with their - * one source in the second slot - */ - assert(ins->src[0] == ~0); - } else { - if (ins->src[0] == r_constant && is_alu) - mir_print_embedded_constant(ins, 0); - else - mir_print_src(ins, 0); + if (is_alu && alu_opcode_props[ins->op].props & QUIRK_FLIPPED_R24) { + /* Moves (indicated by QUIRK_FLIPPED_R24) are 1-src, with their + * one source in the second slot + */ + assert(ins->src[0] == ~0); + } else { + if (ins->src[0] == r_constant && is_alu) + mir_print_embedded_constant(ins, 0); + else + mir_print_src(ins, 0); - printf(", "); - } + printf(", "); + } - if (ins->has_inline_constant) - printf("#%d", 
ins->inline_constant); - else if (ins->src[1] == r_constant && is_alu) - mir_print_embedded_constant(ins, 1); - else - mir_print_src(ins, 1); + if (ins->has_inline_constant) + printf("#%d", ins->inline_constant); + else if (ins->src[1] == r_constant && is_alu) + mir_print_embedded_constant(ins, 1); + else + mir_print_src(ins, 1); - if (is_alu) { - /* ALU ops are all 2-src, though CSEL is treated like a 3-src - * pseudo op with the third source scheduler lowered - */ - switch (ins->op) { - case midgard_alu_op_icsel: - case midgard_alu_op_fcsel: - case midgard_alu_op_icsel_v: - case midgard_alu_op_fcsel_v: - printf(", "); - mir_print_src(ins, 2); - break; - default: - assert(ins->src[2] == ~0); - break; - } + if (is_alu) { + /* ALU ops are all 2-src, though CSEL is treated like a 3-src + * pseudo op with the third source scheduler lowered + */ + switch (ins->op) { + case midgard_alu_op_icsel: + case midgard_alu_op_fcsel: + case midgard_alu_op_icsel_v: + case midgard_alu_op_fcsel_v: + printf(", "); + mir_print_src(ins, 2); + break; + default: + assert(ins->src[2] == ~0); + break; + } - assert(ins->src[3] == ~0); - } else { - for (unsigned c = 2; c <= 3; ++c) { - printf(", "); - mir_print_src(ins, c); - } - } + assert(ins->src[3] == ~0); + } else { + for (unsigned c = 2; c <= 3; ++c) { + printf(", "); + mir_print_src(ins, c); + } + } - if (ins->no_spill) - printf(" /* no spill */"); + if (ins->no_spill) + printf(" /* no spill */"); - printf("\n"); + printf("\n"); } /* Dumps MIR for a block or entire shader respective */ @@ -328,41 +328,41 @@ mir_print_instruction(midgard_instruction *ins) void mir_print_block(midgard_block *block) { - printf("block%u: {\n", block->base.name); + printf("block%u: {\n", block->base.name); - if (block->scheduled) { - mir_foreach_bundle_in_block(block, bundle) { - for (unsigned i = 0; i < bundle->instruction_count; ++i) - mir_print_instruction(bundle->instructions[i]); + if (block->scheduled) { + mir_foreach_bundle_in_block(block, bundle) { + for (unsigned i = 0; i < bundle->instruction_count; ++i) + mir_print_instruction(bundle->instructions[i]); - printf("\n"); - } - } else { - mir_foreach_instr_in_block(block, ins) { - mir_print_instruction(ins); - } - } + printf("\n"); + } + } else { + mir_foreach_instr_in_block(block, ins) { + mir_print_instruction(ins); + } + } - printf("}"); + printf("}"); - if (block->base.successors[0]) { - printf(" -> "); - pan_foreach_successor((&block->base), succ) - printf(" block%u ", succ->name); - } + if (block->base.successors[0]) { + printf(" -> "); + pan_foreach_successor((&block->base), succ) + printf(" block%u ", succ->name); + } - printf(" from { "); - mir_foreach_predecessor(block, pred) - printf("block%u ", pred->base.name); - printf("}"); + printf(" from { "); + mir_foreach_predecessor(block, pred) + printf("block%u ", pred->base.name); + printf("}"); - printf("\n\n"); + printf("\n\n"); } void mir_print_shader(compiler_context *ctx) { - mir_foreach_block(ctx, block) { - mir_print_block((midgard_block *) block); - } + mir_foreach_block(ctx, block) { + mir_print_block((midgard_block *)block); + } } diff --git a/src/panfrost/midgard/midgard_print_constant.c b/src/panfrost/midgard/midgard_print_constant.c index d588bc24bec..fec65701173 100644 --- a/src/panfrost/midgard/midgard_print_constant.c +++ b/src/panfrost/midgard/midgard_print_constant.c @@ -22,156 +22,152 @@ * SOFTWARE. 
*/ -#include #include +#include #include "util/half_float.h" -#include "midgard.h" #include "helpers.h" +#include "midgard.h" #include "midgard_ops.h" void -mir_print_constant_component(FILE *fp, const midgard_constants *consts, unsigned c, - midgard_reg_mode reg_mode, bool half, +mir_print_constant_component(FILE *fp, const midgard_constants *consts, + unsigned c, midgard_reg_mode reg_mode, bool half, unsigned mod, midgard_alu_op op) { - bool is_sint = false, is_uint = false, is_hex = false; - const char *opname = alu_opcode_props[op].name; + bool is_sint = false, is_uint = false, is_hex = false; + const char *opname = alu_opcode_props[op].name; - bool is_int = midgard_is_integer_op(op); + bool is_int = midgard_is_integer_op(op); - /* Add a sentinel name to prevent crashing */ - if (!opname) - opname = "unknown"; + /* Add a sentinel name to prevent crashing */ + if (!opname) + opname = "unknown"; - if (is_int) { - is_uint = midgard_is_unsigned_op(op); + if (is_int) { + is_uint = midgard_is_unsigned_op(op); - if (!is_uint) { - /* Bit ops are easier to follow when the constant is printed in - * hexadecimal. Other operations starting with a 'i' are - * considered to operate on signed integers. That might not - * be true for all of them, but it's good enough for traces. - */ - if (op >= midgard_alu_op_iand && - op <= midgard_alu_op_ipopcnt) - is_hex = true; - else - is_sint = true; - } - } + if (!is_uint) { + /* Bit ops are easier to follow when the constant is printed in + * hexadecimal. Other operations starting with a 'i' are + * considered to operate on signed integers. That might not + * be true for all of them, but it's good enough for traces. + */ + if (op >= midgard_alu_op_iand && op <= midgard_alu_op_ipopcnt) + is_hex = true; + else + is_sint = true; + } + } - if (half) - reg_mode--; + if (half) + reg_mode--; - switch (reg_mode) { - case midgard_reg_mode_64: - if (is_sint) { - fprintf(fp, "%"PRIi64, consts->i64[c]); - } else if (is_uint) { - fprintf(fp, "%"PRIu64, consts->u64[c]); - } else if (is_hex) { - fprintf(fp, "0x%"PRIX64, consts->u64[c]); - } else { - double v = consts->f64[c]; + switch (reg_mode) { + case midgard_reg_mode_64: + if (is_sint) { + fprintf(fp, "%" PRIi64, consts->i64[c]); + } else if (is_uint) { + fprintf(fp, "%" PRIu64, consts->u64[c]); + } else if (is_hex) { + fprintf(fp, "0x%" PRIX64, consts->u64[c]); + } else { + double v = consts->f64[c]; - if (mod & MIDGARD_FLOAT_MOD_ABS) v = fabs(v); - if (mod & MIDGARD_FLOAT_MOD_NEG) v = -v; + if (mod & MIDGARD_FLOAT_MOD_ABS) + v = fabs(v); + if (mod & MIDGARD_FLOAT_MOD_NEG) + v = -v; - printf("%g", v); - } - break; + printf("%g", v); + } + break; - case midgard_reg_mode_32: - if (is_sint) { - int64_t v; + case midgard_reg_mode_32: + if (is_sint) { + int64_t v; - if (half && mod == midgard_int_zero_extend) - v = consts->u32[c]; - else if (half && mod == midgard_int_left_shift) - v = (uint64_t)consts->u32[c] << 32; - else - v = consts->i32[c]; + if (half && mod == midgard_int_zero_extend) + v = consts->u32[c]; + else if (half && mod == midgard_int_left_shift) + v = (uint64_t)consts->u32[c] << 32; + else + v = consts->i32[c]; - fprintf(fp, "%"PRIi64, v); - } else if (is_uint || is_hex) { - uint64_t v; + fprintf(fp, "%" PRIi64, v); + } else if (is_uint || is_hex) { + uint64_t v; - if (half && mod == midgard_int_left_shift) - v = (uint64_t)consts->u32[c] << 32; - else - v = consts->u32[c]; + if (half && mod == midgard_int_left_shift) + v = (uint64_t)consts->u32[c] << 32; + else + v = consts->u32[c]; - fprintf(fp, is_uint ? 
"%"PRIu64 : "0x%"PRIX64, v); - } else { - float v = consts->f32[c]; + fprintf(fp, is_uint ? "%" PRIu64 : "0x%" PRIX64, v); + } else { + float v = consts->f32[c]; - if (mod & MIDGARD_FLOAT_MOD_ABS) v = fabsf(v); - if (mod & MIDGARD_FLOAT_MOD_NEG) v = -v; + if (mod & MIDGARD_FLOAT_MOD_ABS) + v = fabsf(v); + if (mod & MIDGARD_FLOAT_MOD_NEG) + v = -v; - fprintf(fp, "%g", v); - } - break; + fprintf(fp, "%g", v); + } + break; - case midgard_reg_mode_16: - if (is_sint) { - int32_t v; + case midgard_reg_mode_16: + if (is_sint) { + int32_t v; - if (half && mod == midgard_int_zero_extend) - v = consts->u16[c]; - else if (half && mod == midgard_int_left_shift) - v = (uint32_t)consts->u16[c] << 16; - else - v = consts->i16[c]; + if (half && mod == midgard_int_zero_extend) + v = consts->u16[c]; + else if (half && mod == midgard_int_left_shift) + v = (uint32_t)consts->u16[c] << 16; + else + v = consts->i16[c]; - fprintf(fp, "%d", v); - } else if (is_uint || is_hex) { - uint32_t v; + fprintf(fp, "%d", v); + } else if (is_uint || is_hex) { + uint32_t v; - if (half && mod == midgard_int_left_shift) - v = (uint32_t)consts->u16[c] << 16; - else - v = consts->u16[c]; + if (half && mod == midgard_int_left_shift) + v = (uint32_t)consts->u16[c] << 16; + else + v = consts->u16[c]; - fprintf(fp, is_uint ? "%u" : "0x%X", v); - } else { - float v = _mesa_half_to_float(consts->f16[c]); + fprintf(fp, is_uint ? "%u" : "0x%X", v); + } else { + float v = _mesa_half_to_float(consts->f16[c]); - if (mod & MIDGARD_FLOAT_MOD_ABS) v = fabsf(v); - if (mod & MIDGARD_FLOAT_MOD_NEG) v = -v; + if (mod & MIDGARD_FLOAT_MOD_ABS) + v = fabsf(v); + if (mod & MIDGARD_FLOAT_MOD_NEG) + v = -v; - fprintf(fp, "%g", v); - } - break; + fprintf(fp, "%g", v); + } + break; - case midgard_reg_mode_8: - fprintf(fp, "0x%X", consts->u8[c]); + case midgard_reg_mode_8: + fprintf(fp, "0x%X", consts->u8[c]); - if (mod) - fprintf(fp, " /* %u */", mod); + if (mod) + fprintf(fp, " /* %u */", mod); - assert(!half); /* No 4-bit */ + assert(!half); /* No 4-bit */ - break; - } + break; + } } -static char *outmod_names_float[4] = { - "", - ".clamp_0_inf", - ".clamp_m1_1", - ".clamp_0_1" -}; +static char *outmod_names_float[4] = {"", ".clamp_0_inf", ".clamp_m1_1", + ".clamp_0_1"}; -static char *outmod_names_int[4] = { - ".ssat", - ".usat", - ".keeplo", - ".keephi" -}; +static char *outmod_names_int[4] = {".ssat", ".usat", ".keeplo", ".keephi"}; void mir_print_outmod(FILE *fp, unsigned outmod, bool is_int) { - fprintf(fp, "%s", is_int ? outmod_names_int[outmod] : - outmod_names_float[outmod]); + fprintf(fp, "%s", + is_int ? 
outmod_names_int[outmod] : outmod_names_float[outmod]); } diff --git a/src/panfrost/midgard/midgard_quirks.h b/src/panfrost/midgard/midgard_quirks.h index 3e7c2a0280e..3003dbdf7c2 100644 --- a/src/panfrost/midgard/midgard_quirks.h +++ b/src/panfrost/midgard/midgard_quirks.h @@ -69,36 +69,30 @@ static inline unsigned midgard_get_quirks(unsigned gpu_id) { - switch (gpu_id) { - case 0x600: - case 0x620: - return MIDGARD_OLD_BLEND | - MIDGARD_BROKEN_BLEND_LOADS | - MIDGARD_BROKEN_LOD | - MIDGARD_NO_UPPER_ALU | - MIDGARD_NO_OOO; + switch (gpu_id) { + case 0x600: + case 0x620: + return MIDGARD_OLD_BLEND | MIDGARD_BROKEN_BLEND_LOADS | + MIDGARD_BROKEN_LOD | MIDGARD_NO_UPPER_ALU | MIDGARD_NO_OOO; - case 0x720: - return MIDGARD_INTERPIPE_REG_ALIASING | - MIDGARD_OLD_BLEND | - MIDGARD_BROKEN_LOD | - MIDGARD_NO_UPPER_ALU | - MIDGARD_NO_OOO; + case 0x720: + return MIDGARD_INTERPIPE_REG_ALIASING | MIDGARD_OLD_BLEND | + MIDGARD_BROKEN_LOD | MIDGARD_NO_UPPER_ALU | MIDGARD_NO_OOO; - case 0x820: - case 0x830: - return MIDGARD_INTERPIPE_REG_ALIASING; + case 0x820: + case 0x830: + return MIDGARD_INTERPIPE_REG_ALIASING; - case 0x750: - return MIDGARD_NO_UPPER_ALU; + case 0x750: + return MIDGARD_NO_UPPER_ALU; - case 0x860: - case 0x880: - return 0; + case 0x860: + case 0x880: + return 0; - default: - unreachable("Invalid Midgard GPU ID"); - } + default: + unreachable("Invalid Midgard GPU ID"); + } } #endif diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c index 99f544c4e16..bae8e695327 100644 --- a/src/panfrost/midgard/midgard_ra.c +++ b/src/panfrost/midgard/midgard_ra.c @@ -22,44 +22,45 @@ * SOFTWARE. */ -#include "compiler.h" -#include "midgard_ops.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "compiler.h" +#include "midgard_ops.h" #include "midgard_quirks.h" struct phys_reg { - /* Physical register: 0-31 */ - unsigned reg; + /* Physical register: 0-31 */ + unsigned reg; - /* Byte offset into the physical register: 0-15 */ - unsigned offset; + /* Byte offset into the physical register: 0-15 */ + unsigned offset; - /* log2(bytes per component) for fast mul/div */ - unsigned shift; + /* log2(bytes per component) for fast mul/div */ + unsigned shift; }; /* Shift up by reg_offset and horizontally by dst_offset. 
*/ static void -offset_swizzle(unsigned *swizzle, unsigned reg_offset, unsigned srcshift, unsigned dstshift, unsigned dst_offset) +offset_swizzle(unsigned *swizzle, unsigned reg_offset, unsigned srcshift, + unsigned dstshift, unsigned dst_offset) { - unsigned out[MIR_VEC_COMPONENTS]; + unsigned out[MIR_VEC_COMPONENTS]; - signed reg_comp = reg_offset >> srcshift; - signed dst_comp = dst_offset >> dstshift; + signed reg_comp = reg_offset >> srcshift; + signed dst_comp = dst_offset >> dstshift; - unsigned max_component = (16 >> srcshift) - 1; + unsigned max_component = (16 >> srcshift) - 1; - assert(reg_comp << srcshift == reg_offset); - assert(dst_comp << dstshift == dst_offset); + assert(reg_comp << srcshift == reg_offset); + assert(dst_comp << dstshift == dst_offset); - for (signed c = 0; c < MIR_VEC_COMPONENTS; ++c) { - signed comp = MAX2(c - dst_comp, 0); - out[c] = MIN2(swizzle[comp] + reg_comp, max_component); - } + for (signed c = 0; c < MIR_VEC_COMPONENTS; ++c) { + signed comp = MAX2(c - dst_comp, 0); + out[c] = MIN2(swizzle[comp] + reg_comp, max_component); + } - memcpy(swizzle, out, sizeof(out)); + memcpy(swizzle, out, sizeof(out)); } /* Helper to return the default phys_reg for a given register */ @@ -67,50 +68,51 @@ offset_swizzle(unsigned *swizzle, unsigned reg_offset, unsigned srcshift, unsign static struct phys_reg default_phys_reg(int reg, unsigned shift) { - struct phys_reg r = { - .reg = reg, - .offset = 0, - .shift = shift, - }; + struct phys_reg r = { + .reg = reg, + .offset = 0, + .shift = shift, + }; - return r; + return r; } /* Determine which physical register, swizzle, and mask a virtual * register corresponds to */ static struct phys_reg -index_to_reg(compiler_context *ctx, struct lcra_state *l, unsigned reg, unsigned shift) +index_to_reg(compiler_context *ctx, struct lcra_state *l, unsigned reg, + unsigned shift) { - /* Check for special cases */ - if (reg == ~0) - return default_phys_reg(REGISTER_UNUSED, shift); - else if (reg >= SSA_FIXED_MINIMUM) - return default_phys_reg(SSA_REG_FROM_FIXED(reg), shift); - else if (!l) - return default_phys_reg(REGISTER_UNUSED, shift); + /* Check for special cases */ + if (reg == ~0) + return default_phys_reg(REGISTER_UNUSED, shift); + else if (reg >= SSA_FIXED_MINIMUM) + return default_phys_reg(SSA_REG_FROM_FIXED(reg), shift); + else if (!l) + return default_phys_reg(REGISTER_UNUSED, shift); - struct phys_reg r = { - .reg = l->solutions[reg] / 16, - .offset = l->solutions[reg] & 0xF, - .shift = shift, - }; + struct phys_reg r = { + .reg = l->solutions[reg] / 16, + .offset = l->solutions[reg] & 0xF, + .shift = shift, + }; - /* Report that we actually use this register, and return it */ + /* Report that we actually use this register, and return it */ - if (r.reg < 16) - ctx->info->work_reg_count = MAX2(ctx->info->work_reg_count, r.reg + 1); + if (r.reg < 16) + ctx->info->work_reg_count = MAX2(ctx->info->work_reg_count, r.reg + 1); - return r; + return r; } static void set_class(unsigned *classes, unsigned node, unsigned class) { - if (node < SSA_FIXED_MINIMUM && class != classes[node]) { - assert(classes[node] == REG_CLASS_WORK); - classes[node] = class; - } + if (node < SSA_FIXED_MINIMUM && class != classes[node]) { + assert(classes[node] == REG_CLASS_WORK); + classes[node] = class; + } } /* Special register classes impose special constraints on who can read their @@ -119,42 +121,42 @@ set_class(unsigned *classes, unsigned node, unsigned class) static bool ASSERTED check_read_class(unsigned *classes, unsigned tag, unsigned node) 
{ - /* Non-nodes are implicitly ok */ - if (node >= SSA_FIXED_MINIMUM) - return true; + /* Non-nodes are implicitly ok */ + if (node >= SSA_FIXED_MINIMUM) + return true; - switch (classes[node]) { - case REG_CLASS_LDST: - return (tag == TAG_LOAD_STORE_4); - case REG_CLASS_TEXR: - return (tag == TAG_TEXTURE_4); - case REG_CLASS_TEXW: - return (tag != TAG_LOAD_STORE_4); - case REG_CLASS_WORK: - return IS_ALU(tag); - default: - unreachable("Invalid class"); - } + switch (classes[node]) { + case REG_CLASS_LDST: + return (tag == TAG_LOAD_STORE_4); + case REG_CLASS_TEXR: + return (tag == TAG_TEXTURE_4); + case REG_CLASS_TEXW: + return (tag != TAG_LOAD_STORE_4); + case REG_CLASS_WORK: + return IS_ALU(tag); + default: + unreachable("Invalid class"); + } } static bool ASSERTED check_write_class(unsigned *classes, unsigned tag, unsigned node) { - /* Non-nodes are implicitly ok */ - if (node >= SSA_FIXED_MINIMUM) - return true; + /* Non-nodes are implicitly ok */ + if (node >= SSA_FIXED_MINIMUM) + return true; - switch (classes[node]) { - case REG_CLASS_TEXR: - return true; - case REG_CLASS_TEXW: - return (tag == TAG_TEXTURE_4); - case REG_CLASS_LDST: - case REG_CLASS_WORK: - return IS_ALU(tag) || (tag == TAG_LOAD_STORE_4); - default: - unreachable("Invalid class"); - } + switch (classes[node]) { + case REG_CLASS_TEXR: + return true; + case REG_CLASS_TEXW: + return (tag == TAG_TEXTURE_4); + case REG_CLASS_LDST: + case REG_CLASS_WORK: + return IS_ALU(tag) || (tag == TAG_LOAD_STORE_4); + default: + unreachable("Invalid class"); + } } /* Prepass before RA to ensure special class restrictions are met. The idea is @@ -162,284 +164,287 @@ check_write_class(unsigned *classes, unsigned tag, unsigned node) * Later, we'll add moves as appropriate and rewrite to specialize by type. */ static void -mark_node_class (unsigned *bitfield, unsigned node) +mark_node_class(unsigned *bitfield, unsigned node) { - if (node < SSA_FIXED_MINIMUM) - BITSET_SET(bitfield, node); + if (node < SSA_FIXED_MINIMUM) + BITSET_SET(bitfield, node); } void mir_lower_special_reads(compiler_context *ctx) { - size_t sz = BITSET_WORDS(ctx->temp_count) * sizeof(BITSET_WORD); + size_t sz = BITSET_WORDS(ctx->temp_count) * sizeof(BITSET_WORD); - /* Bitfields for the various types of registers we could have. aluw can - * be written by either ALU or load/store */ + /* Bitfields for the various types of registers we could have. 
aluw can + * be written by either ALU or load/store */ - unsigned *alur = calloc(sz, 1); - unsigned *aluw = calloc(sz, 1); - unsigned *brar = calloc(sz, 1); - unsigned *ldst = calloc(sz, 1); - unsigned *texr = calloc(sz, 1); - unsigned *texw = calloc(sz, 1); + unsigned *alur = calloc(sz, 1); + unsigned *aluw = calloc(sz, 1); + unsigned *brar = calloc(sz, 1); + unsigned *ldst = calloc(sz, 1); + unsigned *texr = calloc(sz, 1); + unsigned *texw = calloc(sz, 1); - /* Pass #1 is analysis, a linear scan to fill out the bitfields */ + /* Pass #1 is analysis, a linear scan to fill out the bitfields */ - mir_foreach_instr_global(ctx, ins) { - switch (ins->type) { - case TAG_ALU_4: - mark_node_class(aluw, ins->dest); - mark_node_class(alur, ins->src[0]); - mark_node_class(alur, ins->src[1]); - mark_node_class(alur, ins->src[2]); + mir_foreach_instr_global(ctx, ins) { + switch (ins->type) { + case TAG_ALU_4: + mark_node_class(aluw, ins->dest); + mark_node_class(alur, ins->src[0]); + mark_node_class(alur, ins->src[1]); + mark_node_class(alur, ins->src[2]); - if (ins->compact_branch && ins->writeout) - mark_node_class(brar, ins->src[0]); + if (ins->compact_branch && ins->writeout) + mark_node_class(brar, ins->src[0]); - break; + break; - case TAG_LOAD_STORE_4: - mark_node_class(aluw, ins->dest); - mark_node_class(ldst, ins->src[0]); - mark_node_class(ldst, ins->src[1]); - mark_node_class(ldst, ins->src[2]); - mark_node_class(ldst, ins->src[3]); - break; + case TAG_LOAD_STORE_4: + mark_node_class(aluw, ins->dest); + mark_node_class(ldst, ins->src[0]); + mark_node_class(ldst, ins->src[1]); + mark_node_class(ldst, ins->src[2]); + mark_node_class(ldst, ins->src[3]); + break; - case TAG_TEXTURE_4: - mark_node_class(texr, ins->src[0]); - mark_node_class(texr, ins->src[1]); - mark_node_class(texr, ins->src[2]); - mark_node_class(texw, ins->dest); - break; + case TAG_TEXTURE_4: + mark_node_class(texr, ins->src[0]); + mark_node_class(texr, ins->src[1]); + mark_node_class(texr, ins->src[2]); + mark_node_class(texw, ins->dest); + break; - default: - break; - } - } + default: + break; + } + } - /* Pass #2 is lowering now that we've analyzed all the classes. - * Conceptually, if an index is only marked for a single type of use, - * there is nothing to lower. If it is marked for different uses, we - * split up based on the number of types of uses. To do so, we divide - * into N distinct classes of use (where N>1 by definition), emit N-1 - * moves from the index to copies of the index, and finally rewrite N-1 - * of the types of uses to use the corresponding move */ + /* Pass #2 is lowering now that we've analyzed all the classes. + * Conceptually, if an index is only marked for a single type of use, + * there is nothing to lower. If it is marked for different uses, we + * split up based on the number of types of uses. 
To do so, we divide + * into N distinct classes of use (where N>1 by definition), emit N-1 + * moves from the index to copies of the index, and finally rewrite N-1 + * of the types of uses to use the corresponding move */ - unsigned spill_idx = ctx->temp_count; + unsigned spill_idx = ctx->temp_count; - for (unsigned i = 0; i < ctx->temp_count; ++i) { - bool is_alur = BITSET_TEST(alur, i); - bool is_aluw = BITSET_TEST(aluw, i); - bool is_brar = BITSET_TEST(brar, i); - bool is_ldst = BITSET_TEST(ldst, i); - bool is_texr = BITSET_TEST(texr, i); - bool is_texw = BITSET_TEST(texw, i); + for (unsigned i = 0; i < ctx->temp_count; ++i) { + bool is_alur = BITSET_TEST(alur, i); + bool is_aluw = BITSET_TEST(aluw, i); + bool is_brar = BITSET_TEST(brar, i); + bool is_ldst = BITSET_TEST(ldst, i); + bool is_texr = BITSET_TEST(texr, i); + bool is_texw = BITSET_TEST(texw, i); - /* Analyse to check how many distinct uses there are. ALU ops - * (alur) can read the results of the texture pipeline (texw) - * but not ldst or texr. Load/store ops (ldst) cannot read - * anything but load/store inputs. Texture pipeline cannot read - * anything but texture inputs. TODO: Simplify. */ + /* Analyse to check how many distinct uses there are. ALU ops + * (alur) can read the results of the texture pipeline (texw) + * but not ldst or texr. Load/store ops (ldst) cannot read + * anything but load/store inputs. Texture pipeline cannot read + * anything but texture inputs. TODO: Simplify. */ - bool collision = - (is_alur && (is_ldst || is_texr)) || - (is_ldst && (is_alur || is_texr || is_texw)) || - (is_texr && (is_alur || is_ldst || is_texw)) || - (is_texw && (is_aluw || is_ldst || is_texr)) || - (is_brar && is_texw); - - if (!collision) - continue; + bool collision = (is_alur && (is_ldst || is_texr)) || + (is_ldst && (is_alur || is_texr || is_texw)) || + (is_texr && (is_alur || is_ldst || is_texw)) || + (is_texw && (is_aluw || is_ldst || is_texr)) || + (is_brar && is_texw); - /* Use the index as-is as the work copy. Emit copies for - * special uses */ + if (!collision) + continue; - unsigned classes[] = { TAG_LOAD_STORE_4, TAG_TEXTURE_4, TAG_TEXTURE_4, TAG_ALU_4}; - bool collisions[] = { is_ldst, is_texr, is_texw && is_aluw, is_brar }; + /* Use the index as-is as the work copy. Emit copies for + * special uses */ - for (unsigned j = 0; j < ARRAY_SIZE(collisions); ++j) { - if (!collisions[j]) continue; + unsigned classes[] = {TAG_LOAD_STORE_4, TAG_TEXTURE_4, TAG_TEXTURE_4, + TAG_ALU_4}; + bool collisions[] = {is_ldst, is_texr, is_texw && is_aluw, is_brar}; - /* When the hazard is from reading, we move and rewrite - * sources (typical case). When it's from writing, we - * flip the move and rewrite destinations (obscure, - * only from control flow -- impossible in SSA) */ + for (unsigned j = 0; j < ARRAY_SIZE(collisions); ++j) { + if (!collisions[j]) + continue; - bool hazard_write = (j == 2); + /* When the hazard is from reading, we move and rewrite + * sources (typical case). 
When it's from writing, we + * flip the move and rewrite destinations (obscure, + * only from control flow -- impossible in SSA) */ - unsigned idx = spill_idx++; + bool hazard_write = (j == 2); - /* Insert move before each read/write, depending on the - * hazard we're trying to account for */ + unsigned idx = spill_idx++; - mir_foreach_instr_global_safe(ctx, pre_use) { - if (pre_use->type != classes[j]) - continue; + /* Insert move before each read/write, depending on the + * hazard we're trying to account for */ - if (hazard_write) { - if (pre_use->dest != i) - continue; + mir_foreach_instr_global_safe(ctx, pre_use) { + if (pre_use->type != classes[j]) + continue; - midgard_instruction m = v_mov(idx, i); - m.dest_type = pre_use->dest_type; - m.src_types[1] = m.dest_type; - m.mask = pre_use->mask; + if (hazard_write) { + if (pre_use->dest != i) + continue; - midgard_instruction *use = mir_next_op(pre_use); - assert(use); - mir_insert_instruction_before(ctx, use, m); - mir_rewrite_index_dst_single(pre_use, i, idx); - } else { - if (!mir_has_arg(pre_use, i)) - continue; + midgard_instruction m = v_mov(idx, i); + m.dest_type = pre_use->dest_type; + m.src_types[1] = m.dest_type; + m.mask = pre_use->mask; - idx = spill_idx++; + midgard_instruction *use = mir_next_op(pre_use); + assert(use); + mir_insert_instruction_before(ctx, use, m); + mir_rewrite_index_dst_single(pre_use, i, idx); + } else { + if (!mir_has_arg(pre_use, i)) + continue; - midgard_instruction m = v_mov(i, idx); - m.mask = mir_from_bytemask(mir_round_bytemask_up( - mir_bytemask_of_read_components(pre_use, i), 32), 32); - mir_insert_instruction_before(ctx, pre_use, m); - mir_rewrite_index_src_single(pre_use, i, idx); - } - } - } - } + idx = spill_idx++; - free(alur); - free(aluw); - free(brar); - free(ldst); - free(texr); - free(texw); + midgard_instruction m = v_mov(i, idx); + m.mask = mir_from_bytemask( + mir_round_bytemask_up( + mir_bytemask_of_read_components(pre_use, i), 32), + 32); + mir_insert_instruction_before(ctx, pre_use, m); + mir_rewrite_index_src_single(pre_use, i, idx); + } + } + } + } + + free(alur); + free(aluw); + free(brar); + free(ldst); + free(texr); + free(texw); } static void -mir_compute_interference( - compiler_context *ctx, - struct lcra_state *l) +mir_compute_interference(compiler_context *ctx, struct lcra_state *l) { - /* First, we need liveness information to be computed per block */ - mir_compute_liveness(ctx); + /* First, we need liveness information to be computed per block */ + mir_compute_liveness(ctx); - /* We need to force r1.w live throughout a blend shader */ + /* We need to force r1.w live throughout a blend shader */ - if (ctx->inputs->is_blend) { - unsigned r1w = ~0; + if (ctx->inputs->is_blend) { + unsigned r1w = ~0; - mir_foreach_block(ctx, _block) { - midgard_block *block = (midgard_block *) _block; - mir_foreach_instr_in_block_rev(block, ins) { - if (ins->writeout) - r1w = ins->dest; - } + mir_foreach_block(ctx, _block) { + midgard_block *block = (midgard_block *)_block; + mir_foreach_instr_in_block_rev(block, ins) { + if (ins->writeout) + r1w = ins->dest; + } - if (r1w != ~0) - break; - } + if (r1w != ~0) + break; + } - mir_foreach_instr_global(ctx, ins) { - if (ins->dest < ctx->temp_count) - lcra_add_node_interference(l, ins->dest, mir_bytemask(ins), r1w, 0xF); - } - } + mir_foreach_instr_global(ctx, ins) { + if (ins->dest < ctx->temp_count) + lcra_add_node_interference(l, ins->dest, mir_bytemask(ins), r1w, + 0xF); + } + } - /* Now that every block has live_in/live_out computed, we 
can determine - * interference by walking each block linearly. Take live_out at the - * end of each block and walk the block backwards. */ + /* Now that every block has live_in/live_out computed, we can determine + * interference by walking each block linearly. Take live_out at the + * end of each block and walk the block backwards. */ - mir_foreach_block(ctx, _blk) { - midgard_block *blk = (midgard_block *) _blk; + mir_foreach_block(ctx, _blk) { + midgard_block *blk = (midgard_block *)_blk; - /* The scalar and vector units run in parallel. We need to make - * sure they don't write to same portion of the register file - * otherwise the result is undefined. Add interferences to - * avoid this situation. - */ - util_dynarray_foreach(&blk->bundles, midgard_bundle, bundle) { - midgard_instruction *instrs[2][4]; - unsigned instr_count[2] = { 0, 0 }; + /* The scalar and vector units run in parallel. We need to make + * sure they don't write to same portion of the register file + * otherwise the result is undefined. Add interferences to + * avoid this situation. + */ + util_dynarray_foreach(&blk->bundles, midgard_bundle, bundle) { + midgard_instruction *instrs[2][4]; + unsigned instr_count[2] = {0, 0}; - for (unsigned i = 0; i < bundle->instruction_count; i++) { - if (bundle->instructions[i]->unit == UNIT_VMUL || - bundle->instructions[i]->unit == UNIT_SADD) - instrs[0][instr_count[0]++] = bundle->instructions[i]; - else - instrs[1][instr_count[1]++] = bundle->instructions[i]; - } + for (unsigned i = 0; i < bundle->instruction_count; i++) { + if (bundle->instructions[i]->unit == UNIT_VMUL || + bundle->instructions[i]->unit == UNIT_SADD) + instrs[0][instr_count[0]++] = bundle->instructions[i]; + else + instrs[1][instr_count[1]++] = bundle->instructions[i]; + } - for (unsigned i = 0; i < ARRAY_SIZE(instr_count); i++) { - for (unsigned j = 0; j < instr_count[i]; j++) { - midgard_instruction *ins_a = instrs[i][j]; + for (unsigned i = 0; i < ARRAY_SIZE(instr_count); i++) { + for (unsigned j = 0; j < instr_count[i]; j++) { + midgard_instruction *ins_a = instrs[i][j]; - if (ins_a->dest >= ctx->temp_count) continue; + if (ins_a->dest >= ctx->temp_count) + continue; - for (unsigned k = j + 1; k < instr_count[i]; k++) { - midgard_instruction *ins_b = instrs[i][k]; + for (unsigned k = j + 1; k < instr_count[i]; k++) { + midgard_instruction *ins_b = instrs[i][k]; - if (ins_b->dest >= ctx->temp_count) continue; + if (ins_b->dest >= ctx->temp_count) + continue; - lcra_add_node_interference(l, ins_b->dest, - mir_bytemask(ins_b), - ins_a->dest, - mir_bytemask(ins_a)); - } - } - } - } + lcra_add_node_interference(l, ins_b->dest, + mir_bytemask(ins_b), ins_a->dest, + mir_bytemask(ins_a)); + } + } + } + } - uint16_t *live = mem_dup(_blk->live_out, ctx->temp_count * sizeof(uint16_t)); + uint16_t *live = + mem_dup(_blk->live_out, ctx->temp_count * sizeof(uint16_t)); - mir_foreach_instr_in_block_rev(blk, ins) { - /* Mark all registers live after the instruction as - * interfering with the destination */ + mir_foreach_instr_in_block_rev(blk, ins) { + /* Mark all registers live after the instruction as + * interfering with the destination */ - unsigned dest = ins->dest; + unsigned dest = ins->dest; - if (dest < ctx->temp_count) { - for (unsigned i = 0; i < ctx->temp_count; ++i) { - if (live[i]) { - unsigned mask = mir_bytemask(ins); - lcra_add_node_interference(l, dest, mask, i, live[i]); - } - } - } + if (dest < ctx->temp_count) { + for (unsigned i = 0; i < ctx->temp_count; ++i) { + if (live[i]) { + unsigned mask = 
mir_bytemask(ins); + lcra_add_node_interference(l, dest, mask, i, live[i]); + } + } + } - /* Add blend shader interference: blend shaders might - * clobber r0-r3. */ - if (ins->compact_branch && ins->writeout) { - for (unsigned i = 0; i < ctx->temp_count; ++i) { - if (!live[i]) - continue; + /* Add blend shader interference: blend shaders might + * clobber r0-r3. */ + if (ins->compact_branch && ins->writeout) { + for (unsigned i = 0; i < ctx->temp_count; ++i) { + if (!live[i]) + continue; - for (unsigned j = 0; j < 4; j++) { - lcra_add_node_interference(l, ctx->temp_count + j, - 0xFFFF, - i, live[i]); - } - } - } + for (unsigned j = 0; j < 4; j++) { + lcra_add_node_interference(l, ctx->temp_count + j, 0xFFFF, i, + live[i]); + } + } + } - /* Update live_in */ - mir_liveness_ins_update(live, ins, ctx->temp_count); - } + /* Update live_in */ + mir_liveness_ins_update(live, ins, ctx->temp_count); + } - free(live); - } + free(live); + } } static bool mir_is_64(midgard_instruction *ins) { - if (nir_alu_type_get_type_size(ins->dest_type) == 64) - return true; + if (nir_alu_type_get_type_size(ins->dest_type) == 64) + return true; - mir_foreach_src(ins, v) { - if (nir_alu_type_get_type_size(ins->src_types[v]) == 64) - return true; - } + mir_foreach_src(ins, v) { + if (nir_alu_type_get_type_size(ins->src_types[v]) == 64) + return true; + } - return false; + return false; } /* @@ -449,7 +454,7 @@ mir_is_64(midgard_instruction *ins) static bool needs_contiguous_workgroup(compiler_context *ctx) { - return gl_shader_stage_uses_workgroup(ctx->stage); + return gl_shader_stage_uses_workgroup(ctx->stage); } /* @@ -461,13 +466,13 @@ needs_contiguous_workgroup(compiler_context *ctx) static unsigned max_threads_per_workgroup(compiler_context *ctx) { - if (ctx->nir->info.workgroup_size_variable) { - return 128; - } else { - return ctx->nir->info.workgroup_size[0] * - ctx->nir->info.workgroup_size[1] * - ctx->nir->info.workgroup_size[2]; - } + if (ctx->nir->info.workgroup_size_variable) { + return 128; + } else { + return ctx->nir->info.workgroup_size[0] * + ctx->nir->info.workgroup_size[1] * + ctx->nir->info.workgroup_size[2]; + } } /* @@ -488,21 +493,21 @@ max_threads_per_workgroup(compiler_context *ctx) static unsigned max_work_registers(compiler_context *ctx) { - if (ctx->inputs->is_blend) - return 8; + if (ctx->inputs->is_blend) + return 8; - unsigned rmu_vec4 = ctx->info->push.count / 4; - unsigned max_work_registers = (rmu_vec4 >= 8) ? (24 - rmu_vec4) : 16; + unsigned rmu_vec4 = ctx->info->push.count / 4; + unsigned max_work_registers = (rmu_vec4 >= 8) ? (24 - rmu_vec4) : 16; - if (needs_contiguous_workgroup(ctx)) { - unsigned threads = max_threads_per_workgroup(ctx); - assert(threads <= 128 && "maximum threads in ABI exceeded"); + if (needs_contiguous_workgroup(ctx)) { + unsigned threads = max_threads_per_workgroup(ctx); + assert(threads <= 128 && "maximum threads in ABI exceeded"); - if (threads > 64) - max_work_registers = MIN2(max_work_registers, 8); - } + if (threads > 64) + max_work_registers = MIN2(max_work_registers, 8); + } - return max_work_registers; + return max_work_registers; } /* This routine performs the actual register allocation. 
It should be succeeded @@ -511,689 +516,693 @@ max_work_registers(compiler_context *ctx) static struct lcra_state * allocate_registers(compiler_context *ctx, bool *spilled) { - int work_count = max_work_registers(ctx); + int work_count = max_work_registers(ctx); - /* No register allocation to do with no SSA */ + /* No register allocation to do with no SSA */ - if (!ctx->temp_count) - return NULL; + if (!ctx->temp_count) + return NULL; - /* Initialize LCRA. Allocate extra node at the end for r1-r3 for - * interference */ + /* Initialize LCRA. Allocate extra node at the end for r1-r3 for + * interference */ - struct lcra_state *l = lcra_alloc_equations(ctx->temp_count + 4, 5); - unsigned node_r1 = ctx->temp_count + 1; + struct lcra_state *l = lcra_alloc_equations(ctx->temp_count + 4, 5); + unsigned node_r1 = ctx->temp_count + 1; - /* Starts of classes, in bytes */ - l->class_start[REG_CLASS_WORK] = 16 * 0; - l->class_start[REG_CLASS_LDST] = 16 * 26; - l->class_start[REG_CLASS_TEXR] = 16 * 28; - l->class_start[REG_CLASS_TEXW] = 16 * 28; + /* Starts of classes, in bytes */ + l->class_start[REG_CLASS_WORK] = 16 * 0; + l->class_start[REG_CLASS_LDST] = 16 * 26; + l->class_start[REG_CLASS_TEXR] = 16 * 28; + l->class_start[REG_CLASS_TEXW] = 16 * 28; - l->class_size[REG_CLASS_WORK] = 16 * work_count; - l->class_size[REG_CLASS_LDST] = 16 * 2; - l->class_size[REG_CLASS_TEXR] = 16 * 2; - l->class_size[REG_CLASS_TEXW] = 16 * 2; + l->class_size[REG_CLASS_WORK] = 16 * work_count; + l->class_size[REG_CLASS_LDST] = 16 * 2; + l->class_size[REG_CLASS_TEXR] = 16 * 2; + l->class_size[REG_CLASS_TEXW] = 16 * 2; - lcra_set_disjoint_class(l, REG_CLASS_TEXR, REG_CLASS_TEXW); + lcra_set_disjoint_class(l, REG_CLASS_TEXR, REG_CLASS_TEXW); - /* To save space on T*20, we don't have real texture registers. - * Instead, tex inputs reuse the load/store pipeline registers, and - * tex outputs use work r0/r1. Note we still use TEXR/TEXW classes, - * noting that this handles interferences and sizes correctly. */ + /* To save space on T*20, we don't have real texture registers. + * Instead, tex inputs reuse the load/store pipeline registers, and + * tex outputs use work r0/r1. Note we still use TEXR/TEXW classes, + * noting that this handles interferences and sizes correctly. */ - if (ctx->quirks & MIDGARD_INTERPIPE_REG_ALIASING) { - l->class_start[REG_CLASS_TEXR] = l->class_start[REG_CLASS_LDST]; - l->class_start[REG_CLASS_TEXW] = l->class_start[REG_CLASS_WORK]; - } + if (ctx->quirks & MIDGARD_INTERPIPE_REG_ALIASING) { + l->class_start[REG_CLASS_TEXR] = l->class_start[REG_CLASS_LDST]; + l->class_start[REG_CLASS_TEXW] = l->class_start[REG_CLASS_WORK]; + } - unsigned *found_class = calloc(sizeof(unsigned), ctx->temp_count); - unsigned *min_alignment = calloc(sizeof(unsigned), ctx->temp_count); - unsigned *min_bound = calloc(sizeof(unsigned), ctx->temp_count); + unsigned *found_class = calloc(sizeof(unsigned), ctx->temp_count); + unsigned *min_alignment = calloc(sizeof(unsigned), ctx->temp_count); + unsigned *min_bound = calloc(sizeof(unsigned), ctx->temp_count); - mir_foreach_instr_global(ctx, ins) { - /* Swizzles of 32-bit sources on 64-bit instructions need to be - * aligned to either bottom (xy) or top (zw). More general - * swizzle lowering should happen prior to scheduling (TODO), - * but once we get RA we shouldn't disrupt this further. Align - * sources of 64-bit instructions. 
*/ + mir_foreach_instr_global(ctx, ins) { + /* Swizzles of 32-bit sources on 64-bit instructions need to be + * aligned to either bottom (xy) or top (zw). More general + * swizzle lowering should happen prior to scheduling (TODO), + * but once we get RA we shouldn't disrupt this further. Align + * sources of 64-bit instructions. */ - if (ins->type == TAG_ALU_4 && mir_is_64(ins)) { - mir_foreach_src(ins, v) { - unsigned s = ins->src[v]; + if (ins->type == TAG_ALU_4 && mir_is_64(ins)) { + mir_foreach_src(ins, v) { + unsigned s = ins->src[v]; - if (s < ctx->temp_count) - min_alignment[s] = MAX2(3, min_alignment[s]); - } - } + if (s < ctx->temp_count) + min_alignment[s] = MAX2(3, min_alignment[s]); + } + } - if (ins->type == TAG_LOAD_STORE_4 && OP_HAS_ADDRESS(ins->op)) { - mir_foreach_src(ins, v) { - unsigned s = ins->src[v]; - unsigned size = nir_alu_type_get_type_size(ins->src_types[v]); + if (ins->type == TAG_LOAD_STORE_4 && OP_HAS_ADDRESS(ins->op)) { + mir_foreach_src(ins, v) { + unsigned s = ins->src[v]; + unsigned size = nir_alu_type_get_type_size(ins->src_types[v]); - if (s < ctx->temp_count) - min_alignment[s] = MAX2((size == 64) ? 3 : 2, min_alignment[s]); - } - } + if (s < ctx->temp_count) + min_alignment[s] = MAX2((size == 64) ? 3 : 2, min_alignment[s]); + } + } - /* Anything read as 16-bit needs proper alignment to ensure the - * resulting code can be packed. - */ - mir_foreach_src(ins, s) { - unsigned src_size = nir_alu_type_get_type_size(ins->src_types[s]); - if (src_size == 16 && ins->src[s] < SSA_FIXED_MINIMUM) - min_bound[ins->src[s]] = MAX2(min_bound[ins->src[s]], 8); - } + /* Anything read as 16-bit needs proper alignment to ensure the + * resulting code can be packed. + */ + mir_foreach_src(ins, s) { + unsigned src_size = nir_alu_type_get_type_size(ins->src_types[s]); + if (src_size == 16 && ins->src[s] < SSA_FIXED_MINIMUM) + min_bound[ins->src[s]] = MAX2(min_bound[ins->src[s]], 8); + } - /* Everything after this concerns only the destination, not the - * sources. - */ - if (ins->dest >= SSA_FIXED_MINIMUM) continue; + /* Everything after this concerns only the destination, not the + * sources. + */ + if (ins->dest >= SSA_FIXED_MINIMUM) + continue; - unsigned size = nir_alu_type_get_type_size(ins->dest_type); + unsigned size = nir_alu_type_get_type_size(ins->dest_type); - if (ins->is_pack) - size = 32; + if (ins->is_pack) + size = 32; - /* 0 for x, 1 for xy, 2 for xyz, 3 for xyzw */ - int comps1 = util_logbase2(ins->mask); + /* 0 for x, 1 for xy, 2 for xyz, 3 for xyzw */ + int comps1 = util_logbase2(ins->mask); - int bytes = (comps1 + 1) * (size / 8); + int bytes = (comps1 + 1) * (size / 8); - /* Use the largest class if there's ambiguity, this - * handles partial writes */ + /* Use the largest class if there's ambiguity, this + * handles partial writes */ - int dest = ins->dest; - found_class[dest] = MAX2(found_class[dest], bytes); + int dest = ins->dest; + found_class[dest] = MAX2(found_class[dest], bytes); - min_alignment[dest] = - MAX2(min_alignment[dest], - (size == 16) ? 1 : /* (1 << 1) = 2-byte */ - (size == 32) ? 2 : /* (1 << 2) = 4-byte */ - (size == 64) ? 3 : /* (1 << 3) = 8-byte */ - 3); /* 8-bit todo */ + min_alignment[dest] = + MAX2(min_alignment[dest], (size == 16) ? 1 : /* (1 << 1) = 2-byte */ + (size == 32) ? 2 + : /* (1 << 2) = 4-byte */ + (size == 64) ? 3 + : /* (1 << 3) = 8-byte */ + 3); /* 8-bit todo */ - /* We can't cross xy/zw boundaries. 
TODO: vec8 can */ - if (size == 16 && min_alignment[dest] != 4) - min_bound[dest] = 8; + /* We can't cross xy/zw boundaries. TODO: vec8 can */ + if (size == 16 && min_alignment[dest] != 4) + min_bound[dest] = 8; - /* We don't have a swizzle for the conditional and we don't - * want to muck with the conditional itself, so just force - * alignment for now */ + /* We don't have a swizzle for the conditional and we don't + * want to muck with the conditional itself, so just force + * alignment for now */ - if (ins->type == TAG_ALU_4 && OP_IS_CSEL_V(ins->op)) { - min_alignment[dest] = 4; /* 1 << 4= 16-byte = vec4 */ + if (ins->type == TAG_ALU_4 && OP_IS_CSEL_V(ins->op)) { + min_alignment[dest] = 4; /* 1 << 4= 16-byte = vec4 */ - /* LCRA assumes bound >= alignment */ - min_bound[dest] = 16; - } + /* LCRA assumes bound >= alignment */ + min_bound[dest] = 16; + } - /* Since ld/st swizzles and masks are 32-bit only, we need them - * aligned to enable final packing */ - if (ins->type == TAG_LOAD_STORE_4) - min_alignment[dest] = MAX2(min_alignment[dest], 2); - } + /* Since ld/st swizzles and masks are 32-bit only, we need them + * aligned to enable final packing */ + if (ins->type == TAG_LOAD_STORE_4) + min_alignment[dest] = MAX2(min_alignment[dest], 2); + } - for (unsigned i = 0; i < ctx->temp_count; ++i) { - lcra_set_alignment(l, i, min_alignment[i] ? min_alignment[i] : 2, - min_bound[i] ? min_bound[i] : 16); - lcra_restrict_range(l, i, found_class[i]); - } - - free(found_class); - free(min_alignment); - free(min_bound); + for (unsigned i = 0; i < ctx->temp_count; ++i) { + lcra_set_alignment(l, i, min_alignment[i] ? min_alignment[i] : 2, + min_bound[i] ? min_bound[i] : 16); + lcra_restrict_range(l, i, found_class[i]); + } - /* Next, we'll determine semantic class. We default to zero (work). - * But, if we're used with a special operation, that will force us to a - * particular class. Each node must be assigned to exactly one class; a - * prepass before RA should have lowered what-would-have-been - * multiclass nodes into a series of moves to break it up into multiple - * nodes (TODO) */ + free(found_class); + free(min_alignment); + free(min_bound); - mir_foreach_instr_global(ctx, ins) { - /* Check if this operation imposes any classes */ + /* Next, we'll determine semantic class. We default to zero (work). + * But, if we're used with a special operation, that will force us to a + * particular class. 
Each node must be assigned to exactly one class; a + * prepass before RA should have lowered what-would-have-been + * multiclass nodes into a series of moves to break it up into multiple + * nodes (TODO) */ - if (ins->type == TAG_LOAD_STORE_4) { - set_class(l->class, ins->src[0], REG_CLASS_LDST); - set_class(l->class, ins->src[1], REG_CLASS_LDST); - set_class(l->class, ins->src[2], REG_CLASS_LDST); - set_class(l->class, ins->src[3], REG_CLASS_LDST); + mir_foreach_instr_global(ctx, ins) { + /* Check if this operation imposes any classes */ - if (OP_IS_VEC4_ONLY(ins->op)) { - lcra_restrict_range(l, ins->dest, 16); - lcra_restrict_range(l, ins->src[0], 16); - lcra_restrict_range(l, ins->src[1], 16); - lcra_restrict_range(l, ins->src[2], 16); - lcra_restrict_range(l, ins->src[3], 16); - } - } else if (ins->type == TAG_TEXTURE_4) { - set_class(l->class, ins->dest, REG_CLASS_TEXW); - set_class(l->class, ins->src[0], REG_CLASS_TEXR); - set_class(l->class, ins->src[1], REG_CLASS_TEXR); - set_class(l->class, ins->src[2], REG_CLASS_TEXR); - set_class(l->class, ins->src[3], REG_CLASS_TEXR); - } - } + if (ins->type == TAG_LOAD_STORE_4) { + set_class(l->class, ins->src[0], REG_CLASS_LDST); + set_class(l->class, ins->src[1], REG_CLASS_LDST); + set_class(l->class, ins->src[2], REG_CLASS_LDST); + set_class(l->class, ins->src[3], REG_CLASS_LDST); - /* Check that the semantics of the class are respected */ - mir_foreach_instr_global(ctx, ins) { - assert(check_write_class(l->class, ins->type, ins->dest)); - assert(check_read_class(l->class, ins->type, ins->src[0])); - assert(check_read_class(l->class, ins->type, ins->src[1])); - assert(check_read_class(l->class, ins->type, ins->src[2])); - assert(check_read_class(l->class, ins->type, ins->src[3])); - } + if (OP_IS_VEC4_ONLY(ins->op)) { + lcra_restrict_range(l, ins->dest, 16); + lcra_restrict_range(l, ins->src[0], 16); + lcra_restrict_range(l, ins->src[1], 16); + lcra_restrict_range(l, ins->src[2], 16); + lcra_restrict_range(l, ins->src[3], 16); + } + } else if (ins->type == TAG_TEXTURE_4) { + set_class(l->class, ins->dest, REG_CLASS_TEXW); + set_class(l->class, ins->src[0], REG_CLASS_TEXR); + set_class(l->class, ins->src[1], REG_CLASS_TEXR); + set_class(l->class, ins->src[2], REG_CLASS_TEXR); + set_class(l->class, ins->src[3], REG_CLASS_TEXR); + } + } - /* Mark writeout to r0, depth to r1.x, stencil to r1.y, - * render target to r1.z, unknown to r1.w */ - mir_foreach_instr_global(ctx, ins) { - if (!(ins->compact_branch && ins->writeout)) continue; + /* Check that the semantics of the class are respected */ + mir_foreach_instr_global(ctx, ins) { + assert(check_write_class(l->class, ins->type, ins->dest)); + assert(check_read_class(l->class, ins->type, ins->src[0])); + assert(check_read_class(l->class, ins->type, ins->src[1])); + assert(check_read_class(l->class, ins->type, ins->src[2])); + assert(check_read_class(l->class, ins->type, ins->src[3])); + } - if (ins->src[0] < ctx->temp_count) - l->solutions[ins->src[0]] = 0; + /* Mark writeout to r0, depth to r1.x, stencil to r1.y, + * render target to r1.z, unknown to r1.w */ + mir_foreach_instr_global(ctx, ins) { + if (!(ins->compact_branch && ins->writeout)) + continue; - if (ins->src[2] < ctx->temp_count) - l->solutions[ins->src[2]] = (16 * 1) + COMPONENT_X * 4; + if (ins->src[0] < ctx->temp_count) + l->solutions[ins->src[0]] = 0; - if (ins->src[3] < ctx->temp_count) - l->solutions[ins->src[3]] = (16 * 1) + COMPONENT_Y * 4; + if (ins->src[2] < ctx->temp_count) + l->solutions[ins->src[2]] = (16 * 1) + 
COMPONENT_X * 4; - if (ins->src[1] < ctx->temp_count) - l->solutions[ins->src[1]] = (16 * 1) + COMPONENT_Z * 4; + if (ins->src[3] < ctx->temp_count) + l->solutions[ins->src[3]] = (16 * 1) + COMPONENT_Y * 4; - if (ins->dest < ctx->temp_count) - l->solutions[ins->dest] = (16 * 1) + COMPONENT_W * 4; - } + if (ins->src[1] < ctx->temp_count) + l->solutions[ins->src[1]] = (16 * 1) + COMPONENT_Z * 4; - /* Destinations of instructions in a writeout block cannot be assigned - * to r1 unless they are actually used as r1 from the writeout itself, - * since the writes to r1 are special. A code sequence like: - * - * sadd.fmov r1.x, [...] - * vadd.fadd r0, r1, r2 - * [writeout branch] - * - * will misbehave since the r1.x write will be interpreted as a - * gl_FragDepth write so it won't show up correctly when r1 is read in - * the following segment. We model this as interference. - */ + if (ins->dest < ctx->temp_count) + l->solutions[ins->dest] = (16 * 1) + COMPONENT_W * 4; + } - for (unsigned i = 0; i < 4; ++i) - l->solutions[ctx->temp_count + i] = (16 * i); + /* Destinations of instructions in a writeout block cannot be assigned + * to r1 unless they are actually used as r1 from the writeout itself, + * since the writes to r1 are special. A code sequence like: + * + * sadd.fmov r1.x, [...] + * vadd.fadd r0, r1, r2 + * [writeout branch] + * + * will misbehave since the r1.x write will be interpreted as a + * gl_FragDepth write so it won't show up correctly when r1 is read in + * the following segment. We model this as interference. + */ - mir_foreach_block(ctx, _blk) { - midgard_block *blk = (midgard_block *) _blk; + for (unsigned i = 0; i < 4; ++i) + l->solutions[ctx->temp_count + i] = (16 * i); - mir_foreach_bundle_in_block(blk, v) { - /* We need at least a writeout and nonwriteout instruction */ - if (v->instruction_count < 2) - continue; + mir_foreach_block(ctx, _blk) { + midgard_block *blk = (midgard_block *)_blk; - /* Branches always come at the end */ - midgard_instruction *br = v->instructions[v->instruction_count - 1]; + mir_foreach_bundle_in_block(blk, v) { + /* We need at least a writeout and nonwriteout instruction */ + if (v->instruction_count < 2) + continue; - if (!br->writeout) - continue; + /* Branches always come at the end */ + midgard_instruction *br = v->instructions[v->instruction_count - 1]; - for (signed i = v->instruction_count - 2; i >= 0; --i) { - midgard_instruction *ins = v->instructions[i]; + if (!br->writeout) + continue; - if (ins->dest >= ctx->temp_count) - continue; + for (signed i = v->instruction_count - 2; i >= 0; --i) { + midgard_instruction *ins = v->instructions[i]; - bool used_as_r1 = (br->dest == ins->dest); + if (ins->dest >= ctx->temp_count) + continue; - mir_foreach_src(br, s) - used_as_r1 |= (s > 0) && (br->src[s] == ins->dest); + bool used_as_r1 = (br->dest == ins->dest); - if (!used_as_r1) - lcra_add_node_interference(l, ins->dest, mir_bytemask(ins), node_r1, 0xFFFF); - } - } - } + mir_foreach_src(br, s) + used_as_r1 |= (s > 0) && (br->src[s] == ins->dest); - /* Precolour blend input to r0. 
Note writeout is necessarily at the end - * and blend shaders are single-RT only so there is only a single - * writeout block, so this cannot conflict with the writeout r0 (there - * is no need to have an intermediate move) */ + if (!used_as_r1) + lcra_add_node_interference(l, ins->dest, mir_bytemask(ins), + node_r1, 0xFFFF); + } + } + } - if (ctx->blend_input != ~0) { - assert(ctx->blend_input < ctx->temp_count); - l->solutions[ctx->blend_input] = 0; - } + /* Precolour blend input to r0. Note writeout is necessarily at the end + * and blend shaders are single-RT only so there is only a single + * writeout block, so this cannot conflict with the writeout r0 (there + * is no need to have an intermediate move) */ - /* Same for the dual-source blend input/output, except here we use r2, - * which is also set in the fragment shader. */ + if (ctx->blend_input != ~0) { + assert(ctx->blend_input < ctx->temp_count); + l->solutions[ctx->blend_input] = 0; + } - if (ctx->blend_src1 != ~0) { - assert(ctx->blend_src1 < ctx->temp_count); - l->solutions[ctx->blend_src1] = (16 * 2); - ctx->info->work_reg_count = MAX2(ctx->info->work_reg_count, 3); - } + /* Same for the dual-source blend input/output, except here we use r2, + * which is also set in the fragment shader. */ - mir_compute_interference(ctx, l); + if (ctx->blend_src1 != ~0) { + assert(ctx->blend_src1 < ctx->temp_count); + l->solutions[ctx->blend_src1] = (16 * 2); + ctx->info->work_reg_count = MAX2(ctx->info->work_reg_count, 3); + } - *spilled = !lcra_solve(l); - return l; + mir_compute_interference(ctx, l); + + *spilled = !lcra_solve(l); + return l; } - /* Once registers have been decided via register allocation * (allocate_registers), we need to rewrite the MIR to use registers instead of * indices */ static void -install_registers_instr( - compiler_context *ctx, - struct lcra_state *l, - midgard_instruction *ins) +install_registers_instr(compiler_context *ctx, struct lcra_state *l, + midgard_instruction *ins) { - unsigned src_shift[MIR_SRC_COUNT]; + unsigned src_shift[MIR_SRC_COUNT]; - for (unsigned i = 0; i < MIR_SRC_COUNT; ++i) { - src_shift[i] = - util_logbase2(nir_alu_type_get_type_size(ins->src_types[i]) / 8); - } + for (unsigned i = 0; i < MIR_SRC_COUNT; ++i) { + src_shift[i] = + util_logbase2(nir_alu_type_get_type_size(ins->src_types[i]) / 8); + } - unsigned dest_shift = - util_logbase2(nir_alu_type_get_type_size(ins->dest_type) / 8); + unsigned dest_shift = + util_logbase2(nir_alu_type_get_type_size(ins->dest_type) / 8); - switch (ins->type) { - case TAG_ALU_4: - case TAG_ALU_8: - case TAG_ALU_12: - case TAG_ALU_16: { - if (ins->compact_branch) - return; + switch (ins->type) { + case TAG_ALU_4: + case TAG_ALU_8: + case TAG_ALU_12: + case TAG_ALU_16: { + if (ins->compact_branch) + return; - struct phys_reg src1 = index_to_reg(ctx, l, ins->src[0], src_shift[0]); - struct phys_reg src2 = index_to_reg(ctx, l, ins->src[1], src_shift[1]); - struct phys_reg dest = index_to_reg(ctx, l, ins->dest, dest_shift); + struct phys_reg src1 = index_to_reg(ctx, l, ins->src[0], src_shift[0]); + struct phys_reg src2 = index_to_reg(ctx, l, ins->src[1], src_shift[1]); + struct phys_reg dest = index_to_reg(ctx, l, ins->dest, dest_shift); - mir_set_bytemask(ins, mir_bytemask(ins) << dest.offset); + mir_set_bytemask(ins, mir_bytemask(ins) << dest.offset); - unsigned dest_offset = - GET_CHANNEL_COUNT(alu_opcode_props[ins->op].props) ? 0 : - dest.offset; + unsigned dest_offset = + GET_CHANNEL_COUNT(alu_opcode_props[ins->op].props) ? 
0 : dest.offset; - offset_swizzle(ins->swizzle[0], src1.offset, src1.shift, dest.shift, dest_offset); - if (!ins->has_inline_constant) - offset_swizzle(ins->swizzle[1], src2.offset, src2.shift, dest.shift, dest_offset); - if (ins->src[0] != ~0) - ins->src[0] = SSA_FIXED_REGISTER(src1.reg); - if (ins->src[1] != ~0) - ins->src[1] = SSA_FIXED_REGISTER(src2.reg); - if (ins->dest != ~0) - ins->dest = SSA_FIXED_REGISTER(dest.reg); - break; - } + offset_swizzle(ins->swizzle[0], src1.offset, src1.shift, dest.shift, + dest_offset); + if (!ins->has_inline_constant) + offset_swizzle(ins->swizzle[1], src2.offset, src2.shift, dest.shift, + dest_offset); + if (ins->src[0] != ~0) + ins->src[0] = SSA_FIXED_REGISTER(src1.reg); + if (ins->src[1] != ~0) + ins->src[1] = SSA_FIXED_REGISTER(src2.reg); + if (ins->dest != ~0) + ins->dest = SSA_FIXED_REGISTER(dest.reg); + break; + } - case TAG_LOAD_STORE_4: { - /* Which physical register we read off depends on - * whether we are loading or storing -- think about the - * logical dataflow */ + case TAG_LOAD_STORE_4: { + /* Which physical register we read off depends on + * whether we are loading or storing -- think about the + * logical dataflow */ - bool encodes_src = OP_IS_STORE(ins->op); + bool encodes_src = OP_IS_STORE(ins->op); - if (encodes_src) { - struct phys_reg src = index_to_reg(ctx, l, ins->src[0], src_shift[0]); - assert(src.reg == 26 || src.reg == 27); + if (encodes_src) { + struct phys_reg src = index_to_reg(ctx, l, ins->src[0], src_shift[0]); + assert(src.reg == 26 || src.reg == 27); - ins->src[0] = SSA_FIXED_REGISTER(src.reg); - offset_swizzle(ins->swizzle[0], src.offset, src.shift, 0, 0); - } else { - struct phys_reg dst = index_to_reg(ctx, l, ins->dest, dest_shift); + ins->src[0] = SSA_FIXED_REGISTER(src.reg); + offset_swizzle(ins->swizzle[0], src.offset, src.shift, 0, 0); + } else { + struct phys_reg dst = index_to_reg(ctx, l, ins->dest, dest_shift); - ins->dest = SSA_FIXED_REGISTER(dst.reg); - offset_swizzle(ins->swizzle[0], 0, 2, dest_shift, dst.offset); - mir_set_bytemask(ins, mir_bytemask(ins) << dst.offset); - } + ins->dest = SSA_FIXED_REGISTER(dst.reg); + offset_swizzle(ins->swizzle[0], 0, 2, dest_shift, dst.offset); + mir_set_bytemask(ins, mir_bytemask(ins) << dst.offset); + } - /* We also follow up by actual arguments */ + /* We also follow up by actual arguments */ - for (int i = 1; i <= 3; i++) { - unsigned src_index = ins->src[i]; - if (src_index != ~0) { - struct phys_reg src = index_to_reg(ctx, l, src_index, src_shift[i]); - unsigned component = src.offset >> src.shift; - assert(component << src.shift == src.offset); - ins->src[i] = SSA_FIXED_REGISTER(src.reg); - ins->swizzle[i][0] += component; - } - } + for (int i = 1; i <= 3; i++) { + unsigned src_index = ins->src[i]; + if (src_index != ~0) { + struct phys_reg src = index_to_reg(ctx, l, src_index, src_shift[i]); + unsigned component = src.offset >> src.shift; + assert(component << src.shift == src.offset); + ins->src[i] = SSA_FIXED_REGISTER(src.reg); + ins->swizzle[i][0] += component; + } + } - break; - } + break; + } - case TAG_TEXTURE_4: { - if (ins->op == midgard_tex_op_barrier) - break; + case TAG_TEXTURE_4: { + if (ins->op == midgard_tex_op_barrier) + break; - /* Grab RA results */ - struct phys_reg dest = index_to_reg(ctx, l, ins->dest, dest_shift); - struct phys_reg coord = index_to_reg(ctx, l, ins->src[1], src_shift[1]); - struct phys_reg lod = index_to_reg(ctx, l, ins->src[2], src_shift[2]); - struct phys_reg offset = index_to_reg(ctx, l, ins->src[3], src_shift[3]); + /* 
Grab RA results */ + struct phys_reg dest = index_to_reg(ctx, l, ins->dest, dest_shift); + struct phys_reg coord = index_to_reg(ctx, l, ins->src[1], src_shift[1]); + struct phys_reg lod = index_to_reg(ctx, l, ins->src[2], src_shift[2]); + struct phys_reg offset = index_to_reg(ctx, l, ins->src[3], src_shift[3]); - /* First, install the texture coordinate */ - if (ins->src[1] != ~0) - ins->src[1] = SSA_FIXED_REGISTER(coord.reg); - offset_swizzle(ins->swizzle[1], coord.offset, coord.shift, dest.shift, 0); + /* First, install the texture coordinate */ + if (ins->src[1] != ~0) + ins->src[1] = SSA_FIXED_REGISTER(coord.reg); + offset_swizzle(ins->swizzle[1], coord.offset, coord.shift, dest.shift, 0); - /* Next, install the destination */ - if (ins->dest != ~0) - ins->dest = SSA_FIXED_REGISTER(dest.reg); - offset_swizzle(ins->swizzle[0], 0, 2, dest.shift, - dest_shift == 1 ? dest.offset % 8 : - dest.offset); - mir_set_bytemask(ins, mir_bytemask(ins) << dest.offset); + /* Next, install the destination */ + if (ins->dest != ~0) + ins->dest = SSA_FIXED_REGISTER(dest.reg); + offset_swizzle(ins->swizzle[0], 0, 2, dest.shift, + dest_shift == 1 ? dest.offset % 8 : dest.offset); + mir_set_bytemask(ins, mir_bytemask(ins) << dest.offset); - /* If there is a register LOD/bias, use it */ - if (ins->src[2] != ~0) { - assert(!(lod.offset & 3)); - ins->src[2] = SSA_FIXED_REGISTER(lod.reg); - ins->swizzle[2][0] = lod.offset / 4; - } + /* If there is a register LOD/bias, use it */ + if (ins->src[2] != ~0) { + assert(!(lod.offset & 3)); + ins->src[2] = SSA_FIXED_REGISTER(lod.reg); + ins->swizzle[2][0] = lod.offset / 4; + } - /* If there is an offset register, install it */ - if (ins->src[3] != ~0) { - ins->src[3] = SSA_FIXED_REGISTER(offset.reg); - ins->swizzle[3][0] = offset.offset / 4; - } + /* If there is an offset register, install it */ + if (ins->src[3] != ~0) { + ins->src[3] = SSA_FIXED_REGISTER(offset.reg); + ins->swizzle[3][0] = offset.offset / 4; + } - break; - } + break; + } - default: - break; - } + default: + break; + } } static void install_registers(compiler_context *ctx, struct lcra_state *l) { - mir_foreach_instr_global(ctx, ins) - install_registers_instr(ctx, l, ins); + mir_foreach_instr_global(ctx, ins) + install_registers_instr(ctx, l, ins); } - /* If register allocation fails, find the best spill node */ static signed -mir_choose_spill_node( - compiler_context *ctx, - struct lcra_state *l) +mir_choose_spill_node(compiler_context *ctx, struct lcra_state *l) { - /* We can't spill a previously spilled value or an unspill */ + /* We can't spill a previously spilled value or an unspill */ - mir_foreach_instr_global(ctx, ins) { - if (ins->no_spill & (1 << l->spill_class)) { - lcra_set_node_spill_cost(l, ins->dest, -1); + mir_foreach_instr_global(ctx, ins) { + if (ins->no_spill & (1 << l->spill_class)) { + lcra_set_node_spill_cost(l, ins->dest, -1); - if (l->spill_class != REG_CLASS_WORK) { - mir_foreach_src(ins, s) - lcra_set_node_spill_cost(l, ins->src[s], -1); - } - } - } + if (l->spill_class != REG_CLASS_WORK) { + mir_foreach_src(ins, s) + lcra_set_node_spill_cost(l, ins->src[s], -1); + } + } + } - return lcra_get_best_spill_node(l); + return lcra_get_best_spill_node(l); } /* Once we've chosen a spill node, spill it */ static void -mir_spill_register( - compiler_context *ctx, - unsigned spill_node, - unsigned spill_class, - unsigned *spill_count) +mir_spill_register(compiler_context *ctx, unsigned spill_node, + unsigned spill_class, unsigned *spill_count) { - if (spill_class == REG_CLASS_WORK && 
ctx->inputs->is_blend) - unreachable("Blend shader spilling is currently unimplemented"); + if (spill_class == REG_CLASS_WORK && ctx->inputs->is_blend) + unreachable("Blend shader spilling is currently unimplemented"); - unsigned spill_index = ctx->temp_count; + unsigned spill_index = ctx->temp_count; - /* We have a spill node, so check the class. Work registers - * legitimately spill to TLS, but special registers just spill to work - * registers */ + /* We have a spill node, so check the class. Work registers + * legitimately spill to TLS, but special registers just spill to work + * registers */ - bool is_special = spill_class != REG_CLASS_WORK; - bool is_special_w = spill_class == REG_CLASS_TEXW; + bool is_special = spill_class != REG_CLASS_WORK; + bool is_special_w = spill_class == REG_CLASS_TEXW; - /* Allocate TLS slot (maybe) */ - unsigned spill_slot = !is_special ? (*spill_count)++ : 0; + /* Allocate TLS slot (maybe) */ + unsigned spill_slot = !is_special ? (*spill_count)++ : 0; - /* For special reads, figure out how many bytes we need */ - unsigned read_bytemask = 0; + /* For special reads, figure out how many bytes we need */ + unsigned read_bytemask = 0; - /* If multiple instructions write to this destination, we'll have to - * fill from TLS before writing */ - unsigned write_count = 0; + /* If multiple instructions write to this destination, we'll have to + * fill from TLS before writing */ + unsigned write_count = 0; - mir_foreach_instr_global_safe(ctx, ins) { - read_bytemask |= mir_bytemask_of_read_components(ins, spill_node); - if (ins->dest == spill_node) - ++write_count; - } + mir_foreach_instr_global_safe(ctx, ins) { + read_bytemask |= mir_bytemask_of_read_components(ins, spill_node); + if (ins->dest == spill_node) + ++write_count; + } - /* For TLS, replace all stores to the spilled node. For - * special reads, just keep as-is; the class will be demoted - * implicitly. For special writes, spill to a work register */ + /* For TLS, replace all stores to the spilled node. For + * special reads, just keep as-is; the class will be demoted + * implicitly. 
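/* As a rough standalone illustration of the analysis pass just above: one
 * linear walk records which bytes of the chosen spill node are ever read (so
 * later fills can be masked down) and how many instructions write it (so
 * partial writes know they must fill from TLS first). The toy types below
 * are hypothetical, not MIR: */

struct toy_ins {
   unsigned dest;          /* destination node index                         */
   unsigned src[3];        /* source node indices, ~0u when unused           */
   unsigned read_bytemask; /* bytes read from a matching source (simplified:
                            * one mask shared by all sources)                */
};

struct spill_facts {
   unsigned read_bytemask; /* union of bytes ever read from the node  */
   unsigned write_count;   /* number of instructions writing the node */
};

static struct spill_facts
analyze_spill_node(const struct toy_ins *ins, unsigned count, unsigned node)
{
   struct spill_facts f = {0, 0};

   for (unsigned i = 0; i < count; ++i) {
      for (unsigned s = 0; s < 3; ++s) {
         if (ins[i].src[s] == node)
            f.read_bytemask |= ins[i].read_bytemask;
      }

      if (ins[i].dest == node)
         f.write_count++;
   }

   return f;
}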
For special writes, spill to a work register */ - if (!is_special || is_special_w) { - if (is_special_w) - spill_slot = spill_index++; + if (!is_special || is_special_w) { + if (is_special_w) + spill_slot = spill_index++; - unsigned last_id = ~0; - unsigned last_fill = ~0; - unsigned last_spill_index = ~0; - midgard_instruction *last_spill = NULL; + unsigned last_id = ~0; + unsigned last_fill = ~0; + unsigned last_spill_index = ~0; + midgard_instruction *last_spill = NULL; - mir_foreach_block(ctx, _block) { - midgard_block *block = (midgard_block *) _block; - mir_foreach_instr_in_block_safe(block, ins) { - if (ins->dest != spill_node) continue; + mir_foreach_block(ctx, _block) { + midgard_block *block = (midgard_block *)_block; + mir_foreach_instr_in_block_safe(block, ins) { + if (ins->dest != spill_node) + continue; - /* Note: it's important to match the mask of the spill - * with the mask of the instruction whose destination - * we're spilling, or otherwise we'll read invalid - * components and can fail RA in a subsequent iteration - */ + /* Note: it's important to match the mask of the spill + * with the mask of the instruction whose destination + * we're spilling, or otherwise we'll read invalid + * components and can fail RA in a subsequent iteration + */ - if (is_special_w) { - midgard_instruction st = v_mov(spill_node, spill_slot); - st.no_spill |= (1 << spill_class); - st.mask = ins->mask; - st.dest_type = st.src_types[1] = ins->dest_type; + if (is_special_w) { + midgard_instruction st = v_mov(spill_node, spill_slot); + st.no_spill |= (1 << spill_class); + st.mask = ins->mask; + st.dest_type = st.src_types[1] = ins->dest_type; - /* Hint: don't rewrite this node */ - st.hint = true; + /* Hint: don't rewrite this node */ + st.hint = true; - mir_insert_instruction_after_scheduled(ctx, block, ins, st); - } else { - unsigned bundle = ins->bundle_id; - unsigned dest = (bundle == last_id)? last_spill_index : spill_index++; + mir_insert_instruction_after_scheduled(ctx, block, ins, st); + } else { + unsigned bundle = ins->bundle_id; + unsigned dest = + (bundle == last_id) ? 
last_spill_index : spill_index++; - unsigned bytemask = mir_bytemask(ins); - unsigned write_mask = mir_from_bytemask(mir_round_bytemask_up( - bytemask, 32), 32); + unsigned bytemask = mir_bytemask(ins); + unsigned write_mask = + mir_from_bytemask(mir_round_bytemask_up(bytemask, 32), 32); - if (write_count > 1 && bytemask != 0xFFFF && bundle != last_fill) { - midgard_instruction read = - v_load_store_scratch(dest, spill_slot, false, 0xF); - mir_insert_instruction_before_scheduled(ctx, block, ins, read); - write_mask = 0xF; - last_fill = bundle; - } + if (write_count > 1 && bytemask != 0xFFFF && + bundle != last_fill) { + midgard_instruction read = + v_load_store_scratch(dest, spill_slot, false, 0xF); + mir_insert_instruction_before_scheduled(ctx, block, ins, + read); + write_mask = 0xF; + last_fill = bundle; + } - ins->dest = dest; - ins->no_spill |= (1 << spill_class); + ins->dest = dest; + ins->no_spill |= (1 << spill_class); - bool move = false; + bool move = false; - /* In the same bundle, reads of the destination - * of the spilt instruction need to be direct */ - midgard_instruction *it = ins; - while ((it = list_first_entry(&it->link, midgard_instruction, link)) - && (it->bundle_id == bundle)) { + /* In the same bundle, reads of the destination + * of the spilt instruction need to be direct */ + midgard_instruction *it = ins; + while ((it = list_first_entry(&it->link, midgard_instruction, + link)) && + (it->bundle_id == bundle)) { - if (!mir_has_arg(it, spill_node)) continue; + if (!mir_has_arg(it, spill_node)) + continue; - mir_rewrite_index_src_single(it, spill_node, dest); + mir_rewrite_index_src_single(it, spill_node, dest); - /* The spilt instruction will write to - * a work register for `it` to read but - * the spill needs an LD/ST register */ - move = true; - } + /* The spilt instruction will write to + * a work register for `it` to read but + * the spill needs an LD/ST register */ + move = true; + } - if (move) - dest = spill_index++; + if (move) + dest = spill_index++; - if (last_id == bundle) { - last_spill->mask |= write_mask; - u_foreach_bit(c, write_mask) - last_spill->swizzle[0][c] = c; - } else { - midgard_instruction st = - v_load_store_scratch(dest, spill_slot, true, write_mask); - last_spill = mir_insert_instruction_after_scheduled(ctx, block, ins, st); - } + if (last_id == bundle) { + last_spill->mask |= write_mask; + u_foreach_bit(c, write_mask) + last_spill->swizzle[0][c] = c; + } else { + midgard_instruction st = + v_load_store_scratch(dest, spill_slot, true, write_mask); + last_spill = mir_insert_instruction_after_scheduled( + ctx, block, ins, st); + } - if (move) { - midgard_instruction mv = v_mov(ins->dest, dest); - mv.no_spill |= (1 << spill_class); + if (move) { + midgard_instruction mv = v_mov(ins->dest, dest); + mv.no_spill |= (1 << spill_class); - mir_insert_instruction_after_scheduled(ctx, block, ins, mv); - } + mir_insert_instruction_after_scheduled(ctx, block, ins, mv); + } - last_id = bundle; - last_spill_index = ins->dest; - } + last_id = bundle; + last_spill_index = ins->dest; + } - if (!is_special) - ctx->spills++; - } - } - } + if (!is_special) + ctx->spills++; + } + } + } - /* Insert a load from TLS before the first consecutive - * use of the node, rewriting to use spilled indices to - * break up the live range. Or, for special, insert a - * move. Ironically the latter *increases* register - * pressure, but the two uses of the spilling mechanism - * are somewhat orthogonal. 
(special spilling is to use - * work registers to back special registers; TLS - * spilling is to use memory to back work registers) */ + /* Insert a load from TLS before the first consecutive + * use of the node, rewriting to use spilled indices to + * break up the live range. Or, for special, insert a + * move. Ironically the latter *increases* register + * pressure, but the two uses of the spilling mechanism + * are somewhat orthogonal. (special spilling is to use + * work registers to back special registers; TLS + * spilling is to use memory to back work registers) */ - mir_foreach_block(ctx, _block) { - midgard_block *block = (midgard_block *) _block; - mir_foreach_instr_in_block(block, ins) { - /* We can't rewrite the moves used to spill in the - * first place. These moves are hinted. */ - if (ins->hint) continue; + mir_foreach_block(ctx, _block) { + midgard_block *block = (midgard_block *)_block; + mir_foreach_instr_in_block(block, ins) { + /* We can't rewrite the moves used to spill in the + * first place. These moves are hinted. */ + if (ins->hint) + continue; - /* If we don't use the spilled value, nothing to do */ - if (!mir_has_arg(ins, spill_node)) continue; + /* If we don't use the spilled value, nothing to do */ + if (!mir_has_arg(ins, spill_node)) + continue; - unsigned index = 0; + unsigned index = 0; - if (!is_special_w) { - index = ++spill_index; + if (!is_special_w) { + index = ++spill_index; - midgard_instruction *before = ins; - midgard_instruction st; + midgard_instruction *before = ins; + midgard_instruction st; - if (is_special) { - /* Move */ - st = v_mov(spill_node, index); - st.no_spill |= (1 << spill_class); - } else { - /* TLS load */ - st = v_load_store_scratch(index, spill_slot, false, 0xF); - } + if (is_special) { + /* Move */ + st = v_mov(spill_node, index); + st.no_spill |= (1 << spill_class); + } else { + /* TLS load */ + st = v_load_store_scratch(index, spill_slot, false, 0xF); + } - /* Mask the load based on the component count - * actually needed to prevent RA loops */ + /* Mask the load based on the component count + * actually needed to prevent RA loops */ - st.mask = mir_from_bytemask(mir_round_bytemask_up( - read_bytemask, 32), 32); + st.mask = + mir_from_bytemask(mir_round_bytemask_up(read_bytemask, 32), 32); - mir_insert_instruction_before_scheduled(ctx, block, before, st); - } else { - /* Special writes already have their move spilled in */ - index = spill_slot; - } + mir_insert_instruction_before_scheduled(ctx, block, before, st); + } else { + /* Special writes already have their move spilled in */ + index = spill_slot; + } + /* Rewrite to use */ + mir_rewrite_index_src_single(ins, spill_node, index); - /* Rewrite to use */ - mir_rewrite_index_src_single(ins, spill_node, index); + if (!is_special) + ctx->fills++; + } + } - if (!is_special) - ctx->fills++; - } - } + /* Reset hints */ - /* Reset hints */ - - mir_foreach_instr_global(ctx, ins) { - ins->hint = false; - } + mir_foreach_instr_global(ctx, ins) { + ins->hint = false; + } } static void mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff) { - unsigned uniforms = ctx->info->push.count / 4; - unsigned old_work_count = 16 - MAX2(uniforms - 8, 0); - unsigned work_count = 16 - MAX2((new_cutoff - 8), 0); + unsigned uniforms = ctx->info->push.count / 4; + unsigned old_work_count = 16 - MAX2(uniforms - 8, 0); + unsigned work_count = 16 - MAX2((new_cutoff - 8), 0); - unsigned min_demote = SSA_FIXED_REGISTER(old_work_count); - unsigned max_demote = SSA_FIXED_REGISTER(work_count); + 
unsigned min_demote = SSA_FIXED_REGISTER(old_work_count); + unsigned max_demote = SSA_FIXED_REGISTER(work_count); - mir_foreach_block(ctx, _block) { - midgard_block *block = (midgard_block *) _block; - mir_foreach_instr_in_block(block, ins) { - mir_foreach_src(ins, i) { - if (ins->src[i] < min_demote || ins->src[i] >= max_demote) - continue; + mir_foreach_block(ctx, _block) { + midgard_block *block = (midgard_block *)_block; + mir_foreach_instr_in_block(block, ins) { + mir_foreach_src(ins, i) { + if (ins->src[i] < min_demote || ins->src[i] >= max_demote) + continue; - midgard_instruction *before = ins; + midgard_instruction *before = ins; - unsigned temp = make_compiler_temp(ctx); - unsigned idx = (23 - SSA_REG_FROM_FIXED(ins->src[i])) * 4; - assert(idx < ctx->info->push.count); + unsigned temp = make_compiler_temp(ctx); + unsigned idx = (23 - SSA_REG_FROM_FIXED(ins->src[i])) * 4; + assert(idx < ctx->info->push.count); - ctx->ubo_mask |= BITSET_BIT(ctx->info->push.words[idx].ubo); + ctx->ubo_mask |= BITSET_BIT(ctx->info->push.words[idx].ubo); - midgard_instruction ld = { - .type = TAG_LOAD_STORE_4, - .mask = 0xF, - .dest = temp, - .dest_type = ins->src_types[i], - .src = { ~0, ~0, ~0, ~0 }, - .swizzle = SWIZZLE_IDENTITY_4, - .op = midgard_op_ld_ubo_128, - .load_store = { - .index_reg = REGISTER_LDST_ZERO, - }, - .constants.u32[0] = ctx->info->push.words[idx].offset, - }; + midgard_instruction ld = { + .type = TAG_LOAD_STORE_4, + .mask = 0xF, + .dest = temp, + .dest_type = ins->src_types[i], + .src = {~0, ~0, ~0, ~0}, + .swizzle = SWIZZLE_IDENTITY_4, + .op = midgard_op_ld_ubo_128, + .load_store = + { + .index_reg = REGISTER_LDST_ZERO, + }, + .constants.u32[0] = ctx->info->push.words[idx].offset, + }; - midgard_pack_ubo_index_imm(&ld.load_store, - ctx->info->push.words[idx].ubo); + midgard_pack_ubo_index_imm(&ld.load_store, + ctx->info->push.words[idx].ubo); - mir_insert_instruction_before_scheduled(ctx, block, before, ld); + mir_insert_instruction_before_scheduled(ctx, block, before, ld); - mir_rewrite_index_src_single(ins, ins->src[i], temp); - } - } - } + mir_rewrite_index_src_single(ins, ins->src[i], temp); + } + } + } - ctx->info->push.count = MIN2(ctx->info->push.count, new_cutoff * 4); + ctx->info->push.count = MIN2(ctx->info->push.count, new_cutoff * 4); } /* Run register allocation in a loop, spilling until we succeed */ @@ -1201,56 +1210,57 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff) void mir_ra(compiler_context *ctx) { - struct lcra_state *l = NULL; - bool spilled = false; - int iter_count = 1000; /* max iterations */ + struct lcra_state *l = NULL; + bool spilled = false; + int iter_count = 1000; /* max iterations */ - /* Number of 128-bit slots in memory we've spilled into */ - unsigned spill_count = DIV_ROUND_UP(ctx->info->tls_size, 16); + /* Number of 128-bit slots in memory we've spilled into */ + unsigned spill_count = DIV_ROUND_UP(ctx->info->tls_size, 16); + mir_create_pipeline_registers(ctx); - mir_create_pipeline_registers(ctx); + do { + if (spilled) { + signed spill_node = mir_choose_spill_node(ctx, l); + unsigned uniforms = ctx->info->push.count / 4; - do { - if (spilled) { - signed spill_node = mir_choose_spill_node(ctx, l); - unsigned uniforms = ctx->info->push.count / 4; + /* It's a lot cheaper to demote uniforms to get more + * work registers than to spill to TLS. 
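/* The arithmetic in mir_demote_uniforms above implies the trade-off this
 * branch is about to exploit: only pushed (vec4) uniforms beyond the first 8
 * eat into the 16-entry work-register budget, so demoting them back to UBO
 * loads frees work registers without touching TLS. A standalone restatement
 * of that formula (hypothetical helper name): */

static unsigned
work_registers_available(unsigned pushed_vec4_uniforms)
{
   /* Mirrors "16 - MAX2(uniforms - 8, 0)" from mir_demote_uniforms:
    * 8 uniforms -> 16 work regs, 12 -> 12, 16 -> 8 */
   unsigned over = pushed_vec4_uniforms > 8 ? pushed_vec4_uniforms - 8 : 0;
   return 16 - over;
}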
*/ + if (l->spill_class == REG_CLASS_WORK && uniforms > 8) { + mir_demote_uniforms(ctx, MAX2(uniforms - 4, 8)); + } else if (spill_node == -1) { + fprintf(stderr, "ERROR: Failed to choose spill node\n"); + lcra_free(l); + return; + } else { + mir_spill_register(ctx, spill_node, l->spill_class, &spill_count); + } + } - /* It's a lot cheaper to demote uniforms to get more - * work registers than to spill to TLS. */ - if (l->spill_class == REG_CLASS_WORK && uniforms > 8) { - mir_demote_uniforms(ctx, MAX2(uniforms - 4, 8)); - } else if (spill_node == -1) { - fprintf(stderr, "ERROR: Failed to choose spill node\n"); - lcra_free(l); - return; - } else { - mir_spill_register(ctx, spill_node, l->spill_class, &spill_count); - } - } + mir_squeeze_index(ctx); + mir_invalidate_liveness(ctx); - mir_squeeze_index(ctx); - mir_invalidate_liveness(ctx); + if (l) { + lcra_free(l); + l = NULL; + } - if (l) { - lcra_free(l); - l = NULL; - } + l = allocate_registers(ctx, &spilled); + } while (spilled && ((iter_count--) > 0)); - l = allocate_registers(ctx, &spilled); - } while(spilled && ((iter_count--) > 0)); + if (iter_count <= 0) { + fprintf( + stderr, + "panfrost: Gave up allocating registers, rendering will be incomplete\n"); + assert(0); + } - if (iter_count <= 0) { - fprintf(stderr, "panfrost: Gave up allocating registers, rendering will be incomplete\n"); - assert(0); - } + /* Report spilling information. spill_count is in 128-bit slots (vec4 x + * fp32), but tls_size is in bytes, so multiply by 16 */ - /* Report spilling information. spill_count is in 128-bit slots (vec4 x - * fp32), but tls_size is in bytes, so multiply by 16 */ + ctx->info->tls_size = spill_count * 16; - ctx->info->tls_size = spill_count * 16; + install_registers(ctx, l); - install_registers(ctx, l); - - lcra_free(l); + lcra_free(l); } diff --git a/src/panfrost/midgard/midgard_ra_pipeline.c b/src/panfrost/midgard/midgard_ra_pipeline.c index 6f21ee1a699..3b0d07f2966 100644 --- a/src/panfrost/midgard/midgard_ra_pipeline.c +++ b/src/panfrost/midgard/midgard_ra_pipeline.c @@ -39,106 +39,108 @@ */ static bool -mir_pipeline_ins( - compiler_context *ctx, - midgard_block *block, - midgard_bundle *bundle, unsigned i, - unsigned pipeline_count) +mir_pipeline_ins(compiler_context *ctx, midgard_block *block, + midgard_bundle *bundle, unsigned i, unsigned pipeline_count) { - midgard_instruction *ins = bundle->instructions[i]; + midgard_instruction *ins = bundle->instructions[i]; - /* Our goal is to create a pipeline register. Pipeline registers are - * created at the start of the bundle and are destroyed at the end. So - * we conservatively require: - * - * 1. Each component read in the second stage is written in the first stage. - * 2. The index is not live after the bundle. - * 3. We're not a special index (writeout, conditionals, ..) - * - * Rationale: #1 ensures that there is no need to go before the - * creation of the bundle, so the pipeline register can exist. #2 is - * since the pipeline register will be destroyed at the end. This - * ensures that nothing will try to read/write the pipeline register - * once it is not live, and that there's no need to go earlier. */ + /* Our goal is to create a pipeline register. Pipeline registers are + * created at the start of the bundle and are destroyed at the end. So + * we conservatively require: + * + * 1. Each component read in the second stage is written in the first stage. + * 2. The index is not live after the bundle. + * 3. We're not a special index (writeout, conditionals, ..) 
+ * + * Rationale: #1 ensures that there is no need to go before the + * creation of the bundle, so the pipeline register can exist. #2 is + * since the pipeline register will be destroyed at the end. This + * ensures that nothing will try to read/write the pipeline register + * once it is not live, and that there's no need to go earlier. */ - unsigned node = ins->dest; - unsigned read_mask = 0; + unsigned node = ins->dest; + unsigned read_mask = 0; - if (node >= SSA_FIXED_MINIMUM) - return false; + if (node >= SSA_FIXED_MINIMUM) + return false; - if (node == ctx->blend_src1) - return false; + if (node == ctx->blend_src1) + return false; - /* Analyze the bundle for a per-byte read mask */ + /* Analyze the bundle for a per-byte read mask */ - for (unsigned j = 0; j < bundle->instruction_count; ++j) { - midgard_instruction *q = bundle->instructions[j]; + for (unsigned j = 0; j < bundle->instruction_count; ++j) { + midgard_instruction *q = bundle->instructions[j]; - /* The fragment colour can't be pipelined (well, it is - * pipelined in r0, but this is a delicate dance with - * scheduling and RA, not for us to worry about) */ + /* The fragment colour can't be pipelined (well, it is + * pipelined in r0, but this is a delicate dance with + * scheduling and RA, not for us to worry about) */ - if (q->compact_branch && q->writeout && mir_has_arg(q, node)) - return false; + if (q->compact_branch && q->writeout && mir_has_arg(q, node)) + return false; - if (q->unit < UNIT_VADD) continue; - read_mask |= mir_bytemask_of_read_components(q, node); - } + if (q->unit < UNIT_VADD) + continue; + read_mask |= mir_bytemask_of_read_components(q, node); + } - /* Now check what's written in the beginning stage */ - for (unsigned j = 0; j < bundle->instruction_count; ++j) { - midgard_instruction *q = bundle->instructions[j]; - if (q->unit >= UNIT_VADD) break; - if (q->dest != node) continue; + /* Now check what's written in the beginning stage */ + for (unsigned j = 0; j < bundle->instruction_count; ++j) { + midgard_instruction *q = bundle->instructions[j]; + if (q->unit >= UNIT_VADD) + break; + if (q->dest != node) + continue; - /* Remove the written mask from the read requirements */ - read_mask &= ~mir_bytemask(q); - } + /* Remove the written mask from the read requirements */ + read_mask &= ~mir_bytemask(q); + } - /* Check for leftovers */ - if (read_mask) - return false; + /* Check for leftovers */ + if (read_mask) + return false; - /* We want to know if we live after this bundle, so check if - * we're live after the last instruction of the bundle */ + /* We want to know if we live after this bundle, so check if + * we're live after the last instruction of the bundle */ - midgard_instruction *end = bundle->instructions[ - bundle->instruction_count - 1]; + midgard_instruction *end = + bundle->instructions[bundle->instruction_count - 1]; - if (mir_is_live_after(ctx, block, end, ins->dest)) - return false; + if (mir_is_live_after(ctx, block, end, ins->dest)) + return false; - /* We're only live in this bundle -- pipeline! */ - unsigned preg = SSA_FIXED_REGISTER(24 + pipeline_count); + /* We're only live in this bundle -- pipeline! 
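/* Condition #1 above reduces to a byte-mask subtraction: every byte the
 * second pipeline stage reads of the candidate node must have been written by
 * the first stage inside the same bundle, since a pipeline register cannot
 * carry values in from outside. A toy restatement (hypothetical helper): */

#include <stdbool.h>

static bool
covered_by_first_stage(unsigned second_stage_read_mask,
                       unsigned first_stage_write_mask)
{
   /* Leftover bytes would have to come from before the bundle */
   return (second_stage_read_mask & ~first_stage_write_mask) == 0;
}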
*/ + unsigned preg = SSA_FIXED_REGISTER(24 + pipeline_count); - for (unsigned j = 0; j < bundle->instruction_count; ++j) { - midgard_instruction *q = bundle->instructions[j]; + for (unsigned j = 0; j < bundle->instruction_count; ++j) { + midgard_instruction *q = bundle->instructions[j]; - if (q->unit >= UNIT_VADD) - mir_rewrite_index_src_single(q, node, preg); - else - mir_rewrite_index_dst_single(q, node, preg); - } + if (q->unit >= UNIT_VADD) + mir_rewrite_index_src_single(q, node, preg); + else + mir_rewrite_index_dst_single(q, node, preg); + } - return true; + return true; } void mir_create_pipeline_registers(compiler_context *ctx) { - mir_invalidate_liveness(ctx); + mir_invalidate_liveness(ctx); - mir_foreach_block(ctx, _block) { - midgard_block *block = (midgard_block *) _block; + mir_foreach_block(ctx, _block) { + midgard_block *block = (midgard_block *)_block; - mir_foreach_bundle_in_block(block, bundle) { - if (!mir_is_alu_bundle(bundle)) continue; - if (bundle->instruction_count < 2) continue; + mir_foreach_bundle_in_block(block, bundle) { + if (!mir_is_alu_bundle(bundle)) + continue; + if (bundle->instruction_count < 2) + continue; - /* Only first 2 instructions could pipeline */ - bool succ = mir_pipeline_ins(ctx, block, bundle, 0, 0); - mir_pipeline_ins(ctx, block, bundle, 1, succ); - } - } + /* Only first 2 instructions could pipeline */ + bool succ = mir_pipeline_ins(ctx, block, bundle, 0, 0); + mir_pipeline_ins(ctx, block, bundle, 1, succ); + } + } } diff --git a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c index 078c30fb54d..ad10fbbc94d 100644 --- a/src/panfrost/midgard/midgard_schedule.c +++ b/src/panfrost/midgard/midgard_schedule.c @@ -22,12 +22,12 @@ * SOFTWARE. */ +#include "util/half_float.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "compiler.h" #include "midgard_ops.h" #include "midgard_quirks.h" -#include "util/u_memory.h" -#include "util/u_math.h" -#include "util/half_float.h" /* Scheduling for Midgard is complicated, to say the least. 
ALU instructions * must be grouped into VLIW bundles according to following model: @@ -63,148 +63,159 @@ #define BYTE_COUNT 16 static void -add_dependency(struct util_dynarray *table, unsigned index, uint16_t mask, midgard_instruction **instructions, unsigned child) +add_dependency(struct util_dynarray *table, unsigned index, uint16_t mask, + midgard_instruction **instructions, unsigned child) { - for (unsigned i = 0; i < BYTE_COUNT; ++i) { - if (!(mask & (1 << i))) - continue; + for (unsigned i = 0; i < BYTE_COUNT; ++i) { + if (!(mask & (1 << i))) + continue; - struct util_dynarray *parents = &table[(BYTE_COUNT * index) + i]; + struct util_dynarray *parents = &table[(BYTE_COUNT * index) + i]; - util_dynarray_foreach(parents, unsigned, parent) { - BITSET_WORD *dependents = instructions[*parent]->dependents; + util_dynarray_foreach(parents, unsigned, parent) { + BITSET_WORD *dependents = instructions[*parent]->dependents; - /* Already have the dependency */ - if (BITSET_TEST(dependents, child)) - continue; + /* Already have the dependency */ + if (BITSET_TEST(dependents, child)) + continue; - BITSET_SET(dependents, child); - instructions[child]->nr_dependencies++; - } - } + BITSET_SET(dependents, child); + instructions[child]->nr_dependencies++; + } + } } static void -mark_access(struct util_dynarray *table, unsigned index, uint16_t mask, unsigned parent) +mark_access(struct util_dynarray *table, unsigned index, uint16_t mask, + unsigned parent) { - for (unsigned i = 0; i < BYTE_COUNT; ++i) { - if (!(mask & (1 << i))) - continue; + for (unsigned i = 0; i < BYTE_COUNT; ++i) { + if (!(mask & (1 << i))) + continue; - util_dynarray_append(&table[(BYTE_COUNT * index) + i], unsigned, parent); - } + util_dynarray_append(&table[(BYTE_COUNT * index) + i], unsigned, parent); + } } static void -mir_create_dependency_graph(midgard_instruction **instructions, unsigned count, unsigned node_count) +mir_create_dependency_graph(midgard_instruction **instructions, unsigned count, + unsigned node_count) { - size_t sz = node_count * BYTE_COUNT; + size_t sz = node_count * BYTE_COUNT; - struct util_dynarray *last_read = calloc(sizeof(struct util_dynarray), sz); - struct util_dynarray *last_write = calloc(sizeof(struct util_dynarray), sz); + struct util_dynarray *last_read = calloc(sizeof(struct util_dynarray), sz); + struct util_dynarray *last_write = calloc(sizeof(struct util_dynarray), sz); - for (unsigned i = 0; i < sz; ++i) { - util_dynarray_init(&last_read[i], NULL); - util_dynarray_init(&last_write[i], NULL); - } + for (unsigned i = 0; i < sz; ++i) { + util_dynarray_init(&last_read[i], NULL); + util_dynarray_init(&last_write[i], NULL); + } - /* Initialize dependency graph */ - for (unsigned i = 0; i < count; ++i) { - instructions[i]->dependents = - calloc(BITSET_WORDS(count), sizeof(BITSET_WORD)); + /* Initialize dependency graph */ + for (unsigned i = 0; i < count; ++i) { + instructions[i]->dependents = + calloc(BITSET_WORDS(count), sizeof(BITSET_WORD)); - instructions[i]->nr_dependencies = 0; - } + instructions[i]->nr_dependencies = 0; + } - unsigned prev_ldst[3] = {~0, ~0, ~0}; + unsigned prev_ldst[3] = {~0, ~0, ~0}; - /* Populate dependency graph */ - for (signed i = count - 1; i >= 0; --i) { - if (instructions[i]->compact_branch) - continue; + /* Populate dependency graph */ + for (signed i = count - 1; i >= 0; --i) { + if (instructions[i]->compact_branch) + continue; - unsigned dest = instructions[i]->dest; - unsigned mask = mir_bytemask(instructions[i]); + unsigned dest = instructions[i]->dest; + 
unsigned mask = mir_bytemask(instructions[i]); - mir_foreach_src((*instructions), s) { - unsigned src = instructions[i]->src[s]; + mir_foreach_src((*instructions), s) { + unsigned src = instructions[i]->src[s]; - if (src < node_count) { - unsigned readmask = mir_bytemask_of_read_components(instructions[i], src); - add_dependency(last_write, src, readmask, instructions, i); - } - } + if (src < node_count) { + unsigned readmask = + mir_bytemask_of_read_components(instructions[i], src); + add_dependency(last_write, src, readmask, instructions, i); + } + } - /* Create a list of dependencies for each type of load/store - * instruction to prevent reordering. */ - if (instructions[i]->type == TAG_LOAD_STORE_4 && - load_store_opcode_props[instructions[i]->op].props & LDST_ADDRESS) { + /* Create a list of dependencies for each type of load/store + * instruction to prevent reordering. */ + if (instructions[i]->type == TAG_LOAD_STORE_4 && + load_store_opcode_props[instructions[i]->op].props & LDST_ADDRESS) { - unsigned type = instructions[i]->load_store.arg_reg | - instructions[i]->load_store.arg_comp; + unsigned type = instructions[i]->load_store.arg_reg | + instructions[i]->load_store.arg_comp; - unsigned idx; - switch (type) { - case LDST_SHARED: idx = 0; break; - case LDST_SCRATCH: idx = 1; break; - default: idx = 2; break; - } + unsigned idx; + switch (type) { + case LDST_SHARED: + idx = 0; + break; + case LDST_SCRATCH: + idx = 1; + break; + default: + idx = 2; + break; + } - unsigned prev = prev_ldst[idx]; + unsigned prev = prev_ldst[idx]; - if (prev != ~0) { - BITSET_WORD *dependents = instructions[prev]->dependents; + if (prev != ~0) { + BITSET_WORD *dependents = instructions[prev]->dependents; - /* Already have the dependency */ - if (BITSET_TEST(dependents, i)) - continue; + /* Already have the dependency */ + if (BITSET_TEST(dependents, i)) + continue; - BITSET_SET(dependents, i); - instructions[i]->nr_dependencies++; - } + BITSET_SET(dependents, i); + instructions[i]->nr_dependencies++; + } - prev_ldst[idx] = i; - } + prev_ldst[idx] = i; + } - if (dest < node_count) { - add_dependency(last_read, dest, mask, instructions, i); - add_dependency(last_write, dest, mask, instructions, i); - mark_access(last_write, dest, mask, i); - } + if (dest < node_count) { + add_dependency(last_read, dest, mask, instructions, i); + add_dependency(last_write, dest, mask, instructions, i); + mark_access(last_write, dest, mask, i); + } - mir_foreach_src((*instructions), s) { - unsigned src = instructions[i]->src[s]; + mir_foreach_src((*instructions), s) { + unsigned src = instructions[i]->src[s]; - if (src < node_count) { - unsigned readmask = mir_bytemask_of_read_components(instructions[i], src); - mark_access(last_read, src, readmask, i); - } - } - } + if (src < node_count) { + unsigned readmask = + mir_bytemask_of_read_components(instructions[i], src); + mark_access(last_read, src, readmask, i); + } + } + } - /* If there is a branch, all instructions depend on it, as interblock - * execution must be purely in-order */ + /* If there is a branch, all instructions depend on it, as interblock + * execution must be purely in-order */ - if (instructions[count - 1]->compact_branch) { - BITSET_WORD *dependents = instructions[count - 1]->dependents; + if (instructions[count - 1]->compact_branch) { + BITSET_WORD *dependents = instructions[count - 1]->dependents; - for (signed i = count - 2; i >= 0; --i) { - if (BITSET_TEST(dependents, i)) - continue; + for (signed i = count - 2; i >= 0; --i) { + if 
(BITSET_TEST(dependents, i)) + continue; - BITSET_SET(dependents, i); - instructions[i]->nr_dependencies++; - } - } + BITSET_SET(dependents, i); + instructions[i]->nr_dependencies++; + } + } - /* Free the intermediate structures */ - for (unsigned i = 0; i < sz; ++i) { - util_dynarray_fini(&last_read[i]); - util_dynarray_fini(&last_write[i]); - } + /* Free the intermediate structures */ + for (unsigned i = 0; i < sz; ++i) { + util_dynarray_fini(&last_read[i]); + util_dynarray_fini(&last_write[i]); + } - free(last_read); - free(last_write); + free(last_read); + free(last_write); } /* Does the mask cover more than a scalar? */ @@ -212,14 +223,14 @@ mir_create_dependency_graph(midgard_instruction **instructions, unsigned count, static bool is_single_component_mask(unsigned mask) { - int components = 0; + int components = 0; - for (int c = 0; c < 8; ++c) { - if (mask & (1 << c)) - components++; - } + for (int c = 0; c < 8; ++c) { + if (mask & (1 << c)) + components++; + } - return components == 1; + return components == 1; } /* Helpers for scheudling */ @@ -227,29 +238,30 @@ is_single_component_mask(unsigned mask) static bool mir_is_scalar(midgard_instruction *ains) { - /* Do we try to use it as a vector op? */ - if (!is_single_component_mask(ains->mask)) - return false; + /* Do we try to use it as a vector op? */ + if (!is_single_component_mask(ains->mask)) + return false; - /* Otherwise, check mode hazards */ - bool could_scalar = true; - unsigned szd = nir_alu_type_get_type_size(ains->dest_type); - unsigned sz0 = nir_alu_type_get_type_size(ains->src_types[0]); - unsigned sz1 = nir_alu_type_get_type_size(ains->src_types[1]); + /* Otherwise, check mode hazards */ + bool could_scalar = true; + unsigned szd = nir_alu_type_get_type_size(ains->dest_type); + unsigned sz0 = nir_alu_type_get_type_size(ains->src_types[0]); + unsigned sz1 = nir_alu_type_get_type_size(ains->src_types[1]); - /* Only 16/32-bit can run on a scalar unit */ - could_scalar &= (szd == 16) || (szd == 32); + /* Only 16/32-bit can run on a scalar unit */ + could_scalar &= (szd == 16) || (szd == 32); - if (ains->src[0] != ~0) - could_scalar &= (sz0 == 16) || (sz0 == 32); + if (ains->src[0] != ~0) + could_scalar &= (sz0 == 16) || (sz0 == 32); - if (ains->src[1] != ~0) - could_scalar &= (sz1 == 16) || (sz1 == 32); + if (ains->src[1] != ~0) + could_scalar &= (sz1 == 16) || (sz1 == 32); - if (midgard_is_integer_out_op(ains->op) && ains->outmod != midgard_outmod_keeplo) - return false; + if (midgard_is_integer_out_op(ains->op) && + ains->outmod != midgard_outmod_keeplo) + return false; - return could_scalar; + return could_scalar; } /* How many bytes does this ALU instruction add to the bundle? 
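/* Aside on is_single_component_mask() above: counting bits and comparing
 * against 1 is the same as asking whether the mask is a nonzero power of
 * two, so an equivalent formulation (sketch only) would be: */

#include <stdbool.h>

static bool
is_single_component_mask_alt(unsigned mask)
{
   /* Exactly one bit set <=> nonzero, and clearing the lowest set bit
    * leaves nothing */
   return mask != 0 && (mask & (mask - 1)) == 0;
}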
*/ @@ -257,14 +269,14 @@ mir_is_scalar(midgard_instruction *ains) static unsigned bytes_for_instruction(midgard_instruction *ains) { - if (ains->unit & UNITS_ANY_VECTOR) - return sizeof(midgard_reg_info) + sizeof(midgard_vector_alu); - else if (ains->unit == ALU_ENAB_BRANCH) - return sizeof(midgard_branch_extended); - else if (ains->compact_branch) - return sizeof(uint16_t); - else - return sizeof(midgard_reg_info) + sizeof(midgard_scalar_alu); + if (ains->unit & UNITS_ANY_VECTOR) + return sizeof(midgard_reg_info) + sizeof(midgard_vector_alu); + else if (ains->unit == ALU_ENAB_BRANCH) + return sizeof(midgard_branch_extended); + else if (ains->compact_branch) + return sizeof(uint16_t); + else + return sizeof(midgard_reg_info) + sizeof(midgard_scalar_alu); } /* We would like to flatten the linked list of midgard_instructions in a bundle @@ -273,32 +285,33 @@ bytes_for_instruction(midgard_instruction *ains) static midgard_instruction ** flatten_mir(midgard_block *block, unsigned *len) { - *len = list_length(&block->base.instructions); + *len = list_length(&block->base.instructions); - if (!(*len)) - return NULL; + if (!(*len)) + return NULL; - midgard_instruction **instructions = - calloc(sizeof(midgard_instruction *), *len); + midgard_instruction **instructions = + calloc(sizeof(midgard_instruction *), *len); - unsigned i = 0; + unsigned i = 0; - mir_foreach_instr_in_block(block, ins) - instructions[i++] = ins; + mir_foreach_instr_in_block(block, ins) + instructions[i++] = ins; - return instructions; + return instructions; } /* The worklist is the set of instructions that can be scheduled now; that is, * the set of instructions with no remaining dependencies */ static void -mir_initialize_worklist(BITSET_WORD *worklist, midgard_instruction **instructions, unsigned count) +mir_initialize_worklist(BITSET_WORD *worklist, + midgard_instruction **instructions, unsigned count) { - for (unsigned i = 0; i < count; ++i) { - if (instructions[i]->nr_dependencies == 0) - BITSET_SET(worklist, i); - } + for (unsigned i = 0; i < count; ++i) { + if (instructions[i]->nr_dependencies == 0) + BITSET_SET(worklist, i); + } } /* Update the worklist after an instruction terminates. Remove its edges from @@ -306,37 +319,37 @@ mir_initialize_worklist(BITSET_WORD *worklist, midgard_instruction **instruction * worklist */ static void -mir_update_worklist( - BITSET_WORD *worklist, unsigned count, - midgard_instruction **instructions, midgard_instruction *done) +mir_update_worklist(BITSET_WORD *worklist, unsigned count, + midgard_instruction **instructions, + midgard_instruction *done) { - /* Sanity check: if no instruction terminated, there is nothing to do. - * If the instruction that terminated had dependencies, that makes no - * sense and means we messed up the worklist. Finally, as the purpose - * of this routine is to update dependents, we abort early if there are - * no dependents defined. */ + /* Sanity check: if no instruction terminated, there is nothing to do. + * If the instruction that terminated had dependencies, that makes no + * sense and means we messed up the worklist. Finally, as the purpose + * of this routine is to update dependents, we abort early if there are + * no dependents defined. */ - if (!done) - return; + if (!done) + return; - assert(done->nr_dependencies == 0); + assert(done->nr_dependencies == 0); - if (!done->dependents) - return; + if (!done->dependents) + return; - /* We have an instruction with dependents. 
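/* The loop that follows is the classic ready-set maintenance of list
 * scheduling: retiring an instruction decrements each dependent's remaining
 * dependency count, and whatever drops to zero becomes schedulable. A toy
 * version without the BITSET machinery (hypothetical types): */

#include <stdbool.h>

struct toy_node {
   unsigned nr_dependencies; /* unscheduled predecessors remaining */
   bool ready;               /* i.e. member of the worklist        */
};

static void
retire(struct toy_node *nodes, const unsigned *dependents, unsigned n_dependents)
{
   for (unsigned i = 0; i < n_dependents; ++i) {
      struct toy_node *d = &nodes[dependents[i]];

      if (--d->nr_dependencies == 0)
         d->ready = true; /* eligible for the next pick */
   }
}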
Iterate each dependent to - * remove one dependency (`done`), adding dependents to the worklist - * where possible. */ + /* We have an instruction with dependents. Iterate each dependent to + * remove one dependency (`done`), adding dependents to the worklist + * where possible. */ - unsigned i; - BITSET_FOREACH_SET(i, done->dependents, count) { - assert(instructions[i]->nr_dependencies); + unsigned i; + BITSET_FOREACH_SET(i, done->dependents, count) { + assert(instructions[i]->nr_dependencies); - if (!(--instructions[i]->nr_dependencies)) - BITSET_SET(worklist, i); - } + if (!(--instructions[i]->nr_dependencies)) + BITSET_SET(worklist, i); + } - free(done->dependents); + free(done->dependents); } /* While scheduling, we need to choose instructions satisfying certain @@ -345,184 +358,181 @@ mir_update_worklist( * given predicate. */ struct midgard_predicate { - /* TAG or ~0 for dont-care */ - unsigned tag; + /* TAG or ~0 for dont-care */ + unsigned tag; - /* True if we want to pop off the chosen instruction */ - bool destructive; + /* True if we want to pop off the chosen instruction */ + bool destructive; - /* For ALU, choose only this unit */ - unsigned unit; + /* For ALU, choose only this unit */ + unsigned unit; - /* State for bundle constants. constants is the actual constants - * for the bundle. constant_count is the number of bytes (up to - * 16) currently in use for constants. When picking in destructive - * mode, the constants array will be updated, and the instruction - * will be adjusted to index into the constants array */ + /* State for bundle constants. constants is the actual constants + * for the bundle. constant_count is the number of bytes (up to + * 16) currently in use for constants. When picking in destructive + * mode, the constants array will be updated, and the instruction + * will be adjusted to index into the constants array */ - midgard_constants *constants; - unsigned constant_mask; + midgard_constants *constants; + unsigned constant_mask; - /* Exclude this destination (if not ~0) */ - unsigned exclude; + /* Exclude this destination (if not ~0) */ + unsigned exclude; - /* Don't schedule instructions consuming conditionals (since we already - * scheduled one). Excludes conditional branches and csel */ - bool no_cond; + /* Don't schedule instructions consuming conditionals (since we already + * scheduled one). Excludes conditional branches and csel */ + bool no_cond; - /* Require (or reject) a minimal mask and (if nonzero) given - * destination. Used for writeout optimizations */ + /* Require (or reject) a minimal mask and (if nonzero) given + * destination. Used for writeout optimizations */ - unsigned mask; - unsigned no_mask; - unsigned dest; + unsigned mask; + unsigned no_mask; + unsigned dest; - /* Whether to not-care/only/never schedule imov/fmov instructions This - * allows non-move instructions to get priority on each unit */ - unsigned move_mode; + /* Whether to not-care/only/never schedule imov/fmov instructions This + * allows non-move instructions to get priority on each unit */ + unsigned move_mode; - /* For load/store: how many pipeline registers are in use? The two - * scheduled instructions cannot use more than the 256-bits of pipeline - * space available or RA will fail (as it would run out of pipeline - * registers and fail to spill without breaking the schedule) */ + /* For load/store: how many pipeline registers are in use? 
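/* In concrete terms the budget tracked by this field is two 128-bit pipeline
 * slots per load/store bundle; a candidate is rejected once the pair would
 * exceed that. A sketch of the accounting (hypothetical helpers, mirroring
 * mir_pipeline_count further down): */

#include <stdbool.h>

#define LDST_PIPELINE_SLOTS 2 /* 2 x 128 bits of pipeline space */

static unsigned
bytes_to_pipeline_slots(unsigned bytecount)
{
   return (bytecount + 15) / 16; /* round up to 128-bit slots */
}

static bool
fits_pipeline_budget(unsigned slots_in_use, unsigned candidate_bytes)
{
   return slots_in_use + bytes_to_pipeline_slots(candidate_bytes) <=
          LDST_PIPELINE_SLOTS;
}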
The two + * scheduled instructions cannot use more than the 256-bits of pipeline + * space available or RA will fail (as it would run out of pipeline + * registers and fail to spill without breaking the schedule) */ - unsigned pipeline_count; + unsigned pipeline_count; - /* For load/store: is a ST_VARY.a32 instruction scheduled into the - * bundle? is a non-ST_VARY.a32 instruction scheduled? Potential - * hardware issue, unknown cause. - */ - bool any_st_vary_a32, any_non_st_vary_a32; + /* For load/store: is a ST_VARY.a32 instruction scheduled into the + * bundle? is a non-ST_VARY.a32 instruction scheduled? Potential + * hardware issue, unknown cause. + */ + bool any_st_vary_a32, any_non_st_vary_a32; }; static bool mir_adjust_constant(midgard_instruction *ins, unsigned src, - unsigned *bundle_constant_mask, - unsigned *comp_mapping, - uint8_t *bundle_constants, - bool upper) + unsigned *bundle_constant_mask, unsigned *comp_mapping, + uint8_t *bundle_constants, bool upper) { - unsigned type_size = nir_alu_type_get_type_size(ins->src_types[src]) / 8; - unsigned type_shift = util_logbase2(type_size); - unsigned max_comp = mir_components_for_type(ins->src_types[src]); - unsigned comp_mask = mir_from_bytemask(mir_round_bytemask_up( - mir_bytemask_of_read_components_index(ins, src), - type_size * 8), - type_size * 8); - unsigned type_mask = (1 << type_size) - 1; + unsigned type_size = nir_alu_type_get_type_size(ins->src_types[src]) / 8; + unsigned type_shift = util_logbase2(type_size); + unsigned max_comp = mir_components_for_type(ins->src_types[src]); + unsigned comp_mask = mir_from_bytemask( + mir_round_bytemask_up(mir_bytemask_of_read_components_index(ins, src), + type_size * 8), + type_size * 8); + unsigned type_mask = (1 << type_size) - 1; - /* Upper only makes sense for 16-bit */ - if (type_size != 16 && upper) - return false; + /* Upper only makes sense for 16-bit */ + if (type_size != 16 && upper) + return false; - /* For 16-bit, we need to stay on either upper or lower halves to avoid - * disrupting the swizzle */ - unsigned start = upper ? 8 : 0; - unsigned length = (type_size == 2) ? 8 : 16; + /* For 16-bit, we need to stay on either upper or lower halves to avoid + * disrupting the swizzle */ + unsigned start = upper ? 8 : 0; + unsigned length = (type_size == 2) ? 
8 : 16; - for (unsigned comp = 0; comp < max_comp; comp++) { - if (!(comp_mask & (1 << comp))) - continue; + for (unsigned comp = 0; comp < max_comp; comp++) { + if (!(comp_mask & (1 << comp))) + continue; - uint8_t *constantp = ins->constants.u8 + (type_size * comp); - unsigned best_reuse_bytes = 0; - signed best_place = -1; - unsigned i, j; + uint8_t *constantp = ins->constants.u8 + (type_size * comp); + unsigned best_reuse_bytes = 0; + signed best_place = -1; + unsigned i, j; - for (i = start; i < (start + length); i += type_size) { - unsigned reuse_bytes = 0; + for (i = start; i < (start + length); i += type_size) { + unsigned reuse_bytes = 0; - for (j = 0; j < type_size; j++) { - if (!(*bundle_constant_mask & (1 << (i + j)))) - continue; - if (constantp[j] != bundle_constants[i + j]) - break; - if ((i + j) > (start + length)) - break; + for (j = 0; j < type_size; j++) { + if (!(*bundle_constant_mask & (1 << (i + j)))) + continue; + if (constantp[j] != bundle_constants[i + j]) + break; + if ((i + j) > (start + length)) + break; - reuse_bytes++; - } + reuse_bytes++; + } - /* Select the place where existing bytes can be - * reused so we leave empty slots to others - */ - if (j == type_size && - (reuse_bytes > best_reuse_bytes || best_place < 0)) { - best_reuse_bytes = reuse_bytes; - best_place = i; - break; - } - } + /* Select the place where existing bytes can be + * reused so we leave empty slots to others + */ + if (j == type_size && + (reuse_bytes > best_reuse_bytes || best_place < 0)) { + best_reuse_bytes = reuse_bytes; + best_place = i; + break; + } + } - /* This component couldn't fit in the remaining constant slot, - * no need check the remaining components, bail out now - */ - if (best_place < 0) - return false; + /* This component couldn't fit in the remaining constant slot, + * no need check the remaining components, bail out now + */ + if (best_place < 0) + return false; - memcpy(&bundle_constants[i], constantp, type_size); - *bundle_constant_mask |= type_mask << best_place; - comp_mapping[comp] = best_place >> type_shift; - } + memcpy(&bundle_constants[i], constantp, type_size); + *bundle_constant_mask |= type_mask << best_place; + comp_mapping[comp] = best_place >> type_shift; + } - return true; + return true; } /* For an instruction that can fit, adjust it to fit and update the constants * array, in destructive mode. Returns whether the fitting was successful. */ static bool -mir_adjust_constants(midgard_instruction *ins, - struct midgard_predicate *pred, - bool destructive) +mir_adjust_constants(midgard_instruction *ins, struct midgard_predicate *pred, + bool destructive) { - /* No constant, nothing to adjust */ - if (!ins->has_constants) - return true; + /* No constant, nothing to adjust */ + if (!ins->has_constants) + return true; - unsigned r_constant = SSA_FIXED_REGISTER(REGISTER_CONSTANT); - unsigned bundle_constant_mask = pred->constant_mask; - unsigned comp_mapping[2][16] = { }; - uint8_t bundle_constants[16]; + unsigned r_constant = SSA_FIXED_REGISTER(REGISTER_CONSTANT); + unsigned bundle_constant_mask = pred->constant_mask; + unsigned comp_mapping[2][16] = {}; + uint8_t bundle_constants[16]; - memcpy(bundle_constants, pred->constants, 16); + memcpy(bundle_constants, pred->constants, 16); - /* Let's try to find a place for each active component of the constant - * register. 
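/* A stripped-down model of what mir_adjust_constant() above is doing: the
 * bundle owns a single 16-byte embedded-constant slot, and each value is
 * placed at a position whose already-occupied bytes are either free or
 * happen to match, so instructions can share constants. The sketch below
 * handles one 32-bit value, takes the first fit rather than maximising byte
 * reuse, and ignores the 16-bit halves and swizzle rewrites (hypothetical
 * helper, not the compiler's API): */

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

/* Returns the 32-bit component (0..3) the value landed in, or -1 if the
 * slot is too full and the instruction must wait for another bundle. */
static int
place_constant_u32(uint8_t slot[16], uint16_t *byte_mask, uint32_t value)
{
   uint8_t bytes[4];
   memcpy(bytes, &value, 4);

   for (unsigned pos = 0; pos < 16; pos += 4) {
      bool ok = true;

      for (unsigned j = 0; j < 4; ++j) {
         bool occupied = *byte_mask & (1u << (pos + j));

         if (occupied && slot[pos + j] != bytes[j]) {
            ok = false; /* clashes with a constant already packed here */
            break;
         }
      }

      if (!ok)
         continue;

      memcpy(&slot[pos], bytes, 4);
      *byte_mask |= (uint16_t)(0xFu << pos);
      return (int)(pos / 4);
   }

   return -1;
}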
- */ - for (unsigned src = 0; src < 2; ++src) { - if (ins->src[src] != SSA_FIXED_REGISTER(REGISTER_CONSTANT)) - continue; + /* Let's try to find a place for each active component of the constant + * register. + */ + for (unsigned src = 0; src < 2; ++src) { + if (ins->src[src] != SSA_FIXED_REGISTER(REGISTER_CONSTANT)) + continue; - /* First, try lower half (or whole for !16) */ - if (mir_adjust_constant(ins, src, &bundle_constant_mask, - comp_mapping[src], bundle_constants, false)) - continue; + /* First, try lower half (or whole for !16) */ + if (mir_adjust_constant(ins, src, &bundle_constant_mask, + comp_mapping[src], bundle_constants, false)) + continue; - /* Next, try upper half */ - if (mir_adjust_constant(ins, src, &bundle_constant_mask, - comp_mapping[src], bundle_constants, true)) - continue; + /* Next, try upper half */ + if (mir_adjust_constant(ins, src, &bundle_constant_mask, + comp_mapping[src], bundle_constants, true)) + continue; - /* Otherwise bail */ - return false; - } + /* Otherwise bail */ + return false; + } - /* If non-destructive, we're done */ - if (!destructive) - return true; + /* If non-destructive, we're done */ + if (!destructive) + return true; - /* Otherwise update the constant_mask and constant values */ - pred->constant_mask = bundle_constant_mask; - memcpy(pred->constants, bundle_constants, 16); + /* Otherwise update the constant_mask and constant values */ + pred->constant_mask = bundle_constant_mask; + memcpy(pred->constants, bundle_constants, 16); - /* Use comp_mapping as a swizzle */ - mir_foreach_src(ins, s) { - if (ins->src[s] == r_constant) - mir_compose_swizzle(ins->swizzle[s], comp_mapping[s], ins->swizzle[s]); - } + /* Use comp_mapping as a swizzle */ + mir_foreach_src(ins, s) { + if (ins->src[s] == r_constant) + mir_compose_swizzle(ins->swizzle[s], comp_mapping[s], ins->swizzle[s]); + } - return true; + return true; } /* Conservative estimate of the pipeline registers required for load/store */ @@ -530,27 +540,28 @@ mir_adjust_constants(midgard_instruction *ins, static unsigned mir_pipeline_count(midgard_instruction *ins) { - unsigned bytecount = 0; + unsigned bytecount = 0; - mir_foreach_src(ins, i) { - /* Skip empty source */ - if (ins->src[i] == ~0) continue; + mir_foreach_src(ins, i) { + /* Skip empty source */ + if (ins->src[i] == ~0) + continue; - if (i == 0) { - /* First source is a vector, worst-case the mask */ - unsigned bytemask = mir_bytemask_of_read_components_index(ins, i); - unsigned max = util_logbase2(bytemask) + 1; - bytecount += max; - } else { - /* Sources 1 on are scalars */ - bytecount += 4; - } - } + if (i == 0) { + /* First source is a vector, worst-case the mask */ + unsigned bytemask = mir_bytemask_of_read_components_index(ins, i); + unsigned max = util_logbase2(bytemask) + 1; + bytecount += max; + } else { + /* Sources 1 on are scalars */ + bytecount += 4; + } + } - unsigned dwords = DIV_ROUND_UP(bytecount, 16); - assert(dwords <= 2); + unsigned dwords = DIV_ROUND_UP(bytecount, 16); + assert(dwords <= 2); - return dwords; + return dwords; } /* Matches FADD x, x with modifiers compatible. 
Since x + x = x * 2, for @@ -559,56 +570,56 @@ mir_pipeline_count(midgard_instruction *ins) static bool mir_is_add_2(midgard_instruction *ins) { - if (ins->op != midgard_alu_op_fadd) - return false; + if (ins->op != midgard_alu_op_fadd) + return false; - if (ins->src[0] != ins->src[1]) - return false; + if (ins->src[0] != ins->src[1]) + return false; - if (ins->src_types[0] != ins->src_types[1]) - return false; + if (ins->src_types[0] != ins->src_types[1]) + return false; - for (unsigned i = 0; i < MIR_VEC_COMPONENTS; ++i) { - if (ins->swizzle[0][i] != ins->swizzle[1][i]) - return false; - } + for (unsigned i = 0; i < MIR_VEC_COMPONENTS; ++i) { + if (ins->swizzle[0][i] != ins->swizzle[1][i]) + return false; + } - if (ins->src_abs[0] != ins->src_abs[1]) - return false; + if (ins->src_abs[0] != ins->src_abs[1]) + return false; - if (ins->src_neg[0] != ins->src_neg[1]) - return false; + if (ins->src_neg[0] != ins->src_neg[1]) + return false; - return true; + return true; } static void mir_adjust_unit(midgard_instruction *ins, unsigned unit) { - /* FADD x, x = FMUL x, #2 */ - if (mir_is_add_2(ins) && (unit & (UNITS_MUL | UNIT_VLUT))) { - ins->op = midgard_alu_op_fmul; + /* FADD x, x = FMUL x, #2 */ + if (mir_is_add_2(ins) && (unit & (UNITS_MUL | UNIT_VLUT))) { + ins->op = midgard_alu_op_fmul; - ins->src[1] = ~0; - ins->src_abs[1] = false; - ins->src_neg[1] = false; + ins->src[1] = ~0; + ins->src_abs[1] = false; + ins->src_neg[1] = false; - ins->has_inline_constant = true; - ins->inline_constant = _mesa_float_to_half(2.0); - } + ins->has_inline_constant = true; + ins->inline_constant = _mesa_float_to_half(2.0); + } } static unsigned mir_has_unit(midgard_instruction *ins, unsigned unit) { - if (alu_opcode_props[ins->op].props & unit) - return true; + if (alu_opcode_props[ins->op].props & unit) + return true; - /* FADD x, x can run on any adder or any multiplier */ - if (mir_is_add_2(ins)) - return true; + /* FADD x, x can run on any adder or any multiplier */ + if (mir_is_add_2(ins)) + return true; - return false; + return false; } /* Net change in liveness if an instruction were scheduled. Loosely based on @@ -617,265 +628,265 @@ mir_has_unit(midgard_instruction *ins, unsigned unit) static int mir_live_effect(uint16_t *liveness, midgard_instruction *ins, bool destructive) { - /* TODO: what if dest is used multiple times? */ - int free_live = 0; + /* TODO: what if dest is used multiple times? 
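/* The computation that follows scores a candidate by its net effect on
 * register pressure, with liveness tracked per byte as the scheduler works
 * upward through the block: bytes of the destination that the write retires
 * count against source bytes that newly become live. A toy single-source
 * version (assumes the GCC/Clang popcount builtin; names are hypothetical): */

static int
live_effect(const unsigned *liveness, unsigned dest, unsigned dest_bytes,
            unsigned src, unsigned src_bytes)
{
   int freed = __builtin_popcount(liveness[dest] & dest_bytes);
   int added = __builtin_popcount(src_bytes & ~liveness[src]);

   return added - freed; /* lower is better for pressure */
}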
*/ + int free_live = 0; - if (ins->dest < SSA_FIXED_MINIMUM) { - unsigned bytemask = mir_bytemask(ins); - bytemask = util_next_power_of_two(bytemask + 1) - 1; - free_live += util_bitcount(liveness[ins->dest] & bytemask); + if (ins->dest < SSA_FIXED_MINIMUM) { + unsigned bytemask = mir_bytemask(ins); + bytemask = util_next_power_of_two(bytemask + 1) - 1; + free_live += util_bitcount(liveness[ins->dest] & bytemask); - if (destructive) - liveness[ins->dest] &= ~bytemask; - } + if (destructive) + liveness[ins->dest] &= ~bytemask; + } - int new_live = 0; + int new_live = 0; - mir_foreach_src(ins, s) { - unsigned S = ins->src[s]; + mir_foreach_src(ins, s) { + unsigned S = ins->src[s]; - bool dupe = false; + bool dupe = false; - for (unsigned q = 0; q < s; ++q) - dupe |= (ins->src[q] == S); + for (unsigned q = 0; q < s; ++q) + dupe |= (ins->src[q] == S); - if (dupe) - continue; + if (dupe) + continue; - if (S < SSA_FIXED_MINIMUM) { - unsigned bytemask = mir_bytemask_of_read_components(ins, S); - bytemask = util_next_power_of_two(bytemask + 1) - 1; + if (S < SSA_FIXED_MINIMUM) { + unsigned bytemask = mir_bytemask_of_read_components(ins, S); + bytemask = util_next_power_of_two(bytemask + 1) - 1; - /* Count only the new components */ - new_live += util_bitcount(bytemask & ~(liveness[S])); + /* Count only the new components */ + new_live += util_bitcount(bytemask & ~(liveness[S])); - if (destructive) - liveness[S] |= bytemask; - } - } + if (destructive) + liveness[S] |= bytemask; + } + } - return new_live - free_live; + return new_live - free_live; } static midgard_instruction * -mir_choose_instruction( - midgard_instruction **instructions, - uint16_t *liveness, - BITSET_WORD *worklist, unsigned count, - struct midgard_predicate *predicate) +mir_choose_instruction(midgard_instruction **instructions, uint16_t *liveness, + BITSET_WORD *worklist, unsigned count, + struct midgard_predicate *predicate) { - /* Parse the predicate */ - unsigned tag = predicate->tag; - unsigned unit = predicate->unit; - bool scalar = (unit != ~0) && (unit & UNITS_SCALAR); - bool no_cond = predicate->no_cond; + /* Parse the predicate */ + unsigned tag = predicate->tag; + unsigned unit = predicate->unit; + bool scalar = (unit != ~0) && (unit & UNITS_SCALAR); + bool no_cond = predicate->no_cond; - unsigned mask = predicate->mask; - unsigned dest = predicate->dest; - bool needs_dest = mask & 0xF; + unsigned mask = predicate->mask; + unsigned dest = predicate->dest; + bool needs_dest = mask & 0xF; - /* Iterate to find the best instruction satisfying the predicate */ - unsigned i; + /* Iterate to find the best instruction satisfying the predicate */ + unsigned i; - signed best_index = -1; - signed best_effect = INT_MAX; - bool best_conditional = false; + signed best_index = -1; + signed best_effect = INT_MAX; + bool best_conditional = false; - /* Enforce a simple metric limiting distance to keep down register - * pressure. TOOD: replace with liveness tracking for much better - * results */ + /* Enforce a simple metric limiting distance to keep down register + * pressure. 
TOOD: replace with liveness tracking for much better + * results */ - unsigned max_active = 0; - unsigned max_distance = 36; + unsigned max_active = 0; + unsigned max_distance = 36; #ifndef NDEBUG - /* Force in-order scheduling */ - if (midgard_debug & MIDGARD_DBG_INORDER) - max_distance = 1; + /* Force in-order scheduling */ + if (midgard_debug & MIDGARD_DBG_INORDER) + max_distance = 1; #endif - BITSET_FOREACH_SET(i, worklist, count) { - max_active = MAX2(max_active, i); - } + BITSET_FOREACH_SET(i, worklist, count) { + max_active = MAX2(max_active, i); + } - BITSET_FOREACH_SET(i, worklist, count) { - if ((max_active - i) >= max_distance) - continue; + BITSET_FOREACH_SET(i, worklist, count) { + if ((max_active - i) >= max_distance) + continue; - if (tag != ~0 && instructions[i]->type != tag) - continue; + if (tag != ~0 && instructions[i]->type != tag) + continue; - bool alu = (instructions[i]->type == TAG_ALU_4); - bool ldst = (instructions[i]->type == TAG_LOAD_STORE_4); + bool alu = (instructions[i]->type == TAG_ALU_4); + bool ldst = (instructions[i]->type == TAG_LOAD_STORE_4); - bool branch = alu && (unit == ALU_ENAB_BR_COMPACT); - bool is_move = alu && - (instructions[i]->op == midgard_alu_op_imov || - instructions[i]->op == midgard_alu_op_fmov); + bool branch = alu && (unit == ALU_ENAB_BR_COMPACT); + bool is_move = alu && (instructions[i]->op == midgard_alu_op_imov || + instructions[i]->op == midgard_alu_op_fmov); - if (predicate->exclude != ~0 && instructions[i]->dest == predicate->exclude) - continue; + if (predicate->exclude != ~0 && + instructions[i]->dest == predicate->exclude) + continue; - if (alu && !branch && unit != ~0 && !(mir_has_unit(instructions[i], unit))) - continue; + if (alu && !branch && unit != ~0 && + !(mir_has_unit(instructions[i], unit))) + continue; - /* 0: don't care, 1: no moves, 2: only moves */ - if (predicate->move_mode && ((predicate->move_mode - 1) != is_move)) - continue; + /* 0: don't care, 1: no moves, 2: only moves */ + if (predicate->move_mode && ((predicate->move_mode - 1) != is_move)) + continue; - if (branch && !instructions[i]->compact_branch) - continue; + if (branch && !instructions[i]->compact_branch) + continue; - if (alu && scalar && !mir_is_scalar(instructions[i])) - continue; + if (alu && scalar && !mir_is_scalar(instructions[i])) + continue; - if (alu && predicate->constants && !mir_adjust_constants(instructions[i], predicate, false)) - continue; + if (alu && predicate->constants && + !mir_adjust_constants(instructions[i], predicate, false)) + continue; - if (needs_dest && instructions[i]->dest != dest) - continue; + if (needs_dest && instructions[i]->dest != dest) + continue; - if (mask && ((~instructions[i]->mask) & mask)) - continue; + if (mask && ((~instructions[i]->mask) & mask)) + continue; - if (instructions[i]->mask & predicate->no_mask) - continue; + if (instructions[i]->mask & predicate->no_mask) + continue; - if (ldst && mir_pipeline_count(instructions[i]) + predicate->pipeline_count > 2) - continue; + if (ldst && + mir_pipeline_count(instructions[i]) + predicate->pipeline_count > 2) + continue; - bool st_vary_a32 = (instructions[i]->op == midgard_op_st_vary_32); + bool st_vary_a32 = (instructions[i]->op == midgard_op_st_vary_32); - if (ldst && predicate->any_non_st_vary_a32 && st_vary_a32) - continue; + if (ldst && predicate->any_non_st_vary_a32 && st_vary_a32) + continue; - if (ldst && predicate->any_st_vary_a32 && !st_vary_a32) - continue; + if (ldst && predicate->any_st_vary_a32 && !st_vary_a32) + continue; - bool 
conditional = alu && !branch && OP_IS_CSEL(instructions[i]->op); - conditional |= (branch && instructions[i]->branch.conditional); + bool conditional = alu && !branch && OP_IS_CSEL(instructions[i]->op); + conditional |= (branch && instructions[i]->branch.conditional); - if (conditional && no_cond) - continue; + if (conditional && no_cond) + continue; - int effect = mir_live_effect(liveness, instructions[i], false); + int effect = mir_live_effect(liveness, instructions[i], false); - if (effect > best_effect) - continue; + if (effect > best_effect) + continue; - if (effect == best_effect && (signed) i < best_index) - continue; + if (effect == best_effect && (signed)i < best_index) + continue; - best_effect = effect; - best_index = i; - best_conditional = conditional; - } + best_effect = effect; + best_index = i; + best_conditional = conditional; + } - /* Did we find anything? */ + /* Did we find anything? */ - if (best_index < 0) - return NULL; + if (best_index < 0) + return NULL; - /* If we found something, remove it from the worklist */ - assert(best_index < count); - midgard_instruction *I = instructions[best_index]; + /* If we found something, remove it from the worklist */ + assert(best_index < count); + midgard_instruction *I = instructions[best_index]; - if (predicate->destructive) { - BITSET_CLEAR(worklist, best_index); + if (predicate->destructive) { + BITSET_CLEAR(worklist, best_index); - if (I->type == TAG_ALU_4) - mir_adjust_constants(instructions[best_index], predicate, true); + if (I->type == TAG_ALU_4) + mir_adjust_constants(instructions[best_index], predicate, true); - if (I->type == TAG_LOAD_STORE_4) { - predicate->pipeline_count += mir_pipeline_count(instructions[best_index]); + if (I->type == TAG_LOAD_STORE_4) { + predicate->pipeline_count += + mir_pipeline_count(instructions[best_index]); - if (instructions[best_index]->op == midgard_op_st_vary_32) - predicate->any_st_vary_a32 = true; - else - predicate->any_non_st_vary_a32 = true; - } + if (instructions[best_index]->op == midgard_op_st_vary_32) + predicate->any_st_vary_a32 = true; + else + predicate->any_non_st_vary_a32 = true; + } - if (I->type == TAG_ALU_4) - mir_adjust_unit(instructions[best_index], unit); + if (I->type == TAG_ALU_4) + mir_adjust_unit(instructions[best_index], unit); - /* Once we schedule a conditional, we can't again */ - predicate->no_cond |= best_conditional; - mir_live_effect(liveness, instructions[best_index], true); - } + /* Once we schedule a conditional, we can't again */ + predicate->no_cond |= best_conditional; + mir_live_effect(liveness, instructions[best_index], true); + } - return I; + return I; } /* Still, we don't choose instructions in a vacuum. We need a way to choose the * best bundle type (ALU, load/store, texture). Nondestructive. */ static unsigned -mir_choose_bundle( - midgard_instruction **instructions, - uint16_t *liveness, - BITSET_WORD *worklist, unsigned count, - unsigned num_ldst) +mir_choose_bundle(midgard_instruction **instructions, uint16_t *liveness, + BITSET_WORD *worklist, unsigned count, unsigned num_ldst) { - /* At the moment, our algorithm is very simple - use the bundle of the - * best instruction, regardless of what else could be scheduled - * alongside it. This is not optimal but it works okay for in-order */ + /* At the moment, our algorithm is very simple - use the bundle of the + * best instruction, regardless of what else could be scheduled + * alongside it. 
This is not optimal but it works okay for in-order */ - struct midgard_predicate predicate = { - .tag = ~0, - .unit = ~0, - .destructive = false, - .exclude = ~0, - }; + struct midgard_predicate predicate = { + .tag = ~0, + .unit = ~0, + .destructive = false, + .exclude = ~0, + }; - midgard_instruction *chosen = mir_choose_instruction(instructions, liveness, worklist, count, &predicate); + midgard_instruction *chosen = mir_choose_instruction( + instructions, liveness, worklist, count, &predicate); - if (chosen && chosen->type == TAG_LOAD_STORE_4 && !(num_ldst % 2)) { - /* Try to schedule load/store ops in pairs */ + if (chosen && chosen->type == TAG_LOAD_STORE_4 && !(num_ldst % 2)) { + /* Try to schedule load/store ops in pairs */ - predicate.exclude = chosen->dest; - predicate.tag = TAG_LOAD_STORE_4; + predicate.exclude = chosen->dest; + predicate.tag = TAG_LOAD_STORE_4; - chosen = mir_choose_instruction(instructions, liveness, worklist, count, &predicate); - if (chosen) - return TAG_LOAD_STORE_4; + chosen = mir_choose_instruction(instructions, liveness, worklist, count, + &predicate); + if (chosen) + return TAG_LOAD_STORE_4; - predicate.tag = ~0; + predicate.tag = ~0; - chosen = mir_choose_instruction(instructions, liveness, worklist, count, &predicate); - assert(chosen == NULL || chosen->type != TAG_LOAD_STORE_4); + chosen = mir_choose_instruction(instructions, liveness, worklist, count, + &predicate); + assert(chosen == NULL || chosen->type != TAG_LOAD_STORE_4); - if (chosen) - return chosen->type; - else - return TAG_LOAD_STORE_4; - } + if (chosen) + return chosen->type; + else + return TAG_LOAD_STORE_4; + } - if (chosen) - return chosen->type; - else - return ~0; + if (chosen) + return chosen->type; + else + return ~0; } /* We want to choose an ALU instruction filling a given unit */ static void -mir_choose_alu(midgard_instruction **slot, - midgard_instruction **instructions, - uint16_t *liveness, - BITSET_WORD *worklist, unsigned len, - struct midgard_predicate *predicate, - unsigned unit) +mir_choose_alu(midgard_instruction **slot, midgard_instruction **instructions, + uint16_t *liveness, BITSET_WORD *worklist, unsigned len, + struct midgard_predicate *predicate, unsigned unit) { - /* Did we already schedule to this slot? */ - if ((*slot) != NULL) - return; + /* Did we already schedule to this slot? 
*/ + if ((*slot) != NULL) + return; - /* Try to schedule something, if not */ - predicate->unit = unit; - *slot = mir_choose_instruction(instructions, liveness, worklist, len, predicate); + /* Try to schedule something, if not */ + predicate->unit = unit; + *slot = + mir_choose_instruction(instructions, liveness, worklist, len, predicate); - /* Store unit upon scheduling */ - if (*slot && !((*slot)->compact_branch)) - (*slot)->unit = unit; + /* Store unit upon scheduling */ + if (*slot && !((*slot)->compact_branch)) + (*slot)->unit = unit; } /* When we are scheduling a branch/csel, we need the consumed condition in the @@ -893,54 +904,51 @@ mir_choose_alu(midgard_instruction **slot, */ static unsigned -mir_comparison_mobile( - compiler_context *ctx, - midgard_instruction **instructions, - struct midgard_predicate *predicate, - unsigned count, - unsigned cond) +mir_comparison_mobile(compiler_context *ctx, midgard_instruction **instructions, + struct midgard_predicate *predicate, unsigned count, + unsigned cond) { - if (!mir_single_use(ctx, cond)) - return ~0; + if (!mir_single_use(ctx, cond)) + return ~0; - unsigned ret = ~0; + unsigned ret = ~0; - for (unsigned i = 0; i < count; ++i) { - if (instructions[i]->dest != cond) - continue; + for (unsigned i = 0; i < count; ++i) { + if (instructions[i]->dest != cond) + continue; - /* Must fit in an ALU bundle */ - if (instructions[i]->type != TAG_ALU_4) - return ~0; + /* Must fit in an ALU bundle */ + if (instructions[i]->type != TAG_ALU_4) + return ~0; - /* If it would itself require a condition, that's recursive */ - if (OP_IS_CSEL(instructions[i]->op)) - return ~0; + /* If it would itself require a condition, that's recursive */ + if (OP_IS_CSEL(instructions[i]->op)) + return ~0; - /* We'll need to rewrite to .w but that doesn't work for vector - * ops that don't replicate (ball/bany), so bail there */ + /* We'll need to rewrite to .w but that doesn't work for vector + * ops that don't replicate (ball/bany), so bail there */ - if (GET_CHANNEL_COUNT(alu_opcode_props[instructions[i]->op].props)) - return ~0; + if (GET_CHANNEL_COUNT(alu_opcode_props[instructions[i]->op].props)) + return ~0; - /* Ensure it will fit with constants */ + /* Ensure it will fit with constants */ - if (!mir_adjust_constants(instructions[i], predicate, false)) - return ~0; + if (!mir_adjust_constants(instructions[i], predicate, false)) + return ~0; - /* Ensure it is written only once */ + /* Ensure it is written only once */ - if (ret != ~0) - return ~0; - else - ret = i; - } + if (ret != ~0) + return ~0; + else + ret = i; + } - /* Inject constants now that we are sure we want to */ - if (ret != ~0) - mir_adjust_constants(instructions[ret], predicate, true); + /* Inject constants now that we are sure we want to */ + if (ret != ~0) + mir_adjust_constants(instructions[ret], predicate, true); - return ret; + return ret; } /* Using the information about the moveable conditional itself, we either pop @@ -948,33 +956,33 @@ mir_comparison_mobile( * artificially schedule instead as a fallback */ static midgard_instruction * -mir_schedule_comparison( - compiler_context *ctx, - midgard_instruction **instructions, - struct midgard_predicate *predicate, - BITSET_WORD *worklist, unsigned count, - unsigned cond, bool vector, unsigned *swizzle, - midgard_instruction *user) +mir_schedule_comparison(compiler_context *ctx, + midgard_instruction **instructions, + struct midgard_predicate *predicate, + BITSET_WORD *worklist, unsigned count, unsigned cond, + bool vector, unsigned *swizzle, + 
midgard_instruction *user) { - /* TODO: swizzle when scheduling */ - unsigned comp_i = - (!vector && (swizzle[0] == 0)) ? - mir_comparison_mobile(ctx, instructions, predicate, count, cond) : ~0; + /* TODO: swizzle when scheduling */ + unsigned comp_i = + (!vector && (swizzle[0] == 0)) + ? mir_comparison_mobile(ctx, instructions, predicate, count, cond) + : ~0; - /* If we can, schedule the condition immediately */ - if ((comp_i != ~0) && BITSET_TEST(worklist, comp_i)) { - assert(comp_i < count); - BITSET_CLEAR(worklist, comp_i); - return instructions[comp_i]; - } + /* If we can, schedule the condition immediately */ + if ((comp_i != ~0) && BITSET_TEST(worklist, comp_i)) { + assert(comp_i < count); + BITSET_CLEAR(worklist, comp_i); + return instructions[comp_i]; + } - /* Otherwise, we insert a move */ + /* Otherwise, we insert a move */ - midgard_instruction mov = v_mov(cond, cond); - mov.mask = vector ? 0xF : 0x1; - memcpy(mov.swizzle[1], swizzle, sizeof(mov.swizzle[1])); + midgard_instruction mov = v_mov(cond, cond); + mov.mask = vector ? 0xF : 0x1; + memcpy(mov.swizzle[1], swizzle, sizeof(mov.swizzle[1])); - return mir_insert_instruction_before(ctx, user, mov); + return mir_insert_instruction_before(ctx, user, mov); } /* Most generally, we need instructions writing to r31 in the appropriate @@ -982,625 +990,632 @@ mir_schedule_comparison( static midgard_instruction * mir_schedule_condition(compiler_context *ctx, - struct midgard_predicate *predicate, - BITSET_WORD *worklist, unsigned count, - midgard_instruction **instructions, - midgard_instruction *last) + struct midgard_predicate *predicate, + BITSET_WORD *worklist, unsigned count, + midgard_instruction **instructions, + midgard_instruction *last) { - /* For a branch, the condition is the only argument; for csel, third */ - bool branch = last->compact_branch; - unsigned condition_index = branch ? 0 : 2; + /* For a branch, the condition is the only argument; for csel, third */ + bool branch = last->compact_branch; + unsigned condition_index = branch ? 0 : 2; - /* csel_v is vector; otherwise, conditions are scalar */ - bool vector = !branch && OP_IS_CSEL_V(last->op); + /* csel_v is vector; otherwise, conditions are scalar */ + bool vector = !branch && OP_IS_CSEL_V(last->op); - /* Grab the conditional instruction */ + /* Grab the conditional instruction */ - midgard_instruction *cond = mir_schedule_comparison( - ctx, instructions, predicate, worklist, count, last->src[condition_index], - vector, last->swizzle[condition_index], last); + midgard_instruction *cond = mir_schedule_comparison( + ctx, instructions, predicate, worklist, count, last->src[condition_index], + vector, last->swizzle[condition_index], last); - /* We have exclusive reign over this (possibly move) conditional - * instruction. We can rewrite into a pipeline conditional register */ + /* We have exclusive reign over this (possibly move) conditional + * instruction. 
We can rewrite into a pipeline conditional register */ - predicate->exclude = cond->dest; - cond->dest = SSA_FIXED_REGISTER(31); + predicate->exclude = cond->dest; + cond->dest = SSA_FIXED_REGISTER(31); - if (!vector) { - cond->mask = (1 << COMPONENT_W); + if (!vector) { + cond->mask = (1 << COMPONENT_W); - mir_foreach_src(cond, s) { - if (cond->src[s] == ~0) - continue; + mir_foreach_src(cond, s) { + if (cond->src[s] == ~0) + continue; - for (unsigned q = 0; q < 4; ++q) - cond->swizzle[s][q + COMPONENT_W] = cond->swizzle[s][q]; - } - } + for (unsigned q = 0; q < 4; ++q) + cond->swizzle[s][q + COMPONENT_W] = cond->swizzle[s][q]; + } + } - /* Schedule the unit: csel is always in the latter pipeline, so a csel - * condition must be in the former pipeline stage (vmul/sadd), - * depending on scalar/vector of the instruction itself. A branch must - * be written from the latter pipeline stage and a branch condition is - * always scalar, so it is always in smul (exception: ball/bany, which - * will be vadd) */ + /* Schedule the unit: csel is always in the latter pipeline, so a csel + * condition must be in the former pipeline stage (vmul/sadd), + * depending on scalar/vector of the instruction itself. A branch must + * be written from the latter pipeline stage and a branch condition is + * always scalar, so it is always in smul (exception: ball/bany, which + * will be vadd) */ - if (branch) - cond->unit = UNIT_SMUL; - else - cond->unit = vector ? UNIT_VMUL : UNIT_SADD; + if (branch) + cond->unit = UNIT_SMUL; + else + cond->unit = vector ? UNIT_VMUL : UNIT_SADD; - return cond; + return cond; } /* Schedules a single bundle of the given type */ static midgard_bundle -mir_schedule_texture( - midgard_instruction **instructions, - uint16_t *liveness, - BITSET_WORD *worklist, unsigned len, - bool is_vertex) +mir_schedule_texture(midgard_instruction **instructions, uint16_t *liveness, + BITSET_WORD *worklist, unsigned len, bool is_vertex) { - struct midgard_predicate predicate = { - .tag = TAG_TEXTURE_4, - .destructive = true, - .exclude = ~0, - }; + struct midgard_predicate predicate = { + .tag = TAG_TEXTURE_4, + .destructive = true, + .exclude = ~0, + }; - midgard_instruction *ins = - mir_choose_instruction(instructions, liveness, worklist, len, &predicate); + midgard_instruction *ins = + mir_choose_instruction(instructions, liveness, worklist, len, &predicate); - mir_update_worklist(worklist, len, instructions, ins); + mir_update_worklist(worklist, len, instructions, ins); - struct midgard_bundle out = { - .tag = ins->op == midgard_tex_op_barrier ? - TAG_TEXTURE_4_BARRIER : - (ins->op == midgard_tex_op_fetch) || is_vertex ? - TAG_TEXTURE_4_VTX : TAG_TEXTURE_4, - .instruction_count = 1, - .instructions = { ins }, - }; + struct midgard_bundle out = { + .tag = ins->op == midgard_tex_op_barrier ? TAG_TEXTURE_4_BARRIER + : (ins->op == midgard_tex_op_fetch) || is_vertex + ? 
TAG_TEXTURE_4_VTX + : TAG_TEXTURE_4, + .instruction_count = 1, + .instructions = {ins}, + }; - return out; + return out; } static midgard_bundle -mir_schedule_ldst( - midgard_instruction **instructions, - uint16_t *liveness, - BITSET_WORD *worklist, unsigned len, - unsigned *num_ldst) +mir_schedule_ldst(midgard_instruction **instructions, uint16_t *liveness, + BITSET_WORD *worklist, unsigned len, unsigned *num_ldst) { - struct midgard_predicate predicate = { - .tag = TAG_LOAD_STORE_4, - .destructive = true, - .exclude = ~0, - }; + struct midgard_predicate predicate = { + .tag = TAG_LOAD_STORE_4, + .destructive = true, + .exclude = ~0, + }; - /* Try to pick two load/store ops. Second not gauranteed to exist */ + /* Try to pick two load/store ops. Second not gauranteed to exist */ - midgard_instruction *ins = - mir_choose_instruction(instructions, liveness, worklist, len, &predicate); + midgard_instruction *ins = + mir_choose_instruction(instructions, liveness, worklist, len, &predicate); - midgard_instruction *pair = - mir_choose_instruction(instructions, liveness, worklist, len, &predicate); + midgard_instruction *pair = + mir_choose_instruction(instructions, liveness, worklist, len, &predicate); - assert(ins != NULL); + assert(ins != NULL); - struct midgard_bundle out = { - .tag = TAG_LOAD_STORE_4, - .instruction_count = pair ? 2 : 1, - .instructions = { ins, pair }, - }; + struct midgard_bundle out = { + .tag = TAG_LOAD_STORE_4, + .instruction_count = pair ? 2 : 1, + .instructions = {ins, pair}, + }; - *num_ldst -= out.instruction_count; + *num_ldst -= out.instruction_count; - /* We have to update the worklist atomically, since the two - * instructions run concurrently (TODO: verify it's not pipelined) */ + /* We have to update the worklist atomically, since the two + * instructions run concurrently (TODO: verify it's not pipelined) */ - mir_update_worklist(worklist, len, instructions, ins); - mir_update_worklist(worklist, len, instructions, pair); + mir_update_worklist(worklist, len, instructions, ins); + mir_update_worklist(worklist, len, instructions, pair); - return out; + return out; } static void -mir_schedule_zs_write( - compiler_context *ctx, - struct midgard_predicate *predicate, - midgard_instruction **instructions, - uint16_t *liveness, - BITSET_WORD *worklist, unsigned len, - midgard_instruction *branch, - midgard_instruction **smul, - midgard_instruction **vadd, - midgard_instruction **vlut, - bool stencil) +mir_schedule_zs_write(compiler_context *ctx, + struct midgard_predicate *predicate, + midgard_instruction **instructions, uint16_t *liveness, + BITSET_WORD *worklist, unsigned len, + midgard_instruction *branch, midgard_instruction **smul, + midgard_instruction **vadd, midgard_instruction **vlut, + bool stencil) { - bool success = false; - unsigned idx = stencil ? 3 : 2; - unsigned src = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(1) : branch->src[idx]; + bool success = false; + unsigned idx = stencil ? 3 : 2; + unsigned src = + (branch->src[0] == ~0) ? 
SSA_FIXED_REGISTER(1) : branch->src[idx]; - predicate->dest = src; - predicate->mask = 0x1; + predicate->dest = src; + predicate->mask = 0x1; - midgard_instruction **units[] = { smul, vadd, vlut }; - unsigned unit_names[] = { UNIT_SMUL, UNIT_VADD, UNIT_VLUT }; + midgard_instruction **units[] = {smul, vadd, vlut}; + unsigned unit_names[] = {UNIT_SMUL, UNIT_VADD, UNIT_VLUT}; - for (unsigned i = 0; i < 3; ++i) { - if (*(units[i])) - continue; + for (unsigned i = 0; i < 3; ++i) { + if (*(units[i])) + continue; - predicate->unit = unit_names[i]; - midgard_instruction *ins = - mir_choose_instruction(instructions, liveness, worklist, len, predicate); + predicate->unit = unit_names[i]; + midgard_instruction *ins = mir_choose_instruction( + instructions, liveness, worklist, len, predicate); - if (ins) { - ins->unit = unit_names[i]; - *(units[i]) = ins; - success |= true; - break; - } - } + if (ins) { + ins->unit = unit_names[i]; + *(units[i]) = ins; + success |= true; + break; + } + } - predicate->dest = predicate->mask = 0; + predicate->dest = predicate->mask = 0; - if (success) - return; + if (success) + return; - midgard_instruction *mov = ralloc(ctx, midgard_instruction); - *mov = v_mov(src, make_compiler_temp(ctx)); - mov->mask = 0x1; + midgard_instruction *mov = ralloc(ctx, midgard_instruction); + *mov = v_mov(src, make_compiler_temp(ctx)); + mov->mask = 0x1; - branch->src[idx] = mov->dest; + branch->src[idx] = mov->dest; - if (stencil) { - unsigned swizzle = (branch->src[0] == ~0) ? COMPONENT_Y : COMPONENT_X; + if (stencil) { + unsigned swizzle = (branch->src[0] == ~0) ? COMPONENT_Y : COMPONENT_X; - for (unsigned c = 0; c < 16; ++c) - mov->swizzle[1][c] = swizzle; - } + for (unsigned c = 0; c < 16; ++c) + mov->swizzle[1][c] = swizzle; + } - for (unsigned i = 0; i < 3; ++i) { - if (!(*(units[i]))) { - *(units[i]) = mov; - mov->unit = unit_names[i]; - return; - } - } + for (unsigned i = 0; i < 3; ++i) { + if (!(*(units[i]))) { + *(units[i]) = mov; + mov->unit = unit_names[i]; + return; + } + } - unreachable("Could not schedule Z/S move to any unit"); + unreachable("Could not schedule Z/S move to any unit"); } static midgard_bundle -mir_schedule_alu( - compiler_context *ctx, - midgard_instruction **instructions, - uint16_t *liveness, - BITSET_WORD *worklist, unsigned len) +mir_schedule_alu(compiler_context *ctx, midgard_instruction **instructions, + uint16_t *liveness, BITSET_WORD *worklist, unsigned len) { - struct midgard_bundle bundle = {}; + struct midgard_bundle bundle = {}; - unsigned bytes_emitted = sizeof(bundle.control); + unsigned bytes_emitted = sizeof(bundle.control); - struct midgard_predicate predicate = { - .tag = TAG_ALU_4, - .destructive = true, - .exclude = ~0, - .constants = &bundle.constants, - }; + struct midgard_predicate predicate = { + .tag = TAG_ALU_4, + .destructive = true, + .exclude = ~0, + .constants = &bundle.constants, + }; - midgard_instruction *vmul = NULL; - midgard_instruction *vadd = NULL; - midgard_instruction *vlut = NULL; - midgard_instruction *smul = NULL; - midgard_instruction *sadd = NULL; - midgard_instruction *branch = NULL; + midgard_instruction *vmul = NULL; + midgard_instruction *vadd = NULL; + midgard_instruction *vlut = NULL; + midgard_instruction *smul = NULL; + midgard_instruction *sadd = NULL; + midgard_instruction *branch = NULL; - mir_choose_alu(&branch, instructions, liveness, worklist, len, &predicate, ALU_ENAB_BR_COMPACT); - mir_update_worklist(worklist, len, instructions, branch); - unsigned writeout = branch ? 
branch->writeout : 0; + mir_choose_alu(&branch, instructions, liveness, worklist, len, &predicate, + ALU_ENAB_BR_COMPACT); + mir_update_worklist(worklist, len, instructions, branch); + unsigned writeout = branch ? branch->writeout : 0; - if (branch && branch->branch.conditional) { - midgard_instruction *cond = mir_schedule_condition(ctx, &predicate, worklist, len, instructions, branch); + if (branch && branch->branch.conditional) { + midgard_instruction *cond = mir_schedule_condition( + ctx, &predicate, worklist, len, instructions, branch); - if (cond->unit == UNIT_VADD) - vadd = cond; - else if (cond->unit == UNIT_SMUL) - smul = cond; - else - unreachable("Bad condition"); - } + if (cond->unit == UNIT_VADD) + vadd = cond; + else if (cond->unit == UNIT_SMUL) + smul = cond; + else + unreachable("Bad condition"); + } - /* If we have a render target reference, schedule a move for it. Since - * this will be in sadd, we boost this to prevent scheduling csel into - * smul */ + /* If we have a render target reference, schedule a move for it. Since + * this will be in sadd, we boost this to prevent scheduling csel into + * smul */ - if (writeout && (branch->constants.u32[0] || ctx->inputs->is_blend)) { - sadd = ralloc(ctx, midgard_instruction); - *sadd = v_mov(~0, make_compiler_temp(ctx)); - sadd->unit = UNIT_SADD; - sadd->mask = 0x1; - sadd->has_inline_constant = true; - sadd->inline_constant = branch->constants.u32[0]; - branch->src[1] = sadd->dest; - branch->src_types[1] = sadd->dest_type; - } + if (writeout && (branch->constants.u32[0] || ctx->inputs->is_blend)) { + sadd = ralloc(ctx, midgard_instruction); + *sadd = v_mov(~0, make_compiler_temp(ctx)); + sadd->unit = UNIT_SADD; + sadd->mask = 0x1; + sadd->has_inline_constant = true; + sadd->inline_constant = branch->constants.u32[0]; + branch->src[1] = sadd->dest; + branch->src_types[1] = sadd->dest_type; + } - if (writeout) { - /* Propagate up */ - bundle.last_writeout = branch->last_writeout; + if (writeout) { + /* Propagate up */ + bundle.last_writeout = branch->last_writeout; - /* Mask off any conditionals. - * This prevents csel and csel_v being scheduled into smul - * since we might not have room for a conditional in vmul/sadd. - * This is important because both writeout and csel have same-bundle - * requirements on their dependencies. */ - predicate.no_cond = true; - } + /* Mask off any conditionals. + * This prevents csel and csel_v being scheduled into smul + * since we might not have room for a conditional in vmul/sadd. + * This is important because both writeout and csel have same-bundle + * requirements on their dependencies. 
*/ + predicate.no_cond = true; + } - /* Set r1.w to the return address so we can return from blend shaders */ - if (writeout) { - vadd = ralloc(ctx, midgard_instruction); - *vadd = v_mov(~0, make_compiler_temp(ctx)); + /* Set r1.w to the return address so we can return from blend shaders */ + if (writeout) { + vadd = ralloc(ctx, midgard_instruction); + *vadd = v_mov(~0, make_compiler_temp(ctx)); - if (!ctx->inputs->is_blend) { - vadd->op = midgard_alu_op_iadd; - vadd->src[0] = SSA_FIXED_REGISTER(31); - vadd->src_types[0] = nir_type_uint32; + if (!ctx->inputs->is_blend) { + vadd->op = midgard_alu_op_iadd; + vadd->src[0] = SSA_FIXED_REGISTER(31); + vadd->src_types[0] = nir_type_uint32; - for (unsigned c = 0; c < 16; ++c) - vadd->swizzle[0][c] = COMPONENT_X; + for (unsigned c = 0; c < 16; ++c) + vadd->swizzle[0][c] = COMPONENT_X; - vadd->has_inline_constant = true; - vadd->inline_constant = 0; - } else { - vadd->src[1] = SSA_FIXED_REGISTER(1); - vadd->src_types[0] = nir_type_uint32; + vadd->has_inline_constant = true; + vadd->inline_constant = 0; + } else { + vadd->src[1] = SSA_FIXED_REGISTER(1); + vadd->src_types[0] = nir_type_uint32; - for (unsigned c = 0; c < 16; ++c) - vadd->swizzle[1][c] = COMPONENT_W; - } + for (unsigned c = 0; c < 16; ++c) + vadd->swizzle[1][c] = COMPONENT_W; + } - vadd->unit = UNIT_VADD; - vadd->mask = 0x1; - branch->dest = vadd->dest; - branch->dest_type = vadd->dest_type; - } + vadd->unit = UNIT_VADD; + vadd->mask = 0x1; + branch->dest = vadd->dest; + branch->dest_type = vadd->dest_type; + } - if (writeout & PAN_WRITEOUT_Z) - mir_schedule_zs_write(ctx, &predicate, instructions, liveness, worklist, len, branch, &smul, &vadd, &vlut, false); + if (writeout & PAN_WRITEOUT_Z) + mir_schedule_zs_write(ctx, &predicate, instructions, liveness, worklist, + len, branch, &smul, &vadd, &vlut, false); - if (writeout & PAN_WRITEOUT_S) - mir_schedule_zs_write(ctx, &predicate, instructions, liveness, worklist, len, branch, &smul, &vadd, &vlut, true); + if (writeout & PAN_WRITEOUT_S) + mir_schedule_zs_write(ctx, &predicate, instructions, liveness, worklist, + len, branch, &smul, &vadd, &vlut, true); - mir_choose_alu(&smul, instructions, liveness, worklist, len, &predicate, UNIT_SMUL); + mir_choose_alu(&smul, instructions, liveness, worklist, len, &predicate, + UNIT_SMUL); - for (unsigned mode = 1; mode < 3; ++mode) { - predicate.move_mode = mode; - predicate.no_mask = writeout ? (1 << 3) : 0; - mir_choose_alu(&vlut, instructions, liveness, worklist, len, &predicate, UNIT_VLUT); - predicate.no_mask = 0; - mir_choose_alu(&vadd, instructions, liveness, worklist, len, &predicate, UNIT_VADD); - } + for (unsigned mode = 1; mode < 3; ++mode) { + predicate.move_mode = mode; + predicate.no_mask = writeout ? 
(1 << 3) : 0; + mir_choose_alu(&vlut, instructions, liveness, worklist, len, &predicate, + UNIT_VLUT); + predicate.no_mask = 0; + mir_choose_alu(&vadd, instructions, liveness, worklist, len, &predicate, + UNIT_VADD); + } - /* Reset */ - predicate.move_mode = 0; + /* Reset */ + predicate.move_mode = 0; - mir_update_worklist(worklist, len, instructions, vlut); - mir_update_worklist(worklist, len, instructions, vadd); - mir_update_worklist(worklist, len, instructions, smul); + mir_update_worklist(worklist, len, instructions, vlut); + mir_update_worklist(worklist, len, instructions, vadd); + mir_update_worklist(worklist, len, instructions, smul); - bool vadd_csel = vadd && OP_IS_CSEL(vadd->op); - bool smul_csel = smul && OP_IS_CSEL(smul->op); + bool vadd_csel = vadd && OP_IS_CSEL(vadd->op); + bool smul_csel = smul && OP_IS_CSEL(smul->op); - if (vadd_csel || smul_csel) { - midgard_instruction *ins = vadd_csel ? vadd : smul; - midgard_instruction *cond = mir_schedule_condition(ctx, &predicate, worklist, len, instructions, ins); + if (vadd_csel || smul_csel) { + midgard_instruction *ins = vadd_csel ? vadd : smul; + midgard_instruction *cond = mir_schedule_condition( + ctx, &predicate, worklist, len, instructions, ins); - if (cond->unit == UNIT_VMUL) - vmul = cond; - else if (cond->unit == UNIT_SADD) - sadd = cond; - else - unreachable("Bad condition"); - } + if (cond->unit == UNIT_VMUL) + vmul = cond; + else if (cond->unit == UNIT_SADD) + sadd = cond; + else + unreachable("Bad condition"); + } - /* Stage 2, let's schedule sadd before vmul for writeout */ - mir_choose_alu(&sadd, instructions, liveness, worklist, len, &predicate, UNIT_SADD); + /* Stage 2, let's schedule sadd before vmul for writeout */ + mir_choose_alu(&sadd, instructions, liveness, worklist, len, &predicate, + UNIT_SADD); - /* Check if writeout reads its own register */ + /* Check if writeout reads its own register */ - if (writeout) { - midgard_instruction *stages[] = { sadd, vadd, smul, vlut }; - unsigned src = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(0) : branch->src[0]; - unsigned writeout_mask = 0x0; - bool bad_writeout = false; + if (writeout) { + midgard_instruction *stages[] = {sadd, vadd, smul, vlut}; + unsigned src = + (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(0) : branch->src[0]; + unsigned writeout_mask = 0x0; + bool bad_writeout = false; - for (unsigned i = 0; i < ARRAY_SIZE(stages); ++i) { - if (!stages[i]) - continue; + for (unsigned i = 0; i < ARRAY_SIZE(stages); ++i) { + if (!stages[i]) + continue; - if (stages[i]->dest != src) - continue; + if (stages[i]->dest != src) + continue; - writeout_mask |= stages[i]->mask; - bad_writeout |= mir_has_arg(stages[i], branch->src[0]); - } + writeout_mask |= stages[i]->mask; + bad_writeout |= mir_has_arg(stages[i], branch->src[0]); + } - /* It's possible we'll be able to schedule something into vmul - * to fill r0. Let's peak into the future, trying to schedule - * vmul specially that way. */ + /* It's possible we'll be able to schedule something into vmul + * to fill r0. Let's peak into the future, trying to schedule + * vmul specially that way. 
*/ - unsigned full_mask = 0xF; + unsigned full_mask = 0xF; - if (!bad_writeout && writeout_mask != full_mask) { - predicate.unit = UNIT_VMUL; - predicate.dest = src; - predicate.mask = writeout_mask ^ full_mask; + if (!bad_writeout && writeout_mask != full_mask) { + predicate.unit = UNIT_VMUL; + predicate.dest = src; + predicate.mask = writeout_mask ^ full_mask; - struct midgard_instruction *peaked = - mir_choose_instruction(instructions, liveness, worklist, len, &predicate); + struct midgard_instruction *peaked = mir_choose_instruction( + instructions, liveness, worklist, len, &predicate); - if (peaked) { - vmul = peaked; - vmul->unit = UNIT_VMUL; - writeout_mask |= predicate.mask; - assert(writeout_mask == full_mask); - } + if (peaked) { + vmul = peaked; + vmul->unit = UNIT_VMUL; + writeout_mask |= predicate.mask; + assert(writeout_mask == full_mask); + } - /* Cleanup */ - predicate.dest = predicate.mask = 0; - } + /* Cleanup */ + predicate.dest = predicate.mask = 0; + } - /* Finally, add a move if necessary */ - if (bad_writeout || writeout_mask != full_mask) { - unsigned temp = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(0) : make_compiler_temp(ctx); + /* Finally, add a move if necessary */ + if (bad_writeout || writeout_mask != full_mask) { + unsigned temp = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(0) + : make_compiler_temp(ctx); - vmul = ralloc(ctx, midgard_instruction); - *vmul = v_mov(src, temp); - vmul->unit = UNIT_VMUL; - vmul->mask = full_mask ^ writeout_mask; + vmul = ralloc(ctx, midgard_instruction); + *vmul = v_mov(src, temp); + vmul->unit = UNIT_VMUL; + vmul->mask = full_mask ^ writeout_mask; - /* Rewrite to use our temp */ + /* Rewrite to use our temp */ - for (unsigned i = 0; i < ARRAY_SIZE(stages); ++i) { - if (stages[i]) { - mir_rewrite_index_dst_single(stages[i], src, temp); - mir_rewrite_index_src_single(stages[i], src, temp); - } - } + for (unsigned i = 0; i < ARRAY_SIZE(stages); ++i) { + if (stages[i]) { + mir_rewrite_index_dst_single(stages[i], src, temp); + mir_rewrite_index_src_single(stages[i], src, temp); + } + } - mir_rewrite_index_src_single(branch, src, temp); - } - } + mir_rewrite_index_src_single(branch, src, temp); + } + } - mir_choose_alu(&vmul, instructions, liveness, worklist, len, &predicate, UNIT_VMUL); + mir_choose_alu(&vmul, instructions, liveness, worklist, len, &predicate, + UNIT_VMUL); - mir_update_worklist(worklist, len, instructions, vmul); - mir_update_worklist(worklist, len, instructions, sadd); + mir_update_worklist(worklist, len, instructions, vmul); + mir_update_worklist(worklist, len, instructions, sadd); - bundle.has_embedded_constants = predicate.constant_mask != 0; + bundle.has_embedded_constants = predicate.constant_mask != 0; - unsigned padding = 0; + unsigned padding = 0; - /* Now that we have finished scheduling, build up the bundle */ - midgard_instruction *stages[] = { vmul, sadd, vadd, smul, vlut, branch }; + /* Now that we have finished scheduling, build up the bundle */ + midgard_instruction *stages[] = {vmul, sadd, vadd, smul, vlut, branch}; - for (unsigned i = 0; i < ARRAY_SIZE(stages); ++i) { - if (stages[i]) { - bundle.control |= stages[i]->unit; - bytes_emitted += bytes_for_instruction(stages[i]); - bundle.instructions[bundle.instruction_count++] = stages[i]; + for (unsigned i = 0; i < ARRAY_SIZE(stages); ++i) { + if (stages[i]) { + bundle.control |= stages[i]->unit; + bytes_emitted += bytes_for_instruction(stages[i]); + bundle.instructions[bundle.instruction_count++] = stages[i]; - /* If we branch, we can't spill to 
TLS since the store - * instruction will never get executed. We could try to - * break the bundle but this is probably easier for - * now. */ + /* If we branch, we can't spill to TLS since the store + * instruction will never get executed. We could try to + * break the bundle but this is probably easier for + * now. */ - if (branch) - stages[i]->no_spill |= (1 << REG_CLASS_WORK); - } - } + if (branch) + stages[i]->no_spill |= (1 << REG_CLASS_WORK); + } + } - /* Pad ALU op to nearest word */ + /* Pad ALU op to nearest word */ - if (bytes_emitted & 15) { - padding = 16 - (bytes_emitted & 15); - bytes_emitted += padding; - } + if (bytes_emitted & 15) { + padding = 16 - (bytes_emitted & 15); + bytes_emitted += padding; + } - /* Constants must always be quadwords */ - if (bundle.has_embedded_constants) - bytes_emitted += 16; + /* Constants must always be quadwords */ + if (bundle.has_embedded_constants) + bytes_emitted += 16; - /* Size ALU instruction for tag */ - bundle.tag = (TAG_ALU_4) + (bytes_emitted / 16) - 1; + /* Size ALU instruction for tag */ + bundle.tag = (TAG_ALU_4) + (bytes_emitted / 16) - 1; - bool tilebuf_wait = branch && branch->compact_branch && - branch->branch.target_type == TARGET_TILEBUF_WAIT; + bool tilebuf_wait = branch && branch->compact_branch && + branch->branch.target_type == TARGET_TILEBUF_WAIT; - /* MRT capable GPUs use a special writeout procedure */ - if ((writeout || tilebuf_wait) && !(ctx->quirks & MIDGARD_NO_UPPER_ALU)) - bundle.tag += 4; + /* MRT capable GPUs use a special writeout procedure */ + if ((writeout || tilebuf_wait) && !(ctx->quirks & MIDGARD_NO_UPPER_ALU)) + bundle.tag += 4; - bundle.padding = padding; - bundle.control |= bundle.tag; + bundle.padding = padding; + bundle.control |= bundle.tag; - return bundle; + return bundle; } /* Schedule a single block by iterating its instruction to create bundles. * While we go, tally about the bundle sizes to compute the block size. */ - static void schedule_block(compiler_context *ctx, midgard_block *block) { - /* Copy list to dynamic array */ - unsigned len = 0; - midgard_instruction **instructions = flatten_mir(block, &len); + /* Copy list to dynamic array */ + unsigned len = 0; + midgard_instruction **instructions = flatten_mir(block, &len); - if (!len) - return; + if (!len) + return; - /* Calculate dependencies and initial worklist */ - unsigned node_count = ctx->temp_count + 1; - mir_create_dependency_graph(instructions, len, node_count); + /* Calculate dependencies and initial worklist */ + unsigned node_count = ctx->temp_count + 1; + mir_create_dependency_graph(instructions, len, node_count); - /* Allocate the worklist */ - size_t sz = BITSET_WORDS(len) * sizeof(BITSET_WORD); - BITSET_WORD *worklist = calloc(sz, 1); - uint16_t *liveness = calloc(node_count, 2); - mir_initialize_worklist(worklist, instructions, len); + /* Allocate the worklist */ + size_t sz = BITSET_WORDS(len) * sizeof(BITSET_WORD); + BITSET_WORD *worklist = calloc(sz, 1); + uint16_t *liveness = calloc(node_count, 2); + mir_initialize_worklist(worklist, instructions, len); - /* Count the number of load/store instructions so we know when it's - * worth trying to schedule them in pairs. */ - unsigned num_ldst = 0; - for (unsigned i = 0; i < len; ++i) { - if (instructions[i]->type == TAG_LOAD_STORE_4) - ++num_ldst; - } + /* Count the number of load/store instructions so we know when it's + * worth trying to schedule them in pairs. 
*/ + unsigned num_ldst = 0; + for (unsigned i = 0; i < len; ++i) { + if (instructions[i]->type == TAG_LOAD_STORE_4) + ++num_ldst; + } - struct util_dynarray bundles; - util_dynarray_init(&bundles, NULL); + struct util_dynarray bundles; + util_dynarray_init(&bundles, NULL); - block->quadword_count = 0; + block->quadword_count = 0; - for (;;) { - unsigned tag = mir_choose_bundle(instructions, liveness, worklist, len, num_ldst); - midgard_bundle bundle; + for (;;) { + unsigned tag = + mir_choose_bundle(instructions, liveness, worklist, len, num_ldst); + midgard_bundle bundle; - if (tag == TAG_TEXTURE_4) - bundle = mir_schedule_texture(instructions, liveness, worklist, len, ctx->stage != MESA_SHADER_FRAGMENT); - else if (tag == TAG_LOAD_STORE_4) - bundle = mir_schedule_ldst(instructions, liveness, worklist, len, &num_ldst); - else if (tag == TAG_ALU_4) - bundle = mir_schedule_alu(ctx, instructions, liveness, worklist, len); - else - break; + if (tag == TAG_TEXTURE_4) + bundle = mir_schedule_texture(instructions, liveness, worklist, len, + ctx->stage != MESA_SHADER_FRAGMENT); + else if (tag == TAG_LOAD_STORE_4) + bundle = + mir_schedule_ldst(instructions, liveness, worklist, len, &num_ldst); + else if (tag == TAG_ALU_4) + bundle = mir_schedule_alu(ctx, instructions, liveness, worklist, len); + else + break; - for (unsigned i = 0; i < bundle.instruction_count; ++i) - bundle.instructions[i]->bundle_id = - ctx->quadword_count + block->quadword_count; + for (unsigned i = 0; i < bundle.instruction_count; ++i) + bundle.instructions[i]->bundle_id = + ctx->quadword_count + block->quadword_count; - util_dynarray_append(&bundles, midgard_bundle, bundle); - block->quadword_count += midgard_tag_props[bundle.tag].size; - } + util_dynarray_append(&bundles, midgard_bundle, bundle); + block->quadword_count += midgard_tag_props[bundle.tag].size; + } - assert(num_ldst == 0); + assert(num_ldst == 0); - /* We emitted bundles backwards; copy into the block in reverse-order */ + /* We emitted bundles backwards; copy into the block in reverse-order */ - util_dynarray_init(&block->bundles, block); - util_dynarray_foreach_reverse(&bundles, midgard_bundle, bundle) { - util_dynarray_append(&block->bundles, midgard_bundle, *bundle); - } - util_dynarray_fini(&bundles); + util_dynarray_init(&block->bundles, block); + util_dynarray_foreach_reverse(&bundles, midgard_bundle, bundle) + { + util_dynarray_append(&block->bundles, midgard_bundle, *bundle); + } + util_dynarray_fini(&bundles); - block->scheduled = true; - ctx->quadword_count += block->quadword_count; + block->scheduled = true; + ctx->quadword_count += block->quadword_count; - /* Reorder instructions to match bundled. First remove existing - * instructions and then recreate the list */ + /* Reorder instructions to match bundled. 
First remove existing + * instructions and then recreate the list */ - mir_foreach_instr_in_block_safe(block, ins) { - list_del(&ins->link); - } + mir_foreach_instr_in_block_safe(block, ins) { + list_del(&ins->link); + } - mir_foreach_instr_in_block_scheduled_rev(block, ins) { - list_add(&ins->link, &block->base.instructions); - } + mir_foreach_instr_in_block_scheduled_rev(block, ins) { + list_add(&ins->link, &block->base.instructions); + } - free(instructions); /* Allocated by flatten_mir() */ - free(worklist); - free(liveness); + free(instructions); /* Allocated by flatten_mir() */ + free(worklist); + free(liveness); } /* Insert moves to ensure we can register allocate load/store registers */ static void mir_lower_ldst(compiler_context *ctx) { - mir_foreach_instr_global_safe(ctx, I) { - if (I->type != TAG_LOAD_STORE_4) continue; + mir_foreach_instr_global_safe(ctx, I) { + if (I->type != TAG_LOAD_STORE_4) + continue; - mir_foreach_src(I, s) { - if (s == 0) continue; - if (I->src[s] == ~0) continue; - if (I->swizzle[s][0] == 0) continue; + mir_foreach_src(I, s) { + if (s == 0) + continue; + if (I->src[s] == ~0) + continue; + if (I->swizzle[s][0] == 0) + continue; - unsigned temp = make_compiler_temp(ctx); - midgard_instruction mov = v_mov(I->src[s], temp); - mov.mask = 0x1; - mov.dest_type = I->src_types[s]; - for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; ++c) - mov.swizzle[1][c] = I->swizzle[s][0]; + unsigned temp = make_compiler_temp(ctx); + midgard_instruction mov = v_mov(I->src[s], temp); + mov.mask = 0x1; + mov.dest_type = I->src_types[s]; + for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; ++c) + mov.swizzle[1][c] = I->swizzle[s][0]; - mir_insert_instruction_before(ctx, I, mov); - I->src[s] = mov.dest; - I->swizzle[s][0] = 0; - } - } + mir_insert_instruction_before(ctx, I, mov); + I->src[s] = mov.dest; + I->swizzle[s][0] = 0; + } + } } /* Insert moves to ensure we can register allocate blend writeout */ static void mir_lower_blend_input(compiler_context *ctx) { - mir_foreach_block(ctx, _blk) { - midgard_block *blk = (midgard_block *) _blk; + mir_foreach_block(ctx, _blk) { + midgard_block *blk = (midgard_block *)_blk; - if (list_is_empty(&blk->base.instructions)) - continue; + if (list_is_empty(&blk->base.instructions)) + continue; - midgard_instruction *I = mir_last_in_block(blk); + midgard_instruction *I = mir_last_in_block(blk); - if (!I || I->type != TAG_ALU_4 || !I->writeout) - continue; + if (!I || I->type != TAG_ALU_4 || !I->writeout) + continue; - mir_foreach_src(I, s) { - unsigned src = I->src[s]; + mir_foreach_src(I, s) { + unsigned src = I->src[s]; - if (src >= ctx->temp_count) - continue; + if (src >= ctx->temp_count) + continue; - if (!_blk->live_out[src]) - continue; + if (!_blk->live_out[src]) + continue; - unsigned temp = make_compiler_temp(ctx); - midgard_instruction mov = v_mov(src, temp); - mov.mask = 0xF; - mov.dest_type = nir_type_uint32; - mir_insert_instruction_before(ctx, I, mov); - I->src[s] = mov.dest; - } - } + unsigned temp = make_compiler_temp(ctx); + midgard_instruction mov = v_mov(src, temp); + mov.mask = 0xF; + mov.dest_type = nir_type_uint32; + mir_insert_instruction_before(ctx, I, mov); + I->src[s] = mov.dest; + } + } } void midgard_schedule_program(compiler_context *ctx) { - mir_lower_ldst(ctx); - midgard_promote_uniforms(ctx); + mir_lower_ldst(ctx); + midgard_promote_uniforms(ctx); - /* Must be lowered right before scheduling */ - mir_squeeze_index(ctx); - mir_lower_special_reads(ctx); + /* Must be lowered right before scheduling */ + 
mir_squeeze_index(ctx); + mir_lower_special_reads(ctx); - if (ctx->stage == MESA_SHADER_FRAGMENT) { - mir_invalidate_liveness(ctx); - mir_compute_liveness(ctx); - mir_lower_blend_input(ctx); - } + if (ctx->stage == MESA_SHADER_FRAGMENT) { + mir_invalidate_liveness(ctx); + mir_compute_liveness(ctx); + mir_lower_blend_input(ctx); + } - mir_squeeze_index(ctx); + mir_squeeze_index(ctx); - /* Lowering can introduce some dead moves */ + /* Lowering can introduce some dead moves */ - mir_foreach_block(ctx, _block) { - midgard_block *block = (midgard_block *) _block; - midgard_opt_dead_move_eliminate(ctx, block); - schedule_block(ctx, block); - } + mir_foreach_block(ctx, _block) { + midgard_block *block = (midgard_block *)_block; + midgard_opt_dead_move_eliminate(ctx, block); + schedule_block(ctx, block); + } } diff --git a/src/panfrost/midgard/mir.c b/src/panfrost/midgard/mir.c index a4ea28fcdc8..07cbfc89236 100644 --- a/src/panfrost/midgard/mir.c +++ b/src/panfrost/midgard/mir.c @@ -25,86 +25,93 @@ #include "compiler.h" #include "midgard_ops.h" -void mir_rewrite_index_src_single(midgard_instruction *ins, unsigned old, unsigned new) +void +mir_rewrite_index_src_single(midgard_instruction *ins, unsigned old, + unsigned new) { - mir_foreach_src(ins, i) { - if (ins->src[i] == old) - ins->src[i] = new; - } + mir_foreach_src(ins, i) { + if (ins->src[i] == old) + ins->src[i] = new; + } } -void mir_rewrite_index_dst_single(midgard_instruction *ins, unsigned old, unsigned new) +void +mir_rewrite_index_dst_single(midgard_instruction *ins, unsigned old, + unsigned new) { - if (ins->dest == old) - ins->dest = new; + if (ins->dest == old) + ins->dest = new; } static void -mir_rewrite_index_src_single_swizzle(midgard_instruction *ins, unsigned old, unsigned new, unsigned *swizzle) +mir_rewrite_index_src_single_swizzle(midgard_instruction *ins, unsigned old, + unsigned new, unsigned *swizzle) { - for (unsigned i = 0; i < ARRAY_SIZE(ins->src); ++i) { - if (ins->src[i] != old) continue; + for (unsigned i = 0; i < ARRAY_SIZE(ins->src); ++i) { + if (ins->src[i] != old) + continue; - ins->src[i] = new; - mir_compose_swizzle(ins->swizzle[i], swizzle, ins->swizzle[i]); - } + ins->src[i] = new; + mir_compose_swizzle(ins->swizzle[i], swizzle, ins->swizzle[i]); + } } void mir_rewrite_index_src(compiler_context *ctx, unsigned old, unsigned new) { - mir_foreach_instr_global(ctx, ins) { - mir_rewrite_index_src_single(ins, old, new); - } + mir_foreach_instr_global(ctx, ins) { + mir_rewrite_index_src_single(ins, old, new); + } } void -mir_rewrite_index_src_swizzle(compiler_context *ctx, unsigned old, unsigned new, unsigned *swizzle) +mir_rewrite_index_src_swizzle(compiler_context *ctx, unsigned old, unsigned new, + unsigned *swizzle) { - mir_foreach_instr_global(ctx, ins) { - mir_rewrite_index_src_single_swizzle(ins, old, new, swizzle); - } + mir_foreach_instr_global(ctx, ins) { + mir_rewrite_index_src_single_swizzle(ins, old, new, swizzle); + } } void mir_rewrite_index_dst(compiler_context *ctx, unsigned old, unsigned new) { - mir_foreach_instr_global(ctx, ins) { - mir_rewrite_index_dst_single(ins, old, new); - } + mir_foreach_instr_global(ctx, ins) { + mir_rewrite_index_dst_single(ins, old, new); + } - /* Implicitly written before the shader */ - if (ctx->blend_input == old) - ctx->blend_input = new; + /* Implicitly written before the shader */ + if (ctx->blend_input == old) + ctx->blend_input = new; - if (ctx->blend_src1 == old) - ctx->blend_src1 = new; + if (ctx->blend_src1 == old) + ctx->blend_src1 = new; } void 
mir_rewrite_index(compiler_context *ctx, unsigned old, unsigned new) { - mir_rewrite_index_src(ctx, old, new); - mir_rewrite_index_dst(ctx, old, new); + mir_rewrite_index_src(ctx, old, new); + mir_rewrite_index_dst(ctx, old, new); } unsigned mir_use_count(compiler_context *ctx, unsigned value) { - unsigned used_count = 0; + unsigned used_count = 0; - mir_foreach_instr_global(ctx, ins) { - if (mir_has_arg(ins, value)) - ++used_count; - } + mir_foreach_instr_global(ctx, ins) { + if (mir_has_arg(ins, value)) + ++used_count; + } - if (ctx->blend_input == value) - ++used_count; + if (ctx->blend_input == value) + ++used_count; - if (ctx->blend_src1 == value) - ++used_count; + if (ctx->blend_src1 == value) + ++used_count; - return used_count; + return used_count; } /* Checks if a value is used only once (or totally dead), which is an important @@ -113,50 +120,56 @@ mir_use_count(compiler_context *ctx, unsigned value) bool mir_single_use(compiler_context *ctx, unsigned value) { - /* We can replicate constants in places so who cares */ - if (value == SSA_FIXED_REGISTER(REGISTER_CONSTANT)) - return true; + /* We can replicate constants in places so who cares */ + if (value == SSA_FIXED_REGISTER(REGISTER_CONSTANT)) + return true; - return mir_use_count(ctx, value) <= 1; + return mir_use_count(ctx, value) <= 1; } bool mir_nontrivial_mod(midgard_instruction *ins, unsigned i, bool check_swizzle) { - bool is_int = midgard_is_integer_op(ins->op); + bool is_int = midgard_is_integer_op(ins->op); - if (is_int) { - if (ins->src_shift[i]) return true; - } else { - if (ins->src_neg[i]) return true; - if (ins->src_abs[i]) return true; - } + if (is_int) { + if (ins->src_shift[i]) + return true; + } else { + if (ins->src_neg[i]) + return true; + if (ins->src_abs[i]) + return true; + } - if (ins->dest_type != ins->src_types[i]) return true; + if (ins->dest_type != ins->src_types[i]) + return true; - if (check_swizzle) { - for (unsigned c = 0; c < 16; ++c) { - if (!(ins->mask & (1 << c))) continue; - if (ins->swizzle[i][c] != c) return true; - } - } + if (check_swizzle) { + for (unsigned c = 0; c < 16; ++c) { + if (!(ins->mask & (1 << c))) + continue; + if (ins->swizzle[i][c] != c) + return true; + } + } - return false; + return false; } bool mir_nontrivial_outmod(midgard_instruction *ins) { - bool is_int = midgard_is_integer_op(ins->op); - unsigned mod = ins->outmod; + bool is_int = midgard_is_integer_op(ins->op); + unsigned mod = ins->outmod; - if (ins->dest_type != ins->src_types[1]) - return true; + if (ins->dest_type != ins->src_types[1]) + return true; - if (is_int) - return mod != midgard_outmod_keeplo; - else - return mod != midgard_outmod_none; + if (is_int) + return mod != midgard_outmod_keeplo; + else + return mod != midgard_outmod_none; } /* 128 / sz = exp2(log2(128 / sz)) @@ -168,32 +181,32 @@ mir_nontrivial_outmod(midgard_instruction *ins) static unsigned mir_components_for_bits(unsigned bits) { - return 1 << (7 - util_logbase2(bits)); + return 1 << (7 - util_logbase2(bits)); } unsigned mir_components_for_type(nir_alu_type T) { - unsigned sz = nir_alu_type_get_type_size(T); - return mir_components_for_bits(sz); + unsigned sz = nir_alu_type_get_type_size(T); + return mir_components_for_bits(sz); } uint16_t mir_from_bytemask(uint16_t bytemask, unsigned bits) { - unsigned value = 0; - unsigned count = bits / 8; + unsigned value = 0; + unsigned count = bits / 8; - for (unsigned c = 0, d = 0; c < 16; c += count, ++d) { - bool a = (bytemask & (1 << c)) != 0; + for (unsigned c = 0, d = 0; c < 16; c += count, 
++d) { + bool a = (bytemask & (1 << c)) != 0; - for (unsigned q = c; q < count; ++q) - assert(((bytemask & (1 << q)) != 0) == a); + for (unsigned q = c; q < count; ++q) + assert(((bytemask & (1 << q)) != 0) == a); - value |= (a << d); - } + value |= (a << d); + } - return value; + return value; } /* Rounds up a bytemask to fill a given component count. Iterate each @@ -202,18 +215,18 @@ mir_from_bytemask(uint16_t bytemask, unsigned bits) uint16_t mir_round_bytemask_up(uint16_t mask, unsigned bits) { - unsigned bytes = bits / 8; - unsigned maxmask = mask_of(bytes); - unsigned channels = mir_components_for_bits(bits); + unsigned bytes = bits / 8; + unsigned maxmask = mask_of(bytes); + unsigned channels = mir_components_for_bits(bits); - for (unsigned c = 0; c < channels; ++c) { - unsigned submask = maxmask << (c * bytes); + for (unsigned c = 0; c < channels; ++c) { + unsigned submask = maxmask << (c * bytes); - if (mask & submask) - mask |= submask; - } + if (mask & submask) + mask |= submask; + } - return mask; + return mask; } /* Grabs the per-byte mask of an instruction (as opposed to per-component) */ @@ -221,15 +234,15 @@ mir_round_bytemask_up(uint16_t mask, unsigned bits) uint16_t mir_bytemask(midgard_instruction *ins) { - unsigned type_size = nir_alu_type_get_type_size(ins->dest_type); - return pan_to_bytemask(type_size, ins->mask); + unsigned type_size = nir_alu_type_get_type_size(ins->dest_type); + return pan_to_bytemask(type_size, ins->mask); } void mir_set_bytemask(midgard_instruction *ins, uint16_t bytemask) { - unsigned type_size = nir_alu_type_get_type_size(ins->dest_type); - ins->mask = mir_from_bytemask(bytemask, type_size); + unsigned type_size = nir_alu_type_get_type_size(ins->dest_type); + ins->mask = mir_from_bytemask(bytemask, type_size); } /* @@ -240,24 +253,24 @@ mir_set_bytemask(midgard_instruction *ins, uint16_t bytemask) signed mir_upper_override(midgard_instruction *ins, unsigned inst_size) { - unsigned type_size = nir_alu_type_get_type_size(ins->dest_type); + unsigned type_size = nir_alu_type_get_type_size(ins->dest_type); - /* If the sizes are the same, there's nothing to override */ - if (type_size == inst_size) - return -1; + /* If the sizes are the same, there's nothing to override */ + if (type_size == inst_size) + return -1; - /* There are 16 bytes per vector, so there are (16/bytes) - * components per vector. So the magic half is half of - * (16/bytes), which simplifies to 8/bytes = 8 / (bits / 8) = 64 / bits - * */ + /* There are 16 bytes per vector, so there are (16/bytes) + * components per vector. So the magic half is half of + * (16/bytes), which simplifies to 8/bytes = 8 / (bits / 8) = 64 / bits + * */ - unsigned threshold = mir_components_for_bits(type_size) >> 1; + unsigned threshold = mir_components_for_bits(type_size) >> 1; - /* How many components did we shift over? */ - unsigned zeroes = __builtin_ctz(ins->mask); + /* How many components did we shift over? */ + unsigned zeroes = __builtin_ctz(ins->mask); - /* Did we hit the threshold? */ - return (zeroes >= threshold) ? threshold : 0; + /* Did we hit the threshold? */ + return (zeroes >= threshold) ? 
threshold : 0; } /* Creates a mask of the components of a node read by an instruction, by @@ -269,60 +282,64 @@ mir_upper_override(midgard_instruction *ins, unsigned inst_size) */ static uint16_t -mir_bytemask_of_read_components_single(unsigned *swizzle, unsigned inmask, unsigned bits) +mir_bytemask_of_read_components_single(unsigned *swizzle, unsigned inmask, + unsigned bits) { - unsigned cmask = 0; + unsigned cmask = 0; - for (unsigned c = 0; c < MIR_VEC_COMPONENTS; ++c) { - if (!(inmask & (1 << c))) continue; - cmask |= (1 << swizzle[c]); - } + for (unsigned c = 0; c < MIR_VEC_COMPONENTS; ++c) { + if (!(inmask & (1 << c))) + continue; + cmask |= (1 << swizzle[c]); + } - return pan_to_bytemask(bits, cmask); + return pan_to_bytemask(bits, cmask); } uint16_t mir_bytemask_of_read_components_index(midgard_instruction *ins, unsigned i) { - /* Conditional branches read one 32-bit component = 4 bytes (TODO: multi branch??) */ - if (ins->compact_branch && ins->branch.conditional && (i == 0)) - return 0xF; + /* Conditional branches read one 32-bit component = 4 bytes (TODO: multi + * branch??) */ + if (ins->compact_branch && ins->branch.conditional && (i == 0)) + return 0xF; - /* ALU ops act componentwise so we need to pay attention to - * their mask. Texture/ldst does not so we don't clamp source - * readmasks based on the writemask */ - unsigned qmask = ~0; + /* ALU ops act componentwise so we need to pay attention to + * their mask. Texture/ldst does not so we don't clamp source + * readmasks based on the writemask */ + unsigned qmask = ~0; - /* Handle dot products and things */ - if (ins->type == TAG_ALU_4 && !ins->compact_branch) { - unsigned props = alu_opcode_props[ins->op].props; + /* Handle dot products and things */ + if (ins->type == TAG_ALU_4 && !ins->compact_branch) { + unsigned props = alu_opcode_props[ins->op].props; - unsigned channel_override = GET_CHANNEL_COUNT(props); + unsigned channel_override = GET_CHANNEL_COUNT(props); - if (channel_override) - qmask = mask_of(channel_override); - else - qmask = ins->mask; - } + if (channel_override) + qmask = mask_of(channel_override); + else + qmask = ins->mask; + } - return mir_bytemask_of_read_components_single(ins->swizzle[i], qmask, - nir_alu_type_get_type_size(ins->src_types[i])); + return mir_bytemask_of_read_components_single( + ins->swizzle[i], qmask, nir_alu_type_get_type_size(ins->src_types[i])); } uint16_t mir_bytemask_of_read_components(midgard_instruction *ins, unsigned node) { - uint16_t mask = 0; + uint16_t mask = 0; - if (node == ~0) - return 0; + if (node == ~0) + return 0; - mir_foreach_src(ins, i) { - if (ins->src[i] != node) continue; - mask |= mir_bytemask_of_read_components_index(ins, i); - } + mir_foreach_src(ins, i) { + if (ins->src[i] != node) + continue; + mask |= mir_bytemask_of_read_components_index(ins, i); + } - return mask; + return mask; } /* Register allocation occurs after instruction scheduling, which is fine until @@ -335,94 +352,97 @@ mir_bytemask_of_read_components(midgard_instruction *ins, unsigned node) static midgard_bundle mir_bundle_for_op(compiler_context *ctx, midgard_instruction ins) { - midgard_instruction *u = mir_upload_ins(ctx, ins); + midgard_instruction *u = mir_upload_ins(ctx, ins); - midgard_bundle bundle = { - .tag = ins.type, - .instruction_count = 1, - .instructions = { u }, - }; + midgard_bundle bundle = { + .tag = ins.type, + .instruction_count = 1, + .instructions = {u}, + }; - if (bundle.tag == TAG_ALU_4) { - assert(OP_IS_MOVE(u->op)); - u->unit = UNIT_VMUL; + if (bundle.tag == 
TAG_ALU_4) { + assert(OP_IS_MOVE(u->op)); + u->unit = UNIT_VMUL; - size_t bytes_emitted = sizeof(uint32_t) + sizeof(midgard_reg_info) + sizeof(midgard_vector_alu); - bundle.padding = ~(bytes_emitted - 1) & 0xF; - bundle.control = ins.type | u->unit; - } + size_t bytes_emitted = sizeof(uint32_t) + sizeof(midgard_reg_info) + + sizeof(midgard_vector_alu); + bundle.padding = ~(bytes_emitted - 1) & 0xF; + bundle.control = ins.type | u->unit; + } - return bundle; + return bundle; } static unsigned mir_bundle_idx_for_ins(midgard_instruction *tag, midgard_block *block) { - midgard_bundle *bundles = - (midgard_bundle *) block->bundles.data; + midgard_bundle *bundles = (midgard_bundle *)block->bundles.data; - size_t count = (block->bundles.size / sizeof(midgard_bundle)); + size_t count = (block->bundles.size / sizeof(midgard_bundle)); - for (unsigned i = 0; i < count; ++i) { - for (unsigned j = 0; j < bundles[i].instruction_count; ++j) { - if (bundles[i].instructions[j] == tag) - return i; - } - } + for (unsigned i = 0; i < count; ++i) { + for (unsigned j = 0; j < bundles[i].instruction_count; ++j) { + if (bundles[i].instructions[j] == tag) + return i; + } + } - mir_print_instruction(tag); - unreachable("Instruction not scheduled in block"); + mir_print_instruction(tag); + unreachable("Instruction not scheduled in block"); } midgard_instruction * -mir_insert_instruction_before_scheduled( - compiler_context *ctx, - midgard_block *block, - midgard_instruction *tag, - midgard_instruction ins) +mir_insert_instruction_before_scheduled(compiler_context *ctx, + midgard_block *block, + midgard_instruction *tag, + midgard_instruction ins) { - unsigned before = mir_bundle_idx_for_ins(tag, block); - size_t count = util_dynarray_num_elements(&block->bundles, midgard_bundle); - UNUSED void *unused = util_dynarray_grow(&block->bundles, midgard_bundle, 1); + unsigned before = mir_bundle_idx_for_ins(tag, block); + size_t count = util_dynarray_num_elements(&block->bundles, midgard_bundle); + UNUSED void *unused = util_dynarray_grow(&block->bundles, midgard_bundle, 1); - midgard_bundle *bundles = (midgard_bundle *) block->bundles.data; - memmove(bundles + before + 1, bundles + before, (count - before) * sizeof(midgard_bundle)); - midgard_bundle *before_bundle = bundles + before + 1; + midgard_bundle *bundles = (midgard_bundle *)block->bundles.data; + memmove(bundles + before + 1, bundles + before, + (count - before) * sizeof(midgard_bundle)); + midgard_bundle *before_bundle = bundles + before + 1; - midgard_bundle new = mir_bundle_for_op(ctx, ins); - memcpy(bundles + before, &new, sizeof(new)); + midgard_bundle new = mir_bundle_for_op(ctx, ins); + memcpy(bundles + before, &new, sizeof(new)); - list_addtail(&new.instructions[0]->link, &before_bundle->instructions[0]->link); - block->quadword_count += midgard_tag_props[new.tag].size; + list_addtail(&new.instructions[0]->link, + &before_bundle->instructions[0]->link); + block->quadword_count += midgard_tag_props[new.tag].size; - return new.instructions[0]; + return new.instructions[0]; } midgard_instruction * -mir_insert_instruction_after_scheduled( - compiler_context *ctx, - midgard_block *block, - midgard_instruction *tag, - midgard_instruction ins) +mir_insert_instruction_after_scheduled(compiler_context *ctx, + midgard_block *block, + midgard_instruction *tag, + midgard_instruction ins) { - /* We need to grow the bundles array to add our new bundle */ - size_t count = util_dynarray_num_elements(&block->bundles, midgard_bundle); - UNUSED void *unused = 
util_dynarray_grow(&block->bundles, midgard_bundle, 1); + /* We need to grow the bundles array to add our new bundle */ + size_t count = util_dynarray_num_elements(&block->bundles, midgard_bundle); + UNUSED void *unused = util_dynarray_grow(&block->bundles, midgard_bundle, 1); - /* Find the bundle that we want to insert after */ - unsigned after = mir_bundle_idx_for_ins(tag, block); + /* Find the bundle that we want to insert after */ + unsigned after = mir_bundle_idx_for_ins(tag, block); - /* All the bundles after that one, we move ahead by one */ - midgard_bundle *bundles = (midgard_bundle *) block->bundles.data; - memmove(bundles + after + 2, bundles + after + 1, (count - after - 1) * sizeof(midgard_bundle)); - midgard_bundle *after_bundle = bundles + after; + /* All the bundles after that one, we move ahead by one */ + midgard_bundle *bundles = (midgard_bundle *)block->bundles.data; + memmove(bundles + after + 2, bundles + after + 1, + (count - after - 1) * sizeof(midgard_bundle)); + midgard_bundle *after_bundle = bundles + after; - midgard_bundle new = mir_bundle_for_op(ctx, ins); - memcpy(bundles + after + 1, &new, sizeof(new)); - list_add(&new.instructions[0]->link, &after_bundle->instructions[after_bundle->instruction_count - 1]->link); - block->quadword_count += midgard_tag_props[new.tag].size; + midgard_bundle new = mir_bundle_for_op(ctx, ins); + memcpy(bundles + after + 1, &new, sizeof(new)); + list_add( + &new.instructions[0]->link, + &after_bundle->instructions[after_bundle->instruction_count - 1]->link); + block->quadword_count += midgard_tag_props[new.tag].size; - return new.instructions[0]; + return new.instructions[0]; } /* Flip the first-two arguments of a (binary) op. Currently ALU @@ -431,32 +451,32 @@ mir_insert_instruction_after_scheduled( void mir_flip(midgard_instruction *ins) { - unsigned temp = ins->src[0]; - ins->src[0] = ins->src[1]; - ins->src[1] = temp; + unsigned temp = ins->src[0]; + ins->src[0] = ins->src[1]; + ins->src[1] = temp; - assert(ins->type == TAG_ALU_4); + assert(ins->type == TAG_ALU_4); - temp = ins->src_types[0]; - ins->src_types[0] = ins->src_types[1]; - ins->src_types[1] = temp; + temp = ins->src_types[0]; + ins->src_types[0] = ins->src_types[1]; + ins->src_types[1] = temp; - temp = ins->src_abs[0]; - ins->src_abs[0] = ins->src_abs[1]; - ins->src_abs[1] = temp; + temp = ins->src_abs[0]; + ins->src_abs[0] = ins->src_abs[1]; + ins->src_abs[1] = temp; - temp = ins->src_neg[0]; - ins->src_neg[0] = ins->src_neg[1]; - ins->src_neg[1] = temp; + temp = ins->src_neg[0]; + ins->src_neg[0] = ins->src_neg[1]; + ins->src_neg[1] = temp; - temp = ins->src_invert[0]; - ins->src_invert[0] = ins->src_invert[1]; - ins->src_invert[1] = temp; + temp = ins->src_invert[0]; + ins->src_invert[0] = ins->src_invert[1]; + ins->src_invert[1] = temp; - unsigned temp_swizzle[16]; - memcpy(temp_swizzle, ins->swizzle[0], sizeof(ins->swizzle[0])); - memcpy(ins->swizzle[0], ins->swizzle[1], sizeof(ins->swizzle[0])); - memcpy(ins->swizzle[1], temp_swizzle, sizeof(ins->swizzle[0])); + unsigned temp_swizzle[16]; + memcpy(temp_swizzle, ins->swizzle[0], sizeof(ins->swizzle[0])); + memcpy(ins->swizzle[0], ins->swizzle[1], sizeof(ins->swizzle[0])); + memcpy(ins->swizzle[1], temp_swizzle, sizeof(ins->swizzle[0])); } /* Before squashing, calculate ctx->temp_count just by observing the MIR */ @@ -464,15 +484,15 @@ mir_flip(midgard_instruction *ins) void mir_compute_temp_count(compiler_context *ctx) { - if (ctx->temp_count) - return; + if (ctx->temp_count) + return; - unsigned max_dest = 
0; + unsigned max_dest = 0; - mir_foreach_instr_global(ctx, ins) { - if (ins->dest < SSA_FIXED_MINIMUM) - max_dest = MAX2(max_dest, ins->dest + 1); - } + mir_foreach_instr_global(ctx, ins) { + if (ins->dest < SSA_FIXED_MINIMUM) + max_dest = MAX2(max_dest, ins->dest + 1); + } - ctx->temp_count = max_dest; + ctx->temp_count = max_dest; } diff --git a/src/panfrost/midgard/mir_promote_uniforms.c b/src/panfrost/midgard/mir_promote_uniforms.c index a6396749f48..c4909994712 100644 --- a/src/panfrost/midgard/mir_promote_uniforms.c +++ b/src/panfrost/midgard/mir_promote_uniforms.c @@ -24,9 +24,9 @@ * Alyssa Rosenzweig */ -#include "compiler.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "compiler.h" /* This pass promotes reads from UBOs to register-mapped uniforms. This saves * both instructions and work register pressure, but it reduces the work @@ -39,17 +39,14 @@ static bool mir_is_ubo(midgard_instruction *ins) { - return (ins->type == TAG_LOAD_STORE_4) && - (OP_IS_UBO_READ(ins->op)); + return (ins->type == TAG_LOAD_STORE_4) && (OP_IS_UBO_READ(ins->op)); } static bool mir_is_direct_aligned_ubo(midgard_instruction *ins) { - return mir_is_ubo(ins) && - !(ins->constants.u32[0] & 0xF) && - (ins->src[1] == ~0) && - (ins->src[2] == ~0); + return mir_is_ubo(ins) && !(ins->constants.u32[0] & 0xF) && + (ins->src[1] == ~0) && (ins->src[2] == ~0); } /* Represents use data for a single UBO */ @@ -57,38 +54,39 @@ mir_is_direct_aligned_ubo(midgard_instruction *ins) #define MAX_UBO_QWORDS (65536 / 16) struct mir_ubo_block { - BITSET_DECLARE(uses, MAX_UBO_QWORDS); - BITSET_DECLARE(pushed, MAX_UBO_QWORDS); + BITSET_DECLARE(uses, MAX_UBO_QWORDS); + BITSET_DECLARE(pushed, MAX_UBO_QWORDS); }; struct mir_ubo_analysis { - /* Per block analysis */ - unsigned nr_blocks; - struct mir_ubo_block *blocks; + /* Per block analysis */ + unsigned nr_blocks; + struct mir_ubo_block *blocks; }; static struct mir_ubo_analysis mir_analyze_ranges(compiler_context *ctx) { - struct mir_ubo_analysis res = { - .nr_blocks = ctx->nir->info.num_ubos + 1, - }; + struct mir_ubo_analysis res = { + .nr_blocks = ctx->nir->info.num_ubos + 1, + }; - res.blocks = calloc(res.nr_blocks, sizeof(struct mir_ubo_block)); + res.blocks = calloc(res.nr_blocks, sizeof(struct mir_ubo_block)); - mir_foreach_instr_global(ctx, ins) { - if (!mir_is_direct_aligned_ubo(ins)) continue; + mir_foreach_instr_global(ctx, ins) { + if (!mir_is_direct_aligned_ubo(ins)) + continue; - unsigned ubo = midgard_unpack_ubo_index_imm(ins->load_store); - unsigned offset = ins->constants.u32[0] / 16; + unsigned ubo = midgard_unpack_ubo_index_imm(ins->load_store); + unsigned offset = ins->constants.u32[0] / 16; - assert(ubo < res.nr_blocks); + assert(ubo < res.nr_blocks); - if (offset < MAX_UBO_QWORDS) - BITSET_SET(res.blocks[ubo].uses, offset); - } + if (offset < MAX_UBO_QWORDS) + BITSET_SET(res.blocks[ubo].uses, offset); + } - return res; + return res; } /* Select UBO words to push. A sophisticated implementation would consider the @@ -96,32 +94,33 @@ mir_analyze_ranges(compiler_context *ctx) * sophisticated. Select from the last UBO first to prioritize sysvals. 
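The strategy described in this comment is a greedy walk: every used vec4 (16-byte) slot costs four 32-bit push words, and UBOs are visited from the highest index downwards so the sysval UBO wins if the push budget runs out, which is what the mir_pick_ubo() just below does over its per-UBO bitsets. A minimal standalone sketch of that packing, using toy stand-in types (sketch_push, sketch_word, a plain 32-slot use mask per UBO) rather than the driver's panfrost_ubo_push and mir_ubo_block structures:

#include <stdint.h>
#include <stdio.h>

#define SKETCH_MAX_PUSH 128 /* stand-in for the real PAN_MAX_PUSH budget */

struct sketch_word {
   unsigned ubo;
   unsigned offset; /* byte offset of the 32-bit word within the UBO */
};

struct sketch_push {
   unsigned count;
   struct sketch_word words[SKETCH_MAX_PUSH];
};

/* uses[ubo] is a toy 32-slot bitmask of used vec4s for that UBO. */
static void
sketch_pick_ubo(struct sketch_push *push, const uint32_t *uses,
                unsigned nr_ubos, unsigned max_words)
{
   /* Last UBO first, so sysvals get pushed before user uniforms. */
   for (int ubo = (int)nr_ubos - 1; ubo >= 0; --ubo) {
      for (unsigned vec4 = 0; vec4 < 32; ++vec4) {
         if (!(uses[ubo] & (1u << vec4)))
            continue;

         /* Each vec4 expands to four words; stop at the budget. */
         if (push->count + 4 > max_words)
            return;

         for (unsigned w = 0; w < 4; ++w) {
            push->words[push->count].ubo = ubo;
            push->words[push->count].offset = vec4 * 16 + w * 4;
            push->count++;
         }
      }
   }
}

int
main(void)
{
   uint32_t uses[2] = {0x5, 0x2}; /* UBO 0: slots 0 and 2; UBO 1: slot 1 */
   struct sketch_push push = {0};

   sketch_pick_ubo(&push, uses, 2, SKETCH_MAX_PUSH);

   for (unsigned i = 0; i < push.count; ++i)
      printf("word %u -> ubo %u, byte offset %u\n", i, push.words[i].ubo,
             push.words[i].offset);

   return 0;
}

With this input, the three used vec4s expand to twelve push words, and the UBO 1 slot lands first because of the reversed walk.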
*/ static void -mir_pick_ubo(struct panfrost_ubo_push *push, struct mir_ubo_analysis *analysis, unsigned max_qwords) +mir_pick_ubo(struct panfrost_ubo_push *push, struct mir_ubo_analysis *analysis, + unsigned max_qwords) { - unsigned max_words = MIN2(PAN_MAX_PUSH, max_qwords * 4); + unsigned max_words = MIN2(PAN_MAX_PUSH, max_qwords * 4); - for (signed ubo = analysis->nr_blocks - 1; ubo >= 0; --ubo) { - struct mir_ubo_block *block = &analysis->blocks[ubo]; + for (signed ubo = analysis->nr_blocks - 1; ubo >= 0; --ubo) { + struct mir_ubo_block *block = &analysis->blocks[ubo]; - unsigned vec4; - BITSET_FOREACH_SET(vec4, block->uses, MAX_UBO_QWORDS) { - /* Don't push more than possible */ - if (push->count > max_words - 4) - return; + unsigned vec4; + BITSET_FOREACH_SET(vec4, block->uses, MAX_UBO_QWORDS) { + /* Don't push more than possible */ + if (push->count > max_words - 4) + return; - for (unsigned offs = 0; offs < 4; ++offs) { - struct panfrost_ubo_word word = { - .ubo = ubo, - .offset = (vec4 * 16) + (offs * 4), - }; + for (unsigned offs = 0; offs < 4; ++offs) { + struct panfrost_ubo_word word = { + .ubo = ubo, + .offset = (vec4 * 16) + (offs * 4), + }; - push->words[push->count++] = word; - } + push->words[push->count++] = word; + } - /* Mark it as pushed so we can rewrite */ - BITSET_SET(block->pushed, vec4); - } - } + /* Mark it as pushed so we can rewrite */ + BITSET_SET(block->pushed, vec4); + } + } } #if 0 @@ -154,80 +153,81 @@ mir_dump_ubo_analysis(struct mir_ubo_analysis *res) static unsigned mir_promoteable_uniform_count(struct mir_ubo_analysis *analysis) { - unsigned count = 0; + unsigned count = 0; - for (unsigned i = 0; i < analysis->nr_blocks; ++i) { - BITSET_WORD *uses = analysis->blocks[i].uses; + for (unsigned i = 0; i < analysis->nr_blocks; ++i) { + BITSET_WORD *uses = analysis->blocks[i].uses; - for (unsigned w = 0; w < BITSET_WORDS(MAX_UBO_QWORDS); ++w) - count += util_bitcount(uses[w]); - } + for (unsigned w = 0; w < BITSET_WORDS(MAX_UBO_QWORDS); ++w) + count += util_bitcount(uses[w]); + } - return count; + return count; } static unsigned mir_count_live(uint16_t *live, unsigned temp_count) { - unsigned count = 0; + unsigned count = 0; - for (unsigned i = 0; i < temp_count; ++i) - count += util_bitcount(live[i]); + for (unsigned i = 0; i < temp_count; ++i) + count += util_bitcount(live[i]); - return count; + return count; } static unsigned mir_estimate_pressure(compiler_context *ctx) { - mir_invalidate_liveness(ctx); - mir_compute_liveness(ctx); + mir_invalidate_liveness(ctx); + mir_compute_liveness(ctx); - unsigned max_live = 0; + unsigned max_live = 0; - mir_foreach_block(ctx, _block) { - midgard_block *block = (midgard_block *) _block; - uint16_t *live = mem_dup(block->base.live_out, ctx->temp_count * sizeof(uint16_t)); + mir_foreach_block(ctx, _block) { + midgard_block *block = (midgard_block *)_block; + uint16_t *live = + mem_dup(block->base.live_out, ctx->temp_count * sizeof(uint16_t)); - mir_foreach_instr_in_block_rev(block, ins) { - unsigned count = mir_count_live(live, ctx->temp_count); - max_live = MAX2(max_live, count); - mir_liveness_ins_update(live, ins, ctx->temp_count); - } + mir_foreach_instr_in_block_rev(block, ins) { + unsigned count = mir_count_live(live, ctx->temp_count); + max_live = MAX2(max_live, count); + mir_liveness_ins_update(live, ins, ctx->temp_count); + } - free(live); - } + free(live); + } - return DIV_ROUND_UP(max_live, 16); + return DIV_ROUND_UP(max_live, 16); } static unsigned mir_work_heuristic(compiler_context *ctx, struct 
mir_ubo_analysis *analysis) { - unsigned uniform_count = mir_promoteable_uniform_count(analysis); + unsigned uniform_count = mir_promoteable_uniform_count(analysis); - /* If there are 8 or fewer uniforms, it doesn't matter what we do, so - * allow as many work registers as needed */ + /* If there are 8 or fewer uniforms, it doesn't matter what we do, so + * allow as many work registers as needed */ - if (uniform_count <= 8) - return 16; + if (uniform_count <= 8) + return 16; - /* Otherwise, estimate the register pressure */ + /* Otherwise, estimate the register pressure */ - unsigned pressure = mir_estimate_pressure(ctx); + unsigned pressure = mir_estimate_pressure(ctx); - /* Prioritize not spilling above all else. The relation between the - * pressure estimate and the actual register pressure is a little - * murkier than we might like (due to scheduling, pipeline registers, - * failure to pack vector registers, load/store registers, texture - * registers...), hence why this is a heuristic parameter */ + /* Prioritize not spilling above all else. The relation between the + * pressure estimate and the actual register pressure is a little + * murkier than we might like (due to scheduling, pipeline registers, + * failure to pack vector registers, load/store registers, texture + * registers...), hence why this is a heuristic parameter */ - if (pressure > 6) - return 16; + if (pressure > 6) + return 16; - /* If there's no chance of spilling, prioritize UBOs and thread count */ + /* If there's no chance of spilling, prioritize UBOs and thread count */ - return 8; + return 8; } /* Bitset of indices that will be used as a special register -- inputs to a @@ -237,111 +237,113 @@ mir_work_heuristic(compiler_context *ctx, struct mir_ubo_analysis *analysis) static BITSET_WORD * mir_special_indices(compiler_context *ctx) { - mir_compute_temp_count(ctx); - BITSET_WORD *bset = calloc(BITSET_WORDS(ctx->temp_count), sizeof(BITSET_WORD)); + mir_compute_temp_count(ctx); + BITSET_WORD *bset = + calloc(BITSET_WORDS(ctx->temp_count), sizeof(BITSET_WORD)); - mir_foreach_instr_global(ctx, ins) { - /* Look for special instructions */ - bool is_ldst = ins->type == TAG_LOAD_STORE_4; - bool is_tex = ins->type == TAG_TEXTURE_4; - bool is_writeout = ins->compact_branch && ins->writeout; + mir_foreach_instr_global(ctx, ins) { + /* Look for special instructions */ + bool is_ldst = ins->type == TAG_LOAD_STORE_4; + bool is_tex = ins->type == TAG_TEXTURE_4; + bool is_writeout = ins->compact_branch && ins->writeout; - if (!(is_ldst || is_tex || is_writeout)) - continue; + if (!(is_ldst || is_tex || is_writeout)) + continue; - /* Anything read by a special instruction is itself special */ - mir_foreach_src(ins, i) { - unsigned idx = ins->src[i]; + /* Anything read by a special instruction is itself special */ + mir_foreach_src(ins, i) { + unsigned idx = ins->src[i]; - if (idx < ctx->temp_count) - BITSET_SET(bset, idx); - } - } + if (idx < ctx->temp_count) + BITSET_SET(bset, idx); + } + } - return bset; + return bset; } void midgard_promote_uniforms(compiler_context *ctx) { - if (ctx->inputs->no_ubo_to_push) { - /* If nothing is pushed, all UBOs need to be uploaded - * conventionally */ - ctx->ubo_mask = ~0; - return; - } + if (ctx->inputs->no_ubo_to_push) { + /* If nothing is pushed, all UBOs need to be uploaded + * conventionally */ + ctx->ubo_mask = ~0; + return; + } - struct mir_ubo_analysis analysis = mir_analyze_ranges(ctx); + struct mir_ubo_analysis analysis = mir_analyze_ranges(ctx); - unsigned work_count = 
mir_work_heuristic(ctx, &analysis); - unsigned promoted_count = 24 - work_count; + unsigned work_count = mir_work_heuristic(ctx, &analysis); + unsigned promoted_count = 24 - work_count; - /* Ensure we are 16 byte aligned to avoid underallocations */ - mir_pick_ubo(&ctx->info->push, &analysis, promoted_count); - ctx->info->push.count = ALIGN_POT(ctx->info->push.count, 4); + /* Ensure we are 16 byte aligned to avoid underallocations */ + mir_pick_ubo(&ctx->info->push, &analysis, promoted_count); + ctx->info->push.count = ALIGN_POT(ctx->info->push.count, 4); - /* First, figure out special indices a priori so we don't recompute a lot */ - BITSET_WORD *special = mir_special_indices(ctx); + /* First, figure out special indices a priori so we don't recompute a lot */ + BITSET_WORD *special = mir_special_indices(ctx); - ctx->ubo_mask = 0; + ctx->ubo_mask = 0; - mir_foreach_instr_global_safe(ctx, ins) { - if (!mir_is_ubo(ins)) continue; + mir_foreach_instr_global_safe(ctx, ins) { + if (!mir_is_ubo(ins)) + continue; - unsigned ubo = midgard_unpack_ubo_index_imm(ins->load_store); - unsigned qword = ins->constants.u32[0] / 16; + unsigned ubo = midgard_unpack_ubo_index_imm(ins->load_store); + unsigned qword = ins->constants.u32[0] / 16; - if (!mir_is_direct_aligned_ubo(ins)) { - if (ins->src[1] == ~0) - ctx->ubo_mask |= BITSET_BIT(ubo); - else - ctx->ubo_mask = ~0; + if (!mir_is_direct_aligned_ubo(ins)) { + if (ins->src[1] == ~0) + ctx->ubo_mask |= BITSET_BIT(ubo); + else + ctx->ubo_mask = ~0; - continue; - } + continue; + } - /* Check if we decided to push this */ - assert(ubo < analysis.nr_blocks); - if (!BITSET_TEST(analysis.blocks[ubo].pushed, qword)) { - ctx->ubo_mask |= BITSET_BIT(ubo); - continue; - } + /* Check if we decided to push this */ + assert(ubo < analysis.nr_blocks); + if (!BITSET_TEST(analysis.blocks[ubo].pushed, qword)) { + ctx->ubo_mask |= BITSET_BIT(ubo); + continue; + } - /* Find where we pushed to, TODO: unaligned pushes to pack */ - unsigned base = pan_lookup_pushed_ubo(&ctx->info->push, ubo, qword * 16); - assert((base & 0x3) == 0); + /* Find where we pushed to, TODO: unaligned pushes to pack */ + unsigned base = pan_lookup_pushed_ubo(&ctx->info->push, ubo, qword * 16); + assert((base & 0x3) == 0); - unsigned address = base / 4; - unsigned uniform_reg = 23 - address; + unsigned address = base / 4; + unsigned uniform_reg = 23 - address; - /* Should've taken into account when pushing */ - assert(address < promoted_count); - unsigned promoted = SSA_FIXED_REGISTER(uniform_reg); + /* Should've taken into account when pushing */ + assert(address < promoted_count); + unsigned promoted = SSA_FIXED_REGISTER(uniform_reg); - /* We do need the move for safety for a non-SSA dest, or if - * we're being fed into a special class */ + /* We do need the move for safety for a non-SSA dest, or if + * we're being fed into a special class */ - bool needs_move = ins->dest & PAN_IS_REG || ins->dest == ctx->blend_src1; + bool needs_move = ins->dest & PAN_IS_REG || ins->dest == ctx->blend_src1; - if (ins->dest < ctx->temp_count) - needs_move |= BITSET_TEST(special, ins->dest); + if (ins->dest < ctx->temp_count) + needs_move |= BITSET_TEST(special, ins->dest); - if (needs_move) { - unsigned type_size = nir_alu_type_get_type_size(ins->dest_type); - midgard_instruction mov = v_mov(promoted, ins->dest); - mov.dest_type = nir_type_uint | type_size; - mov.src_types[1] = mov.dest_type; + if (needs_move) { + unsigned type_size = nir_alu_type_get_type_size(ins->dest_type); + midgard_instruction mov = 
v_mov(promoted, ins->dest); + mov.dest_type = nir_type_uint | type_size; + mov.src_types[1] = mov.dest_type; - uint16_t rounded = mir_round_bytemask_up(mir_bytemask(ins), type_size); - mir_set_bytemask(&mov, rounded); - mir_insert_instruction_before(ctx, ins, mov); - } else { - mir_rewrite_index_src(ctx, ins->dest, promoted); - } + uint16_t rounded = mir_round_bytemask_up(mir_bytemask(ins), type_size); + mir_set_bytemask(&mov, rounded); + mir_insert_instruction_before(ctx, ins, mov); + } else { + mir_rewrite_index_src(ctx, ins->dest, promoted); + } - mir_remove_instruction(ins); - } + mir_remove_instruction(ins); + } - free(special); - free(analysis.blocks); + free(special); + free(analysis.blocks); } diff --git a/src/panfrost/midgard/mir_squeeze.c b/src/panfrost/midgard/mir_squeeze.c index aa230f836db..6eae34ef108 100644 --- a/src/panfrost/midgard/mir_squeeze.c +++ b/src/panfrost/midgard/mir_squeeze.c @@ -31,25 +31,23 @@ static unsigned find_or_allocate_temp(compiler_context *ctx, struct hash_table_u64 *map, - unsigned hash) + unsigned hash) { - if (hash >= SSA_FIXED_MINIMUM) - return hash; + if (hash >= SSA_FIXED_MINIMUM) + return hash; - unsigned temp = (uintptr_t) _mesa_hash_table_u64_search( - map, hash + 1); + unsigned temp = (uintptr_t)_mesa_hash_table_u64_search(map, hash + 1); - if (temp) - return temp - 1; + if (temp) + return temp - 1; - /* If no temp is find, allocate one */ - temp = ctx->temp_count++; - ctx->max_hash = MAX2(ctx->max_hash, hash); + /* If no temp is find, allocate one */ + temp = ctx->temp_count++; + ctx->max_hash = MAX2(ctx->max_hash, hash); - _mesa_hash_table_u64_insert(map, - hash + 1, (void *) ((uintptr_t) temp + 1)); + _mesa_hash_table_u64_insert(map, hash + 1, (void *)((uintptr_t)temp + 1)); - return temp; + return temp; } /* Reassigns numbering to get rid of gaps in the indices and to prioritize @@ -58,30 +56,30 @@ find_or_allocate_temp(compiler_context *ctx, struct hash_table_u64 *map, void mir_squeeze_index(compiler_context *ctx) { - struct hash_table_u64 *map = _mesa_hash_table_u64_create(NULL); + struct hash_table_u64 *map = _mesa_hash_table_u64_create(NULL); - /* Reset */ - ctx->temp_count = 0; + /* Reset */ + ctx->temp_count = 0; - /* We need to prioritize texture registers on older GPUs so we don't - * fail RA trying to assign to work registers r0/r1 when a work - * register is already there */ + /* We need to prioritize texture registers on older GPUs so we don't + * fail RA trying to assign to work registers r0/r1 when a work + * register is already there */ - mir_foreach_instr_global(ctx, ins) { - if (ins->type == TAG_TEXTURE_4) - ins->dest = find_or_allocate_temp(ctx, map, ins->dest); - } + mir_foreach_instr_global(ctx, ins) { + if (ins->type == TAG_TEXTURE_4) + ins->dest = find_or_allocate_temp(ctx, map, ins->dest); + } - mir_foreach_instr_global(ctx, ins) { - if (ins->type != TAG_TEXTURE_4) - ins->dest = find_or_allocate_temp(ctx, map, ins->dest); + mir_foreach_instr_global(ctx, ins) { + if (ins->type != TAG_TEXTURE_4) + ins->dest = find_or_allocate_temp(ctx, map, ins->dest); - for (unsigned i = 0; i < ARRAY_SIZE(ins->src); ++i) - ins->src[i] = find_or_allocate_temp(ctx, map, ins->src[i]); - } + for (unsigned i = 0; i < ARRAY_SIZE(ins->src); ++i) + ins->src[i] = find_or_allocate_temp(ctx, map, ins->src[i]); + } - ctx->blend_input = find_or_allocate_temp(ctx, map, ctx->blend_input); - ctx->blend_src1 = find_or_allocate_temp(ctx, map, ctx->blend_src1); + ctx->blend_input = find_or_allocate_temp(ctx, map, ctx->blend_input); + ctx->blend_src1 = 
find_or_allocate_temp(ctx, map, ctx->blend_src1); - _mesa_hash_table_u64_destroy(map); + _mesa_hash_table_u64_destroy(map); } diff --git a/src/panfrost/midgard/nir_fuse_io_16.c b/src/panfrost/midgard/nir_fuse_io_16.c index b97129f1c33..f4b052ea438 100644 --- a/src/panfrost/midgard/nir_fuse_io_16.c +++ b/src/panfrost/midgard/nir_fuse_io_16.c @@ -50,22 +50,24 @@ nir_fuse_io_16(nir_shader *shader) bool progress = false; nir_foreach_function(function, shader) { - if (!function->impl) continue; + if (!function->impl) + continue; nir_builder b; nir_builder_init(&b, function->impl); nir_foreach_block(block, function->impl) { nir_foreach_instr_safe(instr, block) { - if (instr->type != nir_instr_type_intrinsic) continue; + if (instr->type != nir_instr_type_intrinsic) + continue; nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); if (intr->intrinsic != nir_intrinsic_load_interpolated_input) - continue; + continue; if (nir_dest_bit_size(intr->dest) != 32) - continue; + continue; /* We swizzle at a 32-bit level so need a multiple of 2. We could * do a bit better and handle even components though */ @@ -101,8 +103,8 @@ nir_fuse_io_16(nir_shader *shader) } } - nir_metadata_preserve(function->impl, nir_metadata_block_index | nir_metadata_dominance); - + nir_metadata_preserve(function->impl, + nir_metadata_block_index | nir_metadata_dominance); } return progress; diff --git a/src/panfrost/perf/pan_perf.c b/src/panfrost/perf/pan_perf.c index c543d7f0dbb..c35d0f87c02 100644 --- a/src/panfrost/perf/pan_perf.c +++ b/src/panfrost/perf/pan_perf.c @@ -23,12 +23,12 @@ #include "pan_perf.h" -#include -#include #include +#include +#include #define PAN_COUNTERS_PER_CATEGORY 64 -#define PAN_SHADER_CORE_INDEX 3 +#define PAN_SHADER_CORE_INDEX 3 uint32_t panfrost_perf_counter_read(const struct panfrost_perf_counter *counter, @@ -53,12 +53,12 @@ panfrost_perf_counter_read(const struct panfrost_perf_counter *counter, static const struct panfrost_perf_config * panfrost_lookup_counters(const char *name) { - for (unsigned i = 0; i < ARRAY_SIZE(panfrost_perf_configs); ++i) { - if (strcmp(panfrost_perf_configs[i]->name, name) == 0) - return panfrost_perf_configs[i]; - } + for (unsigned i = 0; i < ARRAY_SIZE(panfrost_perf_configs); ++i) { + if (strcmp(panfrost_perf_configs[i]->name, name) == 0) + return panfrost_perf_configs[i]; + } - return NULL; + return NULL; } void @@ -67,12 +67,12 @@ panfrost_perf_init(struct panfrost_perf *perf, struct panfrost_device *dev) perf->dev = dev; if (dev->model == NULL) - unreachable("Invalid GPU ID"); + unreachable("Invalid GPU ID"); perf->cfg = panfrost_lookup_counters(dev->model->performance_counters); if (perf->cfg == NULL) - unreachable("Performance counters missing!"); + unreachable("Performance counters missing!"); // Generally counter blocks are laid out in the following order: // Job manager, tiler, one or more L2 caches, and one or more shader cores. 
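The layout comment above suggests that the dump buffer behind counter_values is a flat array of 32-bit values arranged in 64-entry blocks (PAN_COUNTERS_PER_CATEGORY), one block per hardware unit, in the order job manager, tiler, L2 cache(s), shader core(s). A hedged sketch of reading one counter under that assumption; category_block, counter_index and n_instances are illustrative parameters for this sketch, not fields of the driver's panfrost_perf_counter:

#include <stdint.h>

#define SKETCH_COUNTERS_PER_BLOCK 64 /* mirrors PAN_COUNTERS_PER_CATEGORY */

/* category_block: index of the first 64-entry block for the category,
 * counter_index: position of the counter inside a block,
 * n_instances: how many consecutive blocks the category occupies
 * (e.g. one per shader core). */
static uint32_t
sketch_read_counter(const uint32_t *dump, unsigned category_block,
                    unsigned counter_index, unsigned n_instances)
{
   uint32_t total = 0;

   /* Sum the per-instance blocks so multi-core categories report a single
    * GPU-wide value. */
   for (unsigned i = 0; i < n_instances; ++i)
      total += dump[(category_block + i) * SKETCH_COUNTERS_PER_BLOCK +
                    counter_index];

   return total;
}

After a dump, a call along the lines of sketch_read_counter(perf->counter_values, shader_core_block, 7, core_count) would aggregate one shader-core counter across cores; the real panfrost_perf_counter_read() takes the counter description and the perf context instead, as its prototype above shows.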
@@ -92,7 +92,8 @@ static int panfrost_perf_query(struct panfrost_perf *perf, uint32_t enable) { struct drm_panfrost_perfcnt_enable perfcnt_enable = {enable, 0}; - return drmIoctl(perf->dev->fd, DRM_IOCTL_PANFROST_PERFCNT_ENABLE, &perfcnt_enable); + return drmIoctl(perf->dev->fd, DRM_IOCTL_PANFROST_PERFCNT_ENABLE, + &perfcnt_enable); } int @@ -110,7 +111,10 @@ panfrost_perf_disable(struct panfrost_perf *perf) int panfrost_perf_dump(struct panfrost_perf *perf) { - // Dump performance counter values to the memory buffer pointed to by counter_values - struct drm_panfrost_perfcnt_dump perfcnt_dump = {(uint64_t)(uintptr_t)perf->counter_values}; - return drmIoctl(perf->dev->fd, DRM_IOCTL_PANFROST_PERFCNT_DUMP, &perfcnt_dump); + // Dump performance counter values to the memory buffer pointed to by + // counter_values + struct drm_panfrost_perfcnt_dump perfcnt_dump = { + (uint64_t)(uintptr_t)perf->counter_values}; + return drmIoctl(perf->dev->fd, DRM_IOCTL_PANFROST_PERFCNT_DUMP, + &perfcnt_dump); } diff --git a/src/panfrost/perf/pan_perf.h b/src/panfrost/perf/pan_perf.h index 592dce71cea..4a40f2cc393 100644 --- a/src/panfrost/perf/pan_perf.h +++ b/src/panfrost/perf/pan_perf.h @@ -31,7 +31,7 @@ extern "C" { #endif #define PAN_PERF_MAX_CATEGORIES 4 -#define PAN_PERF_MAX_COUNTERS 64 +#define PAN_PERF_MAX_COUNTERS 64 struct panfrost_device; struct panfrost_perf_category; @@ -85,7 +85,7 @@ struct panfrost_perf_config { struct panfrost_perf { struct panfrost_device *dev; - const struct panfrost_perf_config* cfg; + const struct panfrost_perf_config *cfg; // Memory where to dump counter values uint32_t *counter_values; @@ -95,21 +95,17 @@ struct panfrost_perf { unsigned category_offset[PAN_PERF_MAX_CATEGORIES]; }; -uint32_t -panfrost_perf_counter_read(const struct panfrost_perf_counter *counter, - const struct panfrost_perf *perf); +uint32_t panfrost_perf_counter_read(const struct panfrost_perf_counter *counter, + const struct panfrost_perf *perf); -void -panfrost_perf_init(struct panfrost_perf *perf, struct panfrost_device *dev); +void panfrost_perf_init(struct panfrost_perf *perf, + struct panfrost_device *dev); -int -panfrost_perf_enable(struct panfrost_perf *perf); +int panfrost_perf_enable(struct panfrost_perf *perf); -int -panfrost_perf_disable(struct panfrost_perf *perf); +int panfrost_perf_disable(struct panfrost_perf *perf); -int -panfrost_perf_dump(struct panfrost_perf *perf); +int panfrost_perf_dump(struct panfrost_perf *perf); #if defined(__cplusplus) } // extern "C" diff --git a/src/panfrost/perf/quick.c b/src/panfrost/perf/quick.c index 286c7263f72..56513322306 100644 --- a/src/panfrost/perf/quick.c +++ b/src/panfrost/perf/quick.c @@ -2,51 +2,55 @@ #include #include "pan_perf.h" -int main(void) { - int fd = drmOpenWithType("panfrost", NULL, DRM_NODE_RENDER); +int +main(void) +{ + int fd = drmOpenWithType("panfrost", NULL, DRM_NODE_RENDER); - if (fd < 0) { - fprintf(stderr, "No panfrost device\n"); - exit(1); - } + if (fd < 0) { + fprintf(stderr, "No panfrost device\n"); + exit(1); + } - void *ctx = ralloc_context(NULL); - struct panfrost_perf *perf = rzalloc(ctx, struct panfrost_perf); + void *ctx = ralloc_context(NULL); + struct panfrost_perf *perf = rzalloc(ctx, struct panfrost_perf); - struct panfrost_device dev = {}; - panfrost_open_device(ctx, fd, &dev); + struct panfrost_device dev = {}; + panfrost_open_device(ctx, fd, &dev); - panfrost_perf_init(perf, &dev); - int ret = panfrost_perf_enable(perf); - - if (ret < 0) { - fprintf(stderr, "failed to enable counters (%d)\n", ret); - 
fprintf(stderr, "try `# echo Y > /sys/module/panfrost/parameters/unstable_ioctls`\n"); + panfrost_perf_init(perf, &dev); + int ret = panfrost_perf_enable(perf); - exit(1); - } + if (ret < 0) { + fprintf(stderr, "failed to enable counters (%d)\n", ret); + fprintf( + stderr, + "try `# echo Y > /sys/module/panfrost/parameters/unstable_ioctls`\n"); - sleep(1); + exit(1); + } - panfrost_perf_dump(perf); + sleep(1); - for (unsigned i = 0; i < perf->cfg->n_categories; ++i) { - const struct panfrost_perf_category *cat = &perf->cfg->categories[i]; - printf("%s\n", cat->name); + panfrost_perf_dump(perf); - for (unsigned j = 0; j < cat->n_counters; ++j) { - const struct panfrost_perf_counter *ctr = &cat->counters[j]; - uint32_t val = panfrost_perf_counter_read(ctr, perf); - printf("%s (%s): %u\n", ctr->name, ctr->symbol_name, val); - } + for (unsigned i = 0; i < perf->cfg->n_categories; ++i) { + const struct panfrost_perf_category *cat = &perf->cfg->categories[i]; + printf("%s\n", cat->name); - printf("\n"); - } + for (unsigned j = 0; j < cat->n_counters; ++j) { + const struct panfrost_perf_counter *ctr = &cat->counters[j]; + uint32_t val = panfrost_perf_counter_read(ctr, perf); + printf("%s (%s): %u\n", ctr->name, ctr->symbol_name, val); + } - if (panfrost_perf_disable(perf) < 0) { - fprintf(stderr, "failed to disable counters\n"); - exit(1); - } + printf("\n"); + } - panfrost_close_device(&dev); + if (panfrost_perf_disable(perf) < 0) { + fprintf(stderr, "failed to disable counters\n"); + exit(1); + } + + panfrost_close_device(&dev); } diff --git a/src/panfrost/shared/pan_minmax_cache.c b/src/panfrost/shared/pan_minmax_cache.c index be6f173893a..1da7ee51e1e 100644 --- a/src/panfrost/shared/pan_minmax_cache.c +++ b/src/panfrost/shared/pan_minmax_cache.c @@ -41,50 +41,51 @@ #include "pan_minmax_cache.h" bool -panfrost_minmax_cache_get(struct panfrost_minmax_cache *cache, unsigned start, unsigned count, - unsigned *min_index, unsigned *max_index) +panfrost_minmax_cache_get(struct panfrost_minmax_cache *cache, unsigned start, + unsigned count, unsigned *min_index, + unsigned *max_index) { - uint64_t ht_key = (((uint64_t)count) << 32) | start; - bool found = false; + uint64_t ht_key = (((uint64_t)count) << 32) | start; + bool found = false; - if (!cache) - return false; + if (!cache) + return false; - for (unsigned i = 0; i < cache->size; ++i) { - if (cache->keys[i] == ht_key) { - uint64_t hit = cache->values[i]; + for (unsigned i = 0; i < cache->size; ++i) { + if (cache->keys[i] == ht_key) { + uint64_t hit = cache->values[i]; - *min_index = hit & 0xffffffff; - *max_index = hit >> 32; - found = true; - break; - } - } + *min_index = hit & 0xffffffff; + *max_index = hit >> 32; + found = true; + break; + } + } - return found; + return found; } void -panfrost_minmax_cache_add(struct panfrost_minmax_cache *cache, unsigned start, unsigned count, - unsigned min_index, unsigned max_index) +panfrost_minmax_cache_add(struct panfrost_minmax_cache *cache, unsigned start, + unsigned count, unsigned min_index, + unsigned max_index) { - uint64_t ht_key = (((uint64_t)count) << 32) | start; - uint64_t value = min_index | (((uint64_t)max_index) << 32); - unsigned index = 0; + uint64_t ht_key = (((uint64_t)count) << 32) | start; + uint64_t value = min_index | (((uint64_t)max_index) << 32); + unsigned index = 0; - if (!cache) - return; + if (!cache) + return; - if (cache->size == PANFROST_MINMAX_SIZE) { - index = cache->index++; - cache->index = cache->index % PANFROST_MINMAX_SIZE; - } else { - index = cache->size++; - } - 
- cache->keys[index] = ht_key; - cache->values[index] = value; + if (cache->size == PANFROST_MINMAX_SIZE) { + index = cache->index++; + cache->index = cache->index % PANFROST_MINMAX_SIZE; + } else { + index = cache->size++; + } + cache->keys[index] = ht_key; + cache->values[index] = value; } /* If we've been caching min/max indices and we update the index @@ -92,32 +93,34 @@ panfrost_minmax_cache_add(struct panfrost_minmax_cache *cache, unsigned start, u * what we've written, and throw out invalid entries. */ void -panfrost_minmax_cache_invalidate(struct panfrost_minmax_cache *cache, struct pipe_transfer *transfer) +panfrost_minmax_cache_invalidate(struct panfrost_minmax_cache *cache, + struct pipe_transfer *transfer) { - /* Ensure there is a cache to invalidate and a write */ - if (!cache) - return; + /* Ensure there is a cache to invalidate and a write */ + if (!cache) + return; - if (!(transfer->usage & PIPE_MAP_WRITE)) - return; + if (!(transfer->usage & PIPE_MAP_WRITE)) + return; - unsigned valid_count = 0; + unsigned valid_count = 0; - for (unsigned i = 0; i < cache->size; ++i) { - uint64_t key = cache->keys[i]; + for (unsigned i = 0; i < cache->size; ++i) { + uint64_t key = cache->keys[i]; - uint32_t start = key & 0xffffffff; - uint32_t count = key >> 32; + uint32_t start = key & 0xffffffff; + uint32_t count = key >> 32; - /* 1D range intersection */ - bool invalid = MAX2(transfer->box.x, start) < MIN2(transfer->box.x + transfer->box.width, start + count); - if (!invalid) { - cache->keys[valid_count] = key; - cache->values[valid_count] = cache->values[i]; - valid_count++; - } - } + /* 1D range intersection */ + bool invalid = MAX2(transfer->box.x, start) < + MIN2(transfer->box.x + transfer->box.width, start + count); + if (!invalid) { + cache->keys[valid_count] = key; + cache->values[valid_count] = cache->values[i]; + valid_count++; + } + } - cache->size = valid_count; - cache->index = 0; + cache->size = valid_count; + cache->index = 0; } diff --git a/src/panfrost/shared/pan_minmax_cache.h b/src/panfrost/shared/pan_minmax_cache.h index fe264370ef3..651e6436cd6 100644 --- a/src/panfrost/shared/pan_minmax_cache.h +++ b/src/panfrost/shared/pan_minmax_cache.h @@ -32,21 +32,21 @@ #define PANFROST_MINMAX_SIZE 64 struct panfrost_minmax_cache { - uint64_t keys[PANFROST_MINMAX_SIZE]; - uint64_t values[PANFROST_MINMAX_SIZE]; - unsigned size; - unsigned index; + uint64_t keys[PANFROST_MINMAX_SIZE]; + uint64_t values[PANFROST_MINMAX_SIZE]; + unsigned size; + unsigned index; }; -bool -panfrost_minmax_cache_get(struct panfrost_minmax_cache *cache, unsigned start, unsigned count, - unsigned *min_index, unsigned *max_index); +bool panfrost_minmax_cache_get(struct panfrost_minmax_cache *cache, + unsigned start, unsigned count, + unsigned *min_index, unsigned *max_index); -void -panfrost_minmax_cache_add(struct panfrost_minmax_cache *cache, unsigned start, unsigned count, - unsigned min_index, unsigned max_index); +void panfrost_minmax_cache_add(struct panfrost_minmax_cache *cache, + unsigned start, unsigned count, + unsigned min_index, unsigned max_index); -void -panfrost_minmax_cache_invalidate(struct panfrost_minmax_cache *cache, struct pipe_transfer *transfer); +void panfrost_minmax_cache_invalidate(struct panfrost_minmax_cache *cache, + struct pipe_transfer *transfer); #endif diff --git a/src/panfrost/shared/pan_tiling.c b/src/panfrost/shared/pan_tiling.c index 40007c5c67b..d49c69d48ce 100644 --- a/src/panfrost/shared/pan_tiling.c +++ b/src/panfrost/shared/pan_tiling.c @@ -27,8 +27,8 @@ #include 
"pan_tiling.h" #include -#include "util/macros.h" #include "util/bitscan.h" +#include "util/macros.h" /* * This file implements software encode/decode of u-interleaved textures. @@ -105,8 +105,8 @@ const unsigned space_4[16] = { /* The scheme uses 16x16 tiles */ -#define TILE_WIDTH 16 -#define TILE_HEIGHT 16 +#define TILE_WIDTH 16 +#define TILE_HEIGHT 16 #define PIXELS_PER_TILE (TILE_WIDTH * TILE_HEIGHT) /* We need a 128-bit type for idiomatically tiling bpp128 formats. The type must @@ -118,24 +118,24 @@ const unsigned space_4[16] = { typedef __uint128_t pan_uint128_t; #else typedef struct { - uint64_t lo; - uint64_t hi; + uint64_t lo; + uint64_t hi; } __attribute__((packed)) pan_uint128_t; #endif typedef struct { - uint16_t lo; - uint8_t hi; + uint16_t lo; + uint8_t hi; } __attribute__((packed)) pan_uint24_t; typedef struct { - uint32_t lo; - uint16_t hi; + uint32_t lo; + uint16_t hi; } __attribute__((packed)) pan_uint48_t; typedef struct { - uint64_t lo; - uint32_t hi; + uint64_t lo; + uint32_t hi; } __attribute__((packed)) pan_uint96_t; /* Optimized routine to tile an aligned (w & 0xF == 0) texture. Explanation: @@ -169,33 +169,29 @@ typedef struct { * be unrolled), calculating the index within the tile and writing. */ -#define TILED_ACCESS_TYPE(pixel_t, shift) \ -static ALWAYS_INLINE void \ -panfrost_access_tiled_image_##pixel_t \ - (void *dst, void *src, \ - uint16_t sx, uint16_t sy, \ - uint16_t w, uint16_t h, \ - uint32_t dst_stride, \ - uint32_t src_stride, \ - bool is_store) \ -{ \ - uint8_t *dest_start = dst + ((sx >> 4) * PIXELS_PER_TILE * sizeof(pixel_t)); \ - for (int y = sy, src_y = 0; src_y < h; ++y, ++src_y) { \ - uint8_t *dest = (uint8_t *) (dest_start + ((y >> 4) * dst_stride)); \ - pixel_t *source = src + (src_y * src_stride); \ - pixel_t *source_end = source + w; \ - unsigned expanded_y = bit_duplication[y & 0xF] << shift; \ - for (; source < source_end; dest += (PIXELS_PER_TILE << shift)) { \ - for (uint8_t i = 0; i < 16; ++i) { \ - unsigned index = expanded_y ^ (space_4[i] << shift); \ - if (is_store) \ - *((pixel_t *) (dest + index)) = *(source++); \ - else \ - *(source++) = *((pixel_t *) (dest + index)); \ - } \ - } \ - } \ -} \ +#define TILED_ACCESS_TYPE(pixel_t, shift) \ + static ALWAYS_INLINE void panfrost_access_tiled_image_##pixel_t( \ + void *dst, void *src, uint16_t sx, uint16_t sy, uint16_t w, uint16_t h, \ + uint32_t dst_stride, uint32_t src_stride, bool is_store) \ + { \ + uint8_t *dest_start = \ + dst + ((sx >> 4) * PIXELS_PER_TILE * sizeof(pixel_t)); \ + for (int y = sy, src_y = 0; src_y < h; ++y, ++src_y) { \ + uint8_t *dest = (uint8_t *)(dest_start + ((y >> 4) * dst_stride)); \ + pixel_t *source = src + (src_y * src_stride); \ + pixel_t *source_end = source + w; \ + unsigned expanded_y = bit_duplication[y & 0xF] << shift; \ + for (; source < source_end; dest += (PIXELS_PER_TILE << shift)) { \ + for (uint8_t i = 0; i < 16; ++i) { \ + unsigned index = expanded_y ^ (space_4[i] << shift); \ + if (is_store) \ + *((pixel_t *)(dest + index)) = *(source++); \ + else \ + *(source++) = *((pixel_t *)(dest + index)); \ + } \ + } \ + } \ + } TILED_ACCESS_TYPE(uint8_t, 0); TILED_ACCESS_TYPE(uint16_t, 1); @@ -203,44 +199,47 @@ TILED_ACCESS_TYPE(uint32_t, 2); TILED_ACCESS_TYPE(uint64_t, 3); TILED_ACCESS_TYPE(pan_uint128_t, 4); -#define TILED_UNALIGNED_TYPE(pixel_t, is_store, tile_shift) { \ - const unsigned mask = (1 << tile_shift) - 1; \ - for (int y = sy, src_y = 0; src_y < h; ++y, ++src_y) { \ - unsigned block_start_s = (y >> tile_shift) * dst_stride; \ - unsigned 
source_start = src_y * src_stride; \ - unsigned expanded_y = bit_duplication[y & mask]; \ - \ - for (int x = sx, src_x = 0; src_x < w; ++x, ++src_x) { \ - unsigned block_x_s = (x >> tile_shift) * (1 << (tile_shift * 2)); \ - unsigned index = expanded_y ^ space_4[x & mask]; \ - uint8_t *source = src + source_start + sizeof(pixel_t) * src_x; \ - uint8_t *dest = dst + block_start_s + sizeof(pixel_t) * (block_x_s + index); \ - \ - pixel_t *outp = (pixel_t *) (is_store ? dest : source); \ - pixel_t *inp = (pixel_t *) (is_store ? source : dest); \ - *outp = *inp; \ - } \ - } \ -} +#define TILED_UNALIGNED_TYPE(pixel_t, is_store, tile_shift) \ + { \ + const unsigned mask = (1 << tile_shift) - 1; \ + for (int y = sy, src_y = 0; src_y < h; ++y, ++src_y) { \ + unsigned block_start_s = (y >> tile_shift) * dst_stride; \ + unsigned source_start = src_y * src_stride; \ + unsigned expanded_y = bit_duplication[y & mask]; \ + \ + for (int x = sx, src_x = 0; src_x < w; ++x, ++src_x) { \ + unsigned block_x_s = (x >> tile_shift) * (1 << (tile_shift * 2)); \ + unsigned index = expanded_y ^ space_4[x & mask]; \ + uint8_t *source = src + source_start + sizeof(pixel_t) * src_x; \ + uint8_t *dest = \ + dst + block_start_s + sizeof(pixel_t) * (block_x_s + index); \ + \ + pixel_t *outp = (pixel_t *)(is_store ? dest : source); \ + pixel_t *inp = (pixel_t *)(is_store ? source : dest); \ + *outp = *inp; \ + } \ + } \ + } -#define TILED_UNALIGNED_TYPES(store, shift) { \ - if (bpp == 8) \ - TILED_UNALIGNED_TYPE(uint8_t, store, shift) \ - else if (bpp == 16) \ - TILED_UNALIGNED_TYPE(uint16_t, store, shift) \ - else if (bpp == 24) \ - TILED_UNALIGNED_TYPE(pan_uint24_t, store, shift) \ - else if (bpp == 32) \ - TILED_UNALIGNED_TYPE(uint32_t, store, shift) \ - else if (bpp == 48) \ - TILED_UNALIGNED_TYPE(pan_uint48_t, store, shift) \ - else if (bpp == 64) \ - TILED_UNALIGNED_TYPE(uint64_t, store, shift) \ - else if (bpp == 96) \ - TILED_UNALIGNED_TYPE(pan_uint96_t, store, shift) \ - else if (bpp == 128) \ - TILED_UNALIGNED_TYPE(pan_uint128_t, store, shift) \ -} +#define TILED_UNALIGNED_TYPES(store, shift) \ + { \ + if (bpp == 8) \ + TILED_UNALIGNED_TYPE(uint8_t, store, shift) \ + else if (bpp == 16) \ + TILED_UNALIGNED_TYPE(uint16_t, store, shift) \ + else if (bpp == 24) \ + TILED_UNALIGNED_TYPE(pan_uint24_t, store, shift) \ + else if (bpp == 32) \ + TILED_UNALIGNED_TYPE(uint32_t, store, shift) \ + else if (bpp == 48) \ + TILED_UNALIGNED_TYPE(pan_uint48_t, store, shift) \ + else if (bpp == 64) \ + TILED_UNALIGNED_TYPE(uint64_t, store, shift) \ + else if (bpp == 96) \ + TILED_UNALIGNED_TYPE(pan_uint96_t, store, shift) \ + else if (bpp == 128) \ + TILED_UNALIGNED_TYPE(pan_uint128_t, store, shift) \ + } /* * Perform a generic access to a tiled image with a given format. This works @@ -249,13 +248,11 @@ TILED_ACCESS_TYPE(pan_uint128_t, 4); * so we divide here. Alignment is assumed. 
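Both the aligned fast path and the unaligned macro above compute the position inside a 16x16 tile as bit_duplication[y & 0xF] ^ space_4[x & 0xF]. Assuming bit_duplication[] copies each y bit into an adjacent even/odd pair and space_4[] spreads the x bits onto the even positions, which is the order the reference u_order() in the tiling test spells out bit by bit, the XOR leaves x ^ y on the even bits and y on the odd bits. A small sketch of that addressing in direct bit form rather than via the lookup tables; sketch_u_order and sketch_tiled_offset are illustrative names:

/* Bit-level form of the within-tile index used by the macros above:
 * even bits hold x ^ y, odd bits hold y. Illustrative sketch only. */
unsigned
sketch_u_order(unsigned x, unsigned y)
{
   unsigned index = 0;

   for (unsigned b = 0; b < 4; ++b) {
      unsigned xy = ((x >> b) ^ (y >> b)) & 1;
      unsigned yb = (y >> b) & 1;

      index |= xy << (2 * b);     /* even bits: x ^ y */
      index |= yb << (2 * b + 1); /* odd bits: y */
   }

   return index; /* 0..255 inside the 16x16 tile */
}

/* Byte offset of pixel (x, y) in a u-interleaved image, for a power-of-two
 * bytes-per-pixel and dst_stride bytes between rows of tiles, mirroring how
 * the aligned fast path advances dest_start and dest. */
unsigned
sketch_tiled_offset(unsigned x, unsigned y, unsigned dst_stride,
                    unsigned bytes_per_pixel)
{
   unsigned tile_x = x >> 4;
   unsigned tile_y = y >> 4;

   return tile_y * dst_stride +             /* row of 16-pixel-tall tiles */
          tile_x * 256 * bytes_per_pixel +  /* 16x16 pixels per tile */
          sketch_u_order(x & 0xF, y & 0xF) * bytes_per_pixel;
}

The table-driven macros above cover the aligned, power-of-two cases; the generic routine that follows handles everything else, including block-compressed formats, one block at a time.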
*/ static void -panfrost_access_tiled_image_generic(void *dst, void *src, - unsigned sx, unsigned sy, - unsigned w, unsigned h, - uint32_t dst_stride, - uint32_t src_stride, - const struct util_format_description *desc, - bool _is_store) +panfrost_access_tiled_image_generic(void *dst, void *src, unsigned sx, + unsigned sy, unsigned w, unsigned h, + uint32_t dst_stride, uint32_t src_stride, + const struct util_format_description *desc, + bool _is_store) { unsigned bpp = desc->block.bits; @@ -278,16 +275,15 @@ panfrost_access_tiled_image_generic(void *dst, void *src, } } -#define OFFSET(src, _x, _y) (void *) ((uint8_t *) src + ((_y) - orig_y) * src_stride + (((_x) - orig_x) * (bpp / 8))) +#define OFFSET(src, _x, _y) \ + (void *)((uint8_t *)src + ((_y)-orig_y) * src_stride + \ + (((_x)-orig_x) * (bpp / 8))) static ALWAYS_INLINE void -panfrost_access_tiled_image(void *dst, void *src, - unsigned x, unsigned y, - unsigned w, unsigned h, - uint32_t dst_stride, - uint32_t src_stride, - enum pipe_format format, - bool is_store) +panfrost_access_tiled_image(void *dst, void *src, unsigned x, unsigned y, + unsigned w, unsigned h, uint32_t dst_stride, + uint32_t src_stride, enum pipe_format format, + bool is_store) { const struct util_format_description *desc = util_format_description(format); unsigned bpp = desc->block.bits; @@ -300,10 +296,10 @@ panfrost_access_tiled_image(void *dst, void *src, assert((dst_stride % (bpp / 8)) == 0 && "unaligned destination stride"); assert((src_stride % (bpp / 8)) == 0 && "unaligned source stride"); - if (desc->block.width > 1 || !util_is_power_of_two_nonzero(desc->block.bits)) { - panfrost_access_tiled_image_generic(dst, (void *) src, - x, y, w, h, - dst_stride, src_stride, desc, is_store); + if (desc->block.width > 1 || + !util_is_power_of_two_nonzero(desc->block.bits)) { + panfrost_access_tiled_image_generic( + dst, (void *)src, x, y, w, h, dst_stride, src_stride, desc, is_store); return; } @@ -320,9 +316,9 @@ panfrost_access_tiled_image(void *dst, void *src, if (first_full_tile_y != y) { unsigned dist = MIN2(first_full_tile_y - y, h); - panfrost_access_tiled_image_generic(dst, OFFSET(src, x, y), - x, y, w, dist, - dst_stride, src_stride, desc, is_store); + panfrost_access_tiled_image_generic(dst, OFFSET(src, x, y), x, y, w, dist, + dst_stride, src_stride, desc, + is_store); if (dist == h) return; @@ -335,9 +331,9 @@ panfrost_access_tiled_image(void *dst, void *src, if (last_full_tile_y != (y + h)) { unsigned dist = (y + h) - last_full_tile_y; - panfrost_access_tiled_image_generic(dst, OFFSET(src, x, last_full_tile_y), - x, last_full_tile_y, w, dist, - dst_stride, src_stride, desc, is_store); + panfrost_access_tiled_image_generic( + dst, OFFSET(src, x, last_full_tile_y), x, last_full_tile_y, w, dist, + dst_stride, src_stride, desc, is_store); h -= dist; } @@ -346,9 +342,9 @@ panfrost_access_tiled_image(void *dst, void *src, if (first_full_tile_x != x) { unsigned dist = MIN2(first_full_tile_x - x, w); - panfrost_access_tiled_image_generic(dst, OFFSET(src, x, y), - x, y, dist, h, - dst_stride, src_stride, desc, is_store); + panfrost_access_tiled_image_generic(dst, OFFSET(src, x, y), x, y, dist, h, + dst_stride, src_stride, desc, + is_store); if (dist == w) return; @@ -361,23 +357,28 @@ panfrost_access_tiled_image(void *dst, void *src, if (last_full_tile_x != (x + w)) { unsigned dist = (x + w) - last_full_tile_x; - panfrost_access_tiled_image_generic(dst, OFFSET(src, last_full_tile_x, y), - last_full_tile_x, y, dist, h, - dst_stride, src_stride, desc, is_store); + 
panfrost_access_tiled_image_generic( + dst, OFFSET(src, last_full_tile_x, y), last_full_tile_x, y, dist, h, + dst_stride, src_stride, desc, is_store); w -= dist; } if (bpp == 8) - panfrost_access_tiled_image_uint8_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store); + panfrost_access_tiled_image_uint8_t(dst, OFFSET(src, x, y), x, y, w, h, + dst_stride, src_stride, is_store); else if (bpp == 16) - panfrost_access_tiled_image_uint16_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store); + panfrost_access_tiled_image_uint16_t(dst, OFFSET(src, x, y), x, y, w, h, + dst_stride, src_stride, is_store); else if (bpp == 32) - panfrost_access_tiled_image_uint32_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store); + panfrost_access_tiled_image_uint32_t(dst, OFFSET(src, x, y), x, y, w, h, + dst_stride, src_stride, is_store); else if (bpp == 64) - panfrost_access_tiled_image_uint64_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store); + panfrost_access_tiled_image_uint64_t(dst, OFFSET(src, x, y), x, y, w, h, + dst_stride, src_stride, is_store); else if (bpp == 128) - panfrost_access_tiled_image_pan_uint128_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store); + panfrost_access_tiled_image_pan_uint128_t( + dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store); } /** @@ -386,27 +387,19 @@ panfrost_access_tiled_image(void *dst, void *src, * are aligned to the block size. */ void -panfrost_store_tiled_image(void *dst, const void *src, - unsigned x, unsigned y, - unsigned w, unsigned h, - uint32_t dst_stride, - uint32_t src_stride, - enum pipe_format format) +panfrost_store_tiled_image(void *dst, const void *src, unsigned x, unsigned y, + unsigned w, unsigned h, uint32_t dst_stride, + uint32_t src_stride, enum pipe_format format) { - panfrost_access_tiled_image(dst, (void *) src, - x, y, w, h, - dst_stride, src_stride, format, true); + panfrost_access_tiled_image(dst, (void *)src, x, y, w, h, dst_stride, + src_stride, format, true); } void -panfrost_load_tiled_image(void *dst, const void *src, - unsigned x, unsigned y, - unsigned w, unsigned h, - uint32_t dst_stride, - uint32_t src_stride, - enum pipe_format format) +panfrost_load_tiled_image(void *dst, const void *src, unsigned x, unsigned y, + unsigned w, unsigned h, uint32_t dst_stride, + uint32_t src_stride, enum pipe_format format) { - panfrost_access_tiled_image((void *) src, dst, - x, y, w, h, - src_stride, dst_stride, format, false); + panfrost_access_tiled_image((void *)src, dst, x, y, w, h, src_stride, + dst_stride, format, false); } diff --git a/src/panfrost/shared/pan_tiling.h b/src/panfrost/shared/pan_tiling.h index d63c581edef..c8734057bc5 100644 --- a/src/panfrost/shared/pan_tiling.h +++ b/src/panfrost/shared/pan_tiling.h @@ -47,11 +47,9 @@ extern "C" { * @src_stride Number of bytes between adjacent rows of tiles in source. 
* @format Format of the source and destination image */ -void panfrost_load_tiled_image(void *dst, const void *src, - unsigned x, unsigned y, - unsigned w, unsigned h, - uint32_t dst_stride, - uint32_t src_stride, +void panfrost_load_tiled_image(void *dst, const void *src, unsigned x, + unsigned y, unsigned w, unsigned h, + uint32_t dst_stride, uint32_t src_stride, enum pipe_format format); /** @@ -67,14 +65,11 @@ void panfrost_load_tiled_image(void *dst, const void *src, * @src_stride Stride in bytes of linear source * @format Format of the source and destination image */ -void panfrost_store_tiled_image(void *dst, const void *src, - unsigned x, unsigned y, - unsigned w, unsigned h, - uint32_t dst_stride, - uint32_t src_stride, +void panfrost_store_tiled_image(void *dst, const void *src, unsigned x, + unsigned y, unsigned w, unsigned h, + uint32_t dst_stride, uint32_t src_stride, enum pipe_format format); - #ifdef __cplusplus } /* extern C */ #endif diff --git a/src/panfrost/shared/test/test-tiling.cpp b/src/panfrost/shared/test/test-tiling.cpp index d5ad9e31c7f..8ee4fbf2172 100644 --- a/src/panfrost/shared/test/test-tiling.cpp +++ b/src/panfrost/shared/test/test-tiling.cpp @@ -45,13 +45,14 @@ u_order(unsigned x, unsigned y) unsigned y2 = (y & 4) ? 1 : 0; unsigned y3 = (y & 8) ? 1 : 0; - return (xy0 << 0) | (y0 << 1) | (xy1 << 2) | (y1 << 3) | - (xy2 << 4) | (y2 << 5) | (xy3 << 6) | (y3 << 7); + return (xy0 << 0) | (y0 << 1) | (xy1 << 2) | (y1 << 3) | (xy2 << 4) | + (y2 << 5) | (xy3 << 6) | (y3 << 7); } /* x/y are in blocks */ static unsigned -tiled_offset(unsigned x, unsigned y, unsigned stride, unsigned tilesize, unsigned blocksize) +tiled_offset(unsigned x, unsigned y, unsigned stride, unsigned tilesize, + unsigned blocksize) { unsigned tile_x = x / tilesize; unsigned tile_y = y / tilesize; @@ -75,15 +76,13 @@ linear_offset(unsigned x, unsigned y, unsigned stride, unsigned blocksize) } static void -ref_access_tiled(void *dst, const void *src, - unsigned region_x, unsigned region_y, - unsigned w, unsigned h, - uint32_t dst_stride, - uint32_t src_stride, - enum pipe_format format, +ref_access_tiled(void *dst, const void *src, unsigned region_x, + unsigned region_y, unsigned w, unsigned h, uint32_t dst_stride, + uint32_t src_stride, enum pipe_format format, bool dst_is_tiled) { - const struct util_format_description *desc = util_format_description(format);; + const struct util_format_description *desc = util_format_description(format); + ; unsigned tilesize = (desc->block.width > 1) ? 
4 : 16; unsigned blocksize = (desc->block.bits / 8); @@ -94,8 +93,10 @@ ref_access_tiled(void *dst, const void *src, unsigned region_x_block = region_x / desc->block.width; unsigned region_y_block = region_y / desc->block.height; - for (unsigned linear_y_block = 0; linear_y_block < h_block; ++linear_y_block) { - for (unsigned linear_x_block = 0; linear_x_block < w_block; ++linear_x_block) { + for (unsigned linear_y_block = 0; linear_y_block < h_block; + ++linear_y_block) { + for (unsigned linear_x_block = 0; linear_x_block < w_block; + ++linear_x_block) { unsigned tiled_x_block = region_x_block + linear_x_block; unsigned tiled_y_block = region_y_block + linear_y_block; @@ -103,15 +104,18 @@ ref_access_tiled(void *dst, const void *src, unsigned dst_offset, src_offset; if (dst_is_tiled) { - dst_offset = tiled_offset(tiled_x_block, tiled_y_block, dst_stride, tilesize, blocksize); - src_offset = linear_offset(linear_x_block, linear_y_block, src_stride, blocksize); + dst_offset = tiled_offset(tiled_x_block, tiled_y_block, dst_stride, + tilesize, blocksize); + src_offset = linear_offset(linear_x_block, linear_y_block, + src_stride, blocksize); } else { - dst_offset = linear_offset(linear_x_block, linear_y_block, dst_stride, blocksize); - src_offset = tiled_offset(tiled_x_block, tiled_y_block, src_stride, tilesize, blocksize); + dst_offset = linear_offset(linear_x_block, linear_y_block, + dst_stride, blocksize); + src_offset = tiled_offset(tiled_x_block, tiled_y_block, src_stride, + tilesize, blocksize); } - memcpy((uint8_t *) dst + dst_offset, - (const uint8_t *) src + src_offset, + memcpy((uint8_t *)dst + dst_offset, (const uint8_t *)src + src_offset, desc->block.bits / 8); } } @@ -123,14 +127,13 @@ ref_access_tiled(void *dst, const void *src, * production. */ static void -test(unsigned width, unsigned height, unsigned rx, unsigned ry, - unsigned rw, unsigned rh, unsigned linear_stride, - enum pipe_format format, bool store) +test(unsigned width, unsigned height, unsigned rx, unsigned ry, unsigned rw, + unsigned rh, unsigned linear_stride, enum pipe_format format, bool store) { unsigned bpp = util_format_get_blocksize(format); unsigned tile_height = util_format_is_compressed(format) ? 4 : 16; - unsigned tiled_width = ALIGN_POT(width, 16); + unsigned tiled_width = ALIGN_POT(width, 16); unsigned tiled_height = ALIGN_POT(height, 16); unsigned tiled_stride = tiled_width * tile_height * bpp; @@ -139,26 +142,27 @@ test(unsigned width, unsigned height, unsigned rx, unsigned ry, void *tiled = calloc(bpp, tiled_width * tiled_height); void *linear = calloc(bpp, rw * linear_stride); - void *ref = calloc(bpp, store ? (tiled_width * tiled_height) : (rw * linear_stride)); + void *ref = + calloc(bpp, store ? (tiled_width * tiled_height) : (rw * linear_stride)); if (store) { for (unsigned i = 0; i < bpp * rw * linear_stride; ++i) { - ((uint8_t *) linear)[i] = (i & 0xFF); + ((uint8_t *)linear)[i] = (i & 0xFF); } - panfrost_store_tiled_image(tiled, linear, rx, ry, rw, rh, - dst_stride, src_stride, format); + panfrost_store_tiled_image(tiled, linear, rx, ry, rw, rh, dst_stride, + src_stride, format); } else { for (unsigned i = 0; i < bpp * tiled_width * tiled_height; ++i) { - ((uint8_t *) tiled)[i] = (i & 0xFF); + ((uint8_t *)tiled)[i] = (i & 0xFF); } - panfrost_load_tiled_image(linear, tiled, rx, ry, rw, rh, - dst_stride, src_stride, format); + panfrost_load_tiled_image(linear, tiled, rx, ry, rw, rh, dst_stride, + src_stride, format); } - ref_access_tiled(ref, store ? 
linear : tiled, rx, ry, rw, rh, - dst_stride, src_stride, format, store); + ref_access_tiled(ref, store ? linear : tiled, rx, ry, rw, rh, dst_stride, + src_stride, format, store); if (store) EXPECT_EQ(memcmp(ref, tiled, bpp * tiled_width * tiled_height), 0); @@ -273,7 +277,7 @@ TEST(UInterleavedTiling, ASTC) TEST(UInterleavedTiling, PartialASTC) { /* Block alignment assumed */ - test_ldst(40, 40, 4, 4, 16, 8, 512, PIPE_FORMAT_ASTC_4x4); - test_ldst(50, 40, 5, 4, 10, 8, 512, PIPE_FORMAT_ASTC_5x4); + test_ldst(40, 40, 4, 4, 16, 8, 512, PIPE_FORMAT_ASTC_4x4); + test_ldst(50, 40, 5, 4, 10, 8, 512, PIPE_FORMAT_ASTC_5x4); test_ldst(50, 50, 5, 5, 10, 10, 512, PIPE_FORMAT_ASTC_5x5); } diff --git a/src/panfrost/tools/panfrost_texfeatures.c b/src/panfrost/tools/panfrost_texfeatures.c index 67872e56697..f64a962aec0 100644 --- a/src/panfrost/tools/panfrost_texfeatures.c +++ b/src/panfrost/tools/panfrost_texfeatures.c @@ -12,36 +12,39 @@ * Malis should be similar. */ struct format { - unsigned bit; - const char *name; + unsigned bit; + const char *name; }; -#define FMT(bit, name) { bit, name ":" } +#define FMT(bit, name) \ + { \ + bit, name ":" \ + } static struct format formats[] = { - FMT( 1, "ETC2"), - FMT( 3, "ETC2 EAC"), - FMT(19, "ETC2 PTA"), - FMT( 2, "EAC 1"), - FMT( 4, "EAC 2"), - FMT(17, "EAC snorm 1"), - FMT(18, "EAC snorm 2"), - { 0, NULL }, - FMT(20, "ASTC 3D LDR"), - FMT(21, "ASTC 3D HDR"), - FMT(22, "ASTC 2D LDR"), - FMT(23, "ASTC 3D HDR"), - { 0, NULL }, - FMT( 7, "BC1"), - FMT( 8, "BC2"), - FMT( 9, "BC3"), - FMT(10, "BC4 unorm"), - FMT(11, "BC4 snorm"), - FMT(12, "BC5 unorm"), - FMT(13, "BC5 snorm"), - FMT(14, "BC6H UF16"), - FMT(15, "BC6H SF16"), - FMT(16, "BC7"), + FMT(1, "ETC2"), + FMT(3, "ETC2 EAC"), + FMT(19, "ETC2 PTA"), + FMT(2, "EAC 1"), + FMT(4, "EAC 2"), + FMT(17, "EAC snorm 1"), + FMT(18, "EAC snorm 2"), + {0, NULL}, + FMT(20, "ASTC 3D LDR"), + FMT(21, "ASTC 3D HDR"), + FMT(22, "ASTC 2D LDR"), + FMT(23, "ASTC 3D HDR"), + {0, NULL}, + FMT(7, "BC1"), + FMT(8, "BC2"), + FMT(9, "BC3"), + FMT(10, "BC4 unorm"), + FMT(11, "BC4 snorm"), + FMT(12, "BC5 unorm"), + FMT(13, "BC5 snorm"), + FMT(14, "BC6H UF16"), + FMT(15, "BC6H SF16"), + FMT(16, "BC7"), }; /* ANSI escape code */ @@ -49,44 +52,47 @@ static struct format formats[] = { #define RED(x) "\033[31m" x RESET #define GREEN(x) "\033[32m" x RESET -int main(void) { - int fd = drmOpenWithType("panfrost", NULL, DRM_NODE_RENDER); - if (fd < 0) { - fprintf(stderr, "No panfrost device\n"); - exit(1); - } +int +main(void) +{ + int fd = drmOpenWithType("panfrost", NULL, DRM_NODE_RENDER); + if (fd < 0) { + fprintf(stderr, "No panfrost device\n"); + exit(1); + } - void *ctx = ralloc_context(NULL); - struct panfrost_device dev = { 0 }; - panfrost_open_device(ctx, fd, &dev); + void *ctx = ralloc_context(NULL); + struct panfrost_device dev = {0}; + panfrost_open_device(ctx, fd, &dev); - uint32_t supported = dev.compressed_formats; - bool all_ok = true; + uint32_t supported = dev.compressed_formats; + bool all_ok = true; - printf("System-on-chip compressed texture support:" "\n\n"); + printf("System-on-chip compressed texture support:" + "\n\n"); - for (unsigned i = 0; i < ARRAY_SIZE(formats); ++i) { - if (formats[i].name == NULL) { - printf("\n"); - continue; - } + for (unsigned i = 0; i < ARRAY_SIZE(formats); ++i) { + if (formats[i].name == NULL) { + printf("\n"); + continue; + } - /* Maximum length for justification */ - assert(strlen(formats[i].name) <= 12); + /* Maximum length for justification */ + assert(strlen(formats[i].name) <= 12); - 
bool ok = (supported & BITFIELD_BIT(formats[i].bit)); - all_ok &= ok; + bool ok = (supported & BITFIELD_BIT(formats[i].bit)); + all_ok &= ok; - printf("%-14s %s\n", formats[i].name, - ok ? GREEN("YES") : RED(" NO")); - } + printf("%-14s %s\n", formats[i].name, ok ? GREEN("YES") : RED(" NO")); + } - if (!all_ok) { - printf("\n" - "This system-on-chip lacks support for some formats. This is not a driver bug.\n" - "Unsupported formats will be emulated at a performance and memory cost.\n"); - } + if (!all_ok) { + printf( + "\n" + "This system-on-chip lacks support for some formats. This is not a driver bug.\n" + "Unsupported formats will be emulated at a performance and memory cost.\n"); + } - panfrost_close_device(&dev); - ralloc_free(ctx); + panfrost_close_device(&dev); + ralloc_free(ctx); } diff --git a/src/panfrost/tools/panfrostdump.c b/src/panfrost/tools/panfrostdump.c index b61db132912..d204866573a 100644 --- a/src/panfrost/tools/panfrostdump.c +++ b/src/panfrost/tools/panfrostdump.c @@ -37,16 +37,16 @@ * or times out after 5min) */ -#include -#include -#include -#include -#include -#include -#include #include +#include #include #include +#include +#include +#include +#include +#include +#include #include @@ -81,7 +81,7 @@ struct panfrost_dump_object_header_ho { }; #define MAX_BODUMP_FILENAME 32 -#define GPU_PAGE_SIZE 4096 +#define GPU_PAGE_SIZE 4096 static bool read_header(FILE *fp, struct panfrost_dump_object_header_ho *pdoh) @@ -109,7 +109,7 @@ read_header(FILE *fp, struct panfrost_dump_object_header_ho *pdoh) pdoh->file_offset = le32toh(doh_le.file_offset); pdoh->file_size = le32toh(doh_le.file_size); - switch(pdoh->type) { + switch (pdoh->type) { case PANFROSTDUMP_BUF_REG: pdoh->reghdr.jc = le64toh(doh_le.reghdr.jc); pdoh->reghdr.gpu_id = le32toh(doh_le.reghdr.gpu_id); @@ -234,7 +234,7 @@ main(int argc, char *argv[]) /* clang-format on */ while ((c = getopt_long(argc, argv, "arh", longopts, NULL)) != -1) { - switch(c) { + switch (c) { case 'h': print_help(argv[0], stderr); return EXIT_SUCCESS; @@ -326,7 +326,8 @@ main(int argc, char *argv[]) } if (print_addr) { - printf("BO(%u) VA(%"PRIX64") SZ(%"PRIX32") page addresses:\n", + printf("BO(%u) VA(%" PRIX64 ") SZ(%" PRIX32 + ") page addresses:\n", j, doh.bomap.iova, doh.file_size); for (k = 0; k < (doh.file_size / GPU_PAGE_SIZE); k++) { @@ -343,8 +344,7 @@ main(int argc, char *argv[]) char bodump_filename[MAX_BODUMP_FILENAME]; FILE *bodump; - snprintf(bodump_filename, MAX_BODUMP_FILENAME, - "bodump-%u.dump", j); + snprintf(bodump_filename, MAX_BODUMP_FILENAME, "bodump-%u.dump", j); if ((bodump = fopen(bodump_filename, "wb"))) { if (fseek(data_fp, doh.file_offset, SEEK_SET)) { @@ -367,16 +367,14 @@ main(int argc, char *argv[]) } nbytes = fwrite(bos[j], 1, doh.file_size, bodump); if (nbytes < doh.file_size) { - fprintf(stderr, - "Failed to write BO contents into file: %u\n", + fprintf(stderr, "Failed to write BO contents into file: %u\n", errno); return EXIT_FAILURE; } fclose(bodump); - pandecode_inject_mmap(doh.bomap.iova, - bos[j],doh.file_size, + pandecode_inject_mmap(doh.bomap.iova, bos[j], doh.file_size, NULL); } else { diff --git a/src/panfrost/util/lcra.c b/src/panfrost/util/lcra.c index 45cff37e61f..00585c646a6 100644 --- a/src/panfrost/util/lcra.c +++ b/src/panfrost/util/lcra.c @@ -24,14 +24,14 @@ * Alyssa Rosenzweig */ -#include +#include "lcra.h" #include +#include +#include #include #include -#include #include "util/macros.h" #include "util/u_math.h" -#include "lcra.h" /* This module is the reference implementation of 
"Linearly Constrained * Register Allocation". The paper is available in PDF form @@ -40,161 +40,168 @@ */ struct lcra_state * -lcra_alloc_equations( - unsigned node_count, unsigned class_count) +lcra_alloc_equations(unsigned node_count, unsigned class_count) { - struct lcra_state *l = calloc(1, sizeof(*l)); + struct lcra_state *l = calloc(1, sizeof(*l)); - l->node_count = node_count; - l->class_count = class_count; + l->node_count = node_count; + l->class_count = class_count; - l->alignment = calloc(sizeof(l->alignment[0]), node_count); - l->linear = calloc(sizeof(l->linear[0]), node_count * node_count); - l->modulus = calloc(sizeof(l->modulus[0]), node_count); - l->class = calloc(sizeof(l->class[0]), node_count); - l->class_start = calloc(sizeof(l->class_start[0]), class_count); - l->class_disjoint = calloc(sizeof(l->class_disjoint[0]), class_count * class_count); - l->class_size = calloc(sizeof(l->class_size[0]), class_count); - l->spill_cost = calloc(sizeof(l->spill_cost[0]), node_count); - l->solutions = calloc(sizeof(l->solutions[0]), node_count); + l->alignment = calloc(sizeof(l->alignment[0]), node_count); + l->linear = calloc(sizeof(l->linear[0]), node_count * node_count); + l->modulus = calloc(sizeof(l->modulus[0]), node_count); + l->class = calloc(sizeof(l->class[0]), node_count); + l->class_start = calloc(sizeof(l->class_start[0]), class_count); + l->class_disjoint = + calloc(sizeof(l->class_disjoint[0]), class_count * class_count); + l->class_size = calloc(sizeof(l->class_size[0]), class_count); + l->spill_cost = calloc(sizeof(l->spill_cost[0]), node_count); + l->solutions = calloc(sizeof(l->solutions[0]), node_count); - memset(l->solutions, ~0, sizeof(l->solutions[0]) * node_count); + memset(l->solutions, ~0, sizeof(l->solutions[0]) * node_count); - return l; + return l; } void lcra_free(struct lcra_state *l) { - if (!l) - return; + if (!l) + return; - free(l->alignment); - free(l->linear); - free(l->modulus); - free(l->class); - free(l->class_start); - free(l->class_disjoint); - free(l->class_size); - free(l->spill_cost); - free(l->solutions); + free(l->alignment); + free(l->linear); + free(l->modulus); + free(l->class); + free(l->class_start); + free(l->class_disjoint); + free(l->class_size); + free(l->spill_cost); + free(l->solutions); - free(l); + free(l); } void -lcra_set_alignment(struct lcra_state *l, unsigned node, unsigned align_log2, unsigned bound) +lcra_set_alignment(struct lcra_state *l, unsigned node, unsigned align_log2, + unsigned bound) { - l->alignment[node] = (align_log2 + 1) | (bound << 16); + l->alignment[node] = (align_log2 + 1) | (bound << 16); } void lcra_set_disjoint_class(struct lcra_state *l, unsigned c1, unsigned c2) { - l->class_disjoint[(c1 * l->class_count) + c2] = true; - l->class_disjoint[(c2 * l->class_count) + c1] = true; + l->class_disjoint[(c1 * l->class_count) + c2] = true; + l->class_disjoint[(c2 * l->class_count) + c1] = true; } void lcra_restrict_range(struct lcra_state *l, unsigned node, unsigned len) { - if (node < l->node_count && l->alignment[node]) { - unsigned BA = l->alignment[node]; - unsigned alignment = (BA & 0xffff) - 1; - unsigned bound = BA >> 16; - l->modulus[node] = DIV_ROUND_UP(bound - len + 1, 1 << alignment); - } + if (node < l->node_count && l->alignment[node]) { + unsigned BA = l->alignment[node]; + unsigned alignment = (BA & 0xffff) - 1; + unsigned bound = BA >> 16; + l->modulus[node] = DIV_ROUND_UP(bound - len + 1, 1 << alignment); + } } void -lcra_add_node_interference(struct lcra_state *l, unsigned i, unsigned 
cmask_i, unsigned j, unsigned cmask_j) +lcra_add_node_interference(struct lcra_state *l, unsigned i, unsigned cmask_i, + unsigned j, unsigned cmask_j) { - if (i == j) - return; + if (i == j) + return; - if (l->class_disjoint[(l->class[i] * l->class_count) + l->class[j]]) - return; + if (l->class_disjoint[(l->class[i] * l->class_count) + l->class[j]]) + return; - uint32_t constraint_fw = 0; - uint32_t constraint_bw = 0; + uint32_t constraint_fw = 0; + uint32_t constraint_bw = 0; - for (unsigned D = 0; D < 16; ++D) { - if (cmask_i & (cmask_j << D)) { - constraint_bw |= (1 << (15 + D)); - constraint_fw |= (1 << (15 - D)); - } + for (unsigned D = 0; D < 16; ++D) { + if (cmask_i & (cmask_j << D)) { + constraint_bw |= (1 << (15 + D)); + constraint_fw |= (1 << (15 - D)); + } - if (cmask_i & (cmask_j >> D)) { - constraint_fw |= (1 << (15 + D)); - constraint_bw |= (1 << (15 - D)); - } - } + if (cmask_i & (cmask_j >> D)) { + constraint_fw |= (1 << (15 + D)); + constraint_bw |= (1 << (15 - D)); + } + } - l->linear[j * l->node_count + i] |= constraint_fw; - l->linear[i * l->node_count + j] |= constraint_bw; + l->linear[j * l->node_count + i] |= constraint_fw; + l->linear[i * l->node_count + j] |= constraint_bw; } static bool lcra_test_linear(struct lcra_state *l, unsigned *solutions, unsigned i) { - unsigned *row = &l->linear[i * l->node_count]; - signed constant = solutions[i]; + unsigned *row = &l->linear[i * l->node_count]; + signed constant = solutions[i]; - for (unsigned j = 0; j < l->node_count; ++j) { - if (solutions[j] == ~0) continue; + for (unsigned j = 0; j < l->node_count; ++j) { + if (solutions[j] == ~0) + continue; - signed lhs = solutions[j] - constant; + signed lhs = solutions[j] - constant; - if (lhs < -15 || lhs > 15) - continue; + if (lhs < -15 || lhs > 15) + continue; - if (row[j] & (1 << (lhs + 15))) - return false; - } + if (row[j] & (1 << (lhs + 15))) + return false; + } - return true; + return true; } bool lcra_solve(struct lcra_state *l) { - for (unsigned step = 0; step < l->node_count; ++step) { - if (l->solutions[step] != ~0) continue; - if (l->alignment[step] == 0) continue; + for (unsigned step = 0; step < l->node_count; ++step) { + if (l->solutions[step] != ~0) + continue; + if (l->alignment[step] == 0) + continue; - unsigned _class = l->class[step]; - unsigned class_start = l->class_start[_class]; + unsigned _class = l->class[step]; + unsigned class_start = l->class_start[_class]; - unsigned BA = l->alignment[step]; - unsigned shift = (BA & 0xffff) - 1; - unsigned bound = BA >> 16; + unsigned BA = l->alignment[step]; + unsigned shift = (BA & 0xffff) - 1; + unsigned bound = BA >> 16; - unsigned P = bound >> shift; - unsigned Q = l->modulus[step]; - unsigned r_max = l->class_size[_class]; - unsigned k_max = r_max >> shift; - unsigned m_max = k_max / P; - bool succ = false; + unsigned P = bound >> shift; + unsigned Q = l->modulus[step]; + unsigned r_max = l->class_size[_class]; + unsigned k_max = r_max >> shift; + unsigned m_max = k_max / P; + bool succ = false; - for (unsigned m = 0; m < m_max; ++m) { - for (unsigned n = 0; n < Q; ++n) { - l->solutions[step] = ((m * P + n) << shift) + class_start; - succ = lcra_test_linear(l, l->solutions, step); + for (unsigned m = 0; m < m_max; ++m) { + for (unsigned n = 0; n < Q; ++n) { + l->solutions[step] = ((m * P + n) << shift) + class_start; + succ = lcra_test_linear(l, l->solutions, step); - if (succ) break; - } + if (succ) + break; + } - if (succ) break; - } + if (succ) + break; + } - /* Out of registers - prepare to spill */ -
if (!succ) { - l->spill_class = l->class[step]; - return false; - } - } + /* Out of registers - prepare to spill */ + if (!succ) { + l->spill_class = l->class[step]; + return false; + } + } - return true; + return true; } /* Register spilling is implemented with a cost-benefit system. Costs are set @@ -203,45 +210,47 @@ lcra_solve(struct lcra_state *l) void lcra_set_node_spill_cost(struct lcra_state *l, unsigned node, signed cost) { - if (node < l->node_count) - l->spill_cost[node] = cost; + if (node < l->node_count) + l->spill_cost[node] = cost; } static unsigned lcra_count_constraints(struct lcra_state *l, unsigned i) { - unsigned count = 0; - unsigned *constraints = &l->linear[i * l->node_count]; + unsigned count = 0; + unsigned *constraints = &l->linear[i * l->node_count]; - for (unsigned j = 0; j < l->node_count; ++j) - count += util_bitcount(constraints[j]); + for (unsigned j = 0; j < l->node_count; ++j) + count += util_bitcount(constraints[j]); - return count; + return count; } signed lcra_get_best_spill_node(struct lcra_state *l) { - /* If there are no constraints on a node, do not pick it to spill under - * any circumstance, or else we would hang rather than fail RA */ - float best_benefit = 0.0; - signed best_node = -1; + /* If there are no constraints on a node, do not pick it to spill under + * any circumstance, or else we would hang rather than fail RA */ + float best_benefit = 0.0; + signed best_node = -1; - for (unsigned i = 0; i < l->node_count; ++i) { - /* Find spillable nodes */ - if (l->class[i] != l->spill_class) continue; - if (l->spill_cost[i] < 0) continue; + for (unsigned i = 0; i < l->node_count; ++i) { + /* Find spillable nodes */ + if (l->class[i] != l->spill_class) + continue; + if (l->spill_cost[i] < 0) + continue; - /* Adapted from Chaitin's heuristic */ - float constraints = lcra_count_constraints(l, i); - float cost = (l->spill_cost[i] + 1); - float benefit = constraints / cost; + /* Adapted from Chaitin's heuristic */ + float constraints = lcra_count_constraints(l, i); + float cost = (l->spill_cost[i] + 1); + float benefit = constraints / cost; - if (benefit > best_benefit) { - best_benefit = benefit; - best_node = i; - } - } + if (benefit > best_benefit) { + best_benefit = benefit; + best_node = i; + } + } - return best_node; + return best_node; } diff --git a/src/panfrost/util/lcra.h b/src/panfrost/util/lcra.h index fd47fdc3543..0b1ed13400f 100644 --- a/src/panfrost/util/lcra.h +++ b/src/panfrost/util/lcra.h @@ -31,78 +31,71 @@ #include struct lcra_state { - unsigned node_count; + unsigned node_count; - /* Alignment for node in log2(bytes)+1. Since alignment must be - * non-negative power-of-two, the elements are strictly positive - * integers. Zero is the sentinel for a missing node. In upper word, - * bound. */ - unsigned *alignment; + /* Alignment for node in log2(bytes)+1. Since alignment must be + * non-negative power-of-two, the elements are strictly positive + * integers. Zero is the sentinel for a missing node. In upper word, + * bound. */ + unsigned *alignment; - /* Linear constraints imposed. Nested array sized upfront, organized as - * linear[node_left][node_right]. That is, calculate indices as: - * - * Each element is itself a bit field denoting whether (c_j - c_i) bias - * is present or not, including negative biases. - * - * Note for Midgard, there are 16 components so the bias is in range - * [-15, 15] so encoded by 32-bit field. */ + /* Linear constraints imposed. Nested array sized upfront, organized as + * linear[node_left][node_right]. 
That is, calculate indices as: + * + * Each element is itself a bit field denoting whether (c_j - c_i) bias + * is present or not, including negative biases. + * + * Note for Midgard, there are 16 components so the bias is in range + * [-15, 15] so encoded by 32-bit field. */ - uint32_t *linear; + uint32_t *linear; - /* Per node max modulus constraints */ - uint8_t *modulus; + /* Per node max modulus constraints */ + uint8_t *modulus; - /* Classes allow nodes to be partitioned with a starting register. - * Classes cannot interfere; that is, they are true partitions in the - * usual sense of the word. class_count is the number of classes. - * class[] is indexed by a node to get the mapped class. class_start is - * biased to all solutions in the class. */ + /* Classes allow nodes to be partitioned with a starting register. + * Classes cannot interfere; that is, they are true partitions in the + * usual sense of the word. class_count is the number of classes. + * class[] is indexed by a node to get the mapped class. class_start is + * biased to all solutions in the class. */ - unsigned class_count; - unsigned *class; - unsigned *class_start; - unsigned *class_size; - bool *class_disjoint; + unsigned class_count; + unsigned *class; + unsigned *class_start; + unsigned *class_size; + bool *class_disjoint; - /* Before solving, forced registers; after solving, solutions. */ - unsigned *solutions; + /* Before solving, forced registers; after solving, solutions. */ + unsigned *solutions; - /* For register spilling, the costs to spill nodes (as set by the user) - * are in spill_cost[], negative if a node is unspillable. Internally, - * spill_class specifies which class to spill (whichever class failed - * to allocate) */ + /* For register spilling, the costs to spill nodes (as set by the user) + * are in spill_cost[], negative if a node is unspillable. 
Internally, + * spill_class specifies which class to spill (whichever class failed + * to allocate) */ - signed *spill_cost; - unsigned spill_class; + signed *spill_cost; + unsigned spill_class; }; -struct lcra_state * -lcra_alloc_equations( - unsigned node_count, unsigned class_count); +struct lcra_state *lcra_alloc_equations(unsigned node_count, + unsigned class_count); -void -lcra_free(struct lcra_state *l); +void lcra_free(struct lcra_state *l); -void -lcra_set_disjoint_class(struct lcra_state *l, unsigned c1, unsigned c2); +void lcra_set_disjoint_class(struct lcra_state *l, unsigned c1, unsigned c2); -void -lcra_set_alignment(struct lcra_state *l, unsigned node, unsigned align_log2, unsigned bound); +void lcra_set_alignment(struct lcra_state *l, unsigned node, + unsigned align_log2, unsigned bound); -void -lcra_restrict_range(struct lcra_state *l, unsigned node, unsigned len); +void lcra_restrict_range(struct lcra_state *l, unsigned node, unsigned len); -void -lcra_add_node_interference(struct lcra_state *l, unsigned i, unsigned cmask_i, unsigned j, unsigned cmask_j); +void lcra_add_node_interference(struct lcra_state *l, unsigned i, + unsigned cmask_i, unsigned j, unsigned cmask_j); -bool -lcra_solve(struct lcra_state *l); +bool lcra_solve(struct lcra_state *l); -void -lcra_set_node_spill_cost(struct lcra_state *l, unsigned node, signed cost); +void lcra_set_node_spill_cost(struct lcra_state *l, unsigned node, signed cost); -signed -lcra_get_best_spill_node(struct lcra_state *l); +signed lcra_get_best_spill_node(struct lcra_state *l); #endif diff --git a/src/panfrost/util/nir_mod_helpers.c b/src/panfrost/util/nir_mod_helpers.c index 2fe7b4fabe5..b8baf720ced 100644 --- a/src/panfrost/util/nir_mod_helpers.c +++ b/src/panfrost/util/nir_mod_helpers.c @@ -34,12 +34,13 @@ * ALU source (principally fneg or fabs). If so, return true and rewrite the * source to be the argument, respecting swizzles as needed. If not (or it * cannot be proven), return false and leave the source untouched. -*/ + */ bool pan_has_source_mod(nir_alu_src *src, nir_op op) { - if (!src->src.is_ssa || src->src.ssa->parent_instr->type != nir_instr_type_alu) + if (!src->src.is_ssa || + src->src.ssa->parent_instr->type != nir_instr_type_alu) return false; nir_alu_instr *alu = nir_instr_as_alu(src->src.ssa->parent_instr); @@ -56,7 +57,7 @@ pan_has_source_mod(nir_alu_src *src, nir_op op) /* Okay - we've found the modifier we wanted. Let's construct the new ALU * src. In a scalar world, this is just psrc, but for vector archs we need - * to respect the swizzle, so we compose. + * to respect the swizzle, so we compose. 
*/ nir_alu_src nsrc = { diff --git a/src/panfrost/util/pan_collect_varyings.c b/src/panfrost/util/pan_collect_varyings.c index 6aa85b7089f..505aac085c7 100644 --- a/src/panfrost/util/pan_collect_varyings.c +++ b/src/panfrost/util/pan_collect_varyings.c @@ -29,165 +29,162 @@ static enum pipe_format varying_format(nir_alu_type t, unsigned ncomps) { - assert(ncomps >= 1 && ncomps <= 4); + assert(ncomps >= 1 && ncomps <= 4); -#define VARYING_FORMAT(ntype, nsz, ptype, psz) \ - { \ - .type = nir_type_ ## ntype ## nsz, \ - .formats = { \ - PIPE_FORMAT_R ## psz ## _ ## ptype, \ - PIPE_FORMAT_R ## psz ## G ## psz ## _ ## ptype, \ - PIPE_FORMAT_R ## psz ## G ## psz ## B ## psz ## _ ## ptype, \ - PIPE_FORMAT_R ## psz ## G ## psz ## B ## psz ## A ## psz ## _ ## ptype, \ - } \ - } +#define VARYING_FORMAT(ntype, nsz, ptype, psz) \ + { \ + .type = nir_type_##ntype##nsz, .formats = { \ + PIPE_FORMAT_R##psz##_##ptype, \ + PIPE_FORMAT_R##psz##G##psz##_##ptype, \ + PIPE_FORMAT_R##psz##G##psz##B##psz##_##ptype, \ + PIPE_FORMAT_R##psz##G##psz##B##psz##A##psz##_##ptype, \ + } \ + } - static const struct { - nir_alu_type type; - enum pipe_format formats[4]; - } conv[] = { - VARYING_FORMAT(float, 32, FLOAT, 32), - VARYING_FORMAT(uint, 32, UINT, 32), - VARYING_FORMAT(float, 16, FLOAT, 16), - }; + static const struct { + nir_alu_type type; + enum pipe_format formats[4]; + } conv[] = { + VARYING_FORMAT(float, 32, FLOAT, 32), + VARYING_FORMAT(uint, 32, UINT, 32), + VARYING_FORMAT(float, 16, FLOAT, 16), + }; #undef VARYING_FORMAT - assert(ncomps > 0 && ncomps <= ARRAY_SIZE(conv[0].formats)); + assert(ncomps > 0 && ncomps <= ARRAY_SIZE(conv[0].formats)); - for (unsigned i = 0; i < ARRAY_SIZE(conv); i++) { - if (conv[i].type == t) - return conv[i].formats[ncomps - 1]; - } + for (unsigned i = 0; i < ARRAY_SIZE(conv); i++) { + if (conv[i].type == t) + return conv[i].formats[ncomps - 1]; + } - unreachable("Invalid type"); + unreachable("Invalid type"); } struct slot_info { - nir_alu_type type; - unsigned count; - unsigned index; + nir_alu_type type; + unsigned count; + unsigned index; }; static bool walk_varyings(UNUSED nir_builder *b, nir_instr *instr, void *data) { - struct slot_info *slots = data; + struct slot_info *slots = data; - if (instr->type != nir_instr_type_intrinsic) - return false; + if (instr->type != nir_instr_type_intrinsic) + return false; - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - unsigned count; + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + unsigned count; - /* Only consider intrinsics that access varyings */ - switch (intr->intrinsic) { - case nir_intrinsic_store_output: - if (b->shader->info.stage != MESA_SHADER_VERTEX) - return false; + /* Only consider intrinsics that access varyings */ + switch (intr->intrinsic) { + case nir_intrinsic_store_output: + if (b->shader->info.stage != MESA_SHADER_VERTEX) + return false; - count = nir_src_num_components(intr->src[0]); - break; + count = nir_src_num_components(intr->src[0]); + break; - case nir_intrinsic_load_input: - case nir_intrinsic_load_interpolated_input: - if (b->shader->info.stage != MESA_SHADER_FRAGMENT) - return false; + case nir_intrinsic_load_input: + case nir_intrinsic_load_interpolated_input: + if (b->shader->info.stage != MESA_SHADER_FRAGMENT) + return false; - count = nir_dest_num_components(intr->dest); - break; + count = nir_dest_num_components(intr->dest); + break; - default: - return false; - } + default: + return false; + } - nir_io_semantics sem = nir_intrinsic_io_semantics(intr); + 
nir_io_semantics sem = nir_intrinsic_io_semantics(intr); - if (sem.no_varying) - return false; + if (sem.no_varying) + return false; - /* In a fragment shader, flat shading is lowered to load_input but - * interpolation is lowered to load_interpolated_input, so we can check - * the intrinsic to distinguish. - * - * In a vertex shader, we consider everything flat, as the information - * will not contribute to the final linked varyings -- flatness is used - * only to determine the type, and the GL linker uses the type from the - * fragment shader instead. - */ - bool flat = (intr->intrinsic != nir_intrinsic_load_interpolated_input); - nir_alu_type type = flat ? nir_type_uint : nir_type_float; + /* In a fragment shader, flat shading is lowered to load_input but + * interpolation is lowered to load_interpolated_input, so we can check + * the intrinsic to distinguish. + * + * In a vertex shader, we consider everything flat, as the information + * will not contribute to the final linked varyings -- flatness is used + * only to determine the type, and the GL linker uses the type from the + * fragment shader instead. + */ + bool flat = (intr->intrinsic != nir_intrinsic_load_interpolated_input); + nir_alu_type type = flat ? nir_type_uint : nir_type_float; - /* Demote interpolated float varyings to fp16 where possible. We do not - * demote flat varyings, including integer varyings, due to various - * issues with the Midgard hardware behaviour and TGSI shaders, as well - * as having no demonstrable benefit in practice. - */ - if (type == nir_type_float && sem.medium_precision) - type |= 16; - else - type |= 32; + /* Demote interpolated float varyings to fp16 where possible. We do not + * demote flat varyings, including integer varyings, due to various + * issues with the Midgard hardware behaviour and TGSI shaders, as well + * as having no demonstrable benefit in practice. + */ + if (type == nir_type_float && sem.medium_precision) + type |= 16; + else + type |= 32; - /* Count currently contains the number of components accessed by this - * intrinsics. However, we may be accessing a fractional location, - * indicating by the NIR component. Add that in. The final value be the - * maximum (component + count), an upper bound on the number of - * components possibly used. - */ - count += nir_intrinsic_component(intr); + /* Count currently contains the number of components accessed by this + * intrinsics. However, we may be accessing a fractional location, + * indicating by the NIR component. Add that in. The final value be the + * maximum (component + count), an upper bound on the number of + * components possibly used. 
+ */ + count += nir_intrinsic_component(intr); - /* Consider each slot separately */ - for (unsigned offset = 0; offset < sem.num_slots; ++offset) { - unsigned location = sem.location + offset; - unsigned index = nir_intrinsic_base(intr) + offset; + /* Consider each slot separately */ + for (unsigned offset = 0; offset < sem.num_slots; ++offset) { + unsigned location = sem.location + offset; + unsigned index = nir_intrinsic_base(intr) + offset; - if (slots[location].type) { - assert(slots[location].type == type); - assert(slots[location].index == index); - } else { - slots[location].type = type; - slots[location].index = index; - } + if (slots[location].type) { + assert(slots[location].type == type); + assert(slots[location].index == index); + } else { + slots[location].type = type; + slots[location].index = index; + } - slots[location].count = MAX2(slots[location].count, count); - } + slots[location].count = MAX2(slots[location].count, count); + } - return false; + return false; } void pan_nir_collect_varyings(nir_shader *s, struct pan_shader_info *info) { - if (s->info.stage != MESA_SHADER_VERTEX && - s->info.stage != MESA_SHADER_FRAGMENT) - return; + if (s->info.stage != MESA_SHADER_VERTEX && + s->info.stage != MESA_SHADER_FRAGMENT) + return; - struct slot_info slots[64] = { 0 }; - nir_shader_instructions_pass(s, walk_varyings, nir_metadata_all, slots); + struct slot_info slots[64] = {0}; + nir_shader_instructions_pass(s, walk_varyings, nir_metadata_all, slots); - struct pan_shader_varying *varyings = - (s->info.stage == MESA_SHADER_VERTEX) ? - info->varyings.output : - info->varyings.input; + struct pan_shader_varying *varyings = (s->info.stage == MESA_SHADER_VERTEX) + ? info->varyings.output + : info->varyings.input; - unsigned count = 0; + unsigned count = 0; - for (unsigned i = 0; i < ARRAY_SIZE(slots); ++i) { - if (!slots[i].type) - continue; + for (unsigned i = 0; i < ARRAY_SIZE(slots); ++i) { + if (!slots[i].type) + continue; - enum pipe_format format = - varying_format(slots[i].type, slots[i].count); - assert(format != PIPE_FORMAT_NONE); + enum pipe_format format = varying_format(slots[i].type, slots[i].count); + assert(format != PIPE_FORMAT_NONE); - unsigned index = slots[i].index; - count = MAX2(count, index + 1); + unsigned index = slots[i].index; + count = MAX2(count, index + 1); - varyings[index].location = i; - varyings[index].format = format; - } + varyings[index].location = i; + varyings[index].format = format; + } - if (s->info.stage == MESA_SHADER_VERTEX) - info->varyings.output_count = count; - else - info->varyings.input_count = count; + if (s->info.stage == MESA_SHADER_VERTEX) + info->varyings.output_count = count; + else + info->varyings.input_count = count; } diff --git a/src/panfrost/util/pan_ir.c b/src/panfrost/util/pan_ir.c index c469274933f..8524e08bc84 100644 --- a/src/panfrost/util/pan_ir.c +++ b/src/panfrost/util/pan_ir.c @@ -32,73 +32,66 @@ uint16_t pan_to_bytemask(unsigned bytes, unsigned mask) { - switch (bytes) { - case 0: - assert(mask == 0); - return 0; + switch (bytes) { + case 0: + assert(mask == 0); + return 0; - case 8: - return mask; + case 8: + return mask; - case 16: { - unsigned space = - (mask & 0x1) | - ((mask & 0x2) << (2 - 1)) | - ((mask & 0x4) << (4 - 2)) | - ((mask & 0x8) << (6 - 3)) | - ((mask & 0x10) << (8 - 4)) | - ((mask & 0x20) << (10 - 5)) | - ((mask & 0x40) << (12 - 6)) | - ((mask & 0x80) << (14 - 7)); + case 16: { + unsigned space = + (mask & 0x1) | ((mask & 0x2) << (2 - 1)) | ((mask & 0x4) << (4 - 2)) | + ((mask & 0x8) << (6 - 
3)) | ((mask & 0x10) << (8 - 4)) | + ((mask & 0x20) << (10 - 5)) | ((mask & 0x40) << (12 - 6)) | + ((mask & 0x80) << (14 - 7)); - return space | (space << 1); - } + return space | (space << 1); + } - case 32: { - unsigned space = - (mask & 0x1) | - ((mask & 0x2) << (4 - 1)) | - ((mask & 0x4) << (8 - 2)) | - ((mask & 0x8) << (12 - 3)); + case 32: { + unsigned space = (mask & 0x1) | ((mask & 0x2) << (4 - 1)) | + ((mask & 0x4) << (8 - 2)) | ((mask & 0x8) << (12 - 3)); - return space | (space << 1) | (space << 2) | (space << 3); - } + return space | (space << 1) | (space << 2) | (space << 3); + } - case 64: { - unsigned A = (mask & 0x1) ? 0xFF : 0x00; - unsigned B = (mask & 0x2) ? 0xFF : 0x00; - return A | (B << 8); - } + case 64: { + unsigned A = (mask & 0x1) ? 0xFF : 0x00; + unsigned B = (mask & 0x2) ? 0xFF : 0x00; + return A | (B << 8); + } - default: - unreachable("Invalid register mode"); - } + default: + unreachable("Invalid register mode"); + } } void pan_block_add_successor(pan_block *block, pan_block *successor) { - assert(block); - assert(successor); + assert(block); + assert(successor); - /* Cull impossible edges */ - if (block->unconditional_jumps) - return; + /* Cull impossible edges */ + if (block->unconditional_jumps) + return; - for (unsigned i = 0; i < ARRAY_SIZE(block->successors); ++i) { - if (block->successors[i]) { - if (block->successors[i] == successor) - return; - else - continue; - } + for (unsigned i = 0; i < ARRAY_SIZE(block->successors); ++i) { + if (block->successors[i]) { + if (block->successors[i] == successor) + return; + else + continue; + } - block->successors[i] = successor; - _mesa_set_add(successor->predecessors, block); - return; - } + block->successors[i] = successor; + _mesa_set_add(successor->predecessors, block); + return; + } - unreachable("Too many successors"); + unreachable("Too many successors"); } /* Prints a NIR ALU type in Bifrost-style ".f32" ".i8" etc */ @@ -106,45 +99,42 @@ pan_block_add_successor(pan_block *block, pan_block *successor) void pan_print_alu_type(nir_alu_type t, FILE *fp) { - unsigned size = nir_alu_type_get_type_size(t); - nir_alu_type base = nir_alu_type_get_base_type(t); + unsigned size = nir_alu_type_get_type_size(t); + nir_alu_type base = nir_alu_type_get_base_type(t); - switch (base) { - case nir_type_int: - fprintf(fp, ".i"); - break; - case nir_type_uint: - fprintf(fp, ".u"); - break; - case nir_type_bool: - fprintf(fp, ".b"); - break; - case nir_type_float: - fprintf(fp, ".f"); - break; - default: - fprintf(fp, ".unknown"); - break; - } + switch (base) { + case nir_type_int: + fprintf(fp, ".i"); + break; + case nir_type_uint: + fprintf(fp, ".u"); + break; + case nir_type_bool: + fprintf(fp, ".b"); + break; + case nir_type_float: + fprintf(fp, ".f"); + break; + default: + fprintf(fp, ".unknown"); + break; + } - fprintf(fp, "%u", size); + fprintf(fp, "%u", size); } /* Could optimize with a better data structure if anyone cares, TODO: profile */ unsigned -pan_lookup_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned offs) +pan_lookup_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, + unsigned offs) { - struct panfrost_ubo_word word = { - .ubo = ubo, - .offset = offs - }; + struct panfrost_ubo_word word = {.ubo = ubo, .offset = offs}; - for (unsigned i = 0; i < push->count; ++i) { - if (memcmp(push->words + i, &word, sizeof(word)) == 0) - return i; - } - - unreachable("UBO not pushed"); + for (unsigned i = 0; i < push->count; ++i) { + if (memcmp(push->words + i, &word, sizeof(word)) == 0) + return i; 
+ } + unreachable("UBO not pushed"); } diff --git a/src/panfrost/util/pan_ir.h b/src/panfrost/util/pan_ir.h index 8b41668ea66..3ddec343856 100644 --- a/src/panfrost/util/pan_ir.h +++ b/src/panfrost/util/pan_ir.h @@ -26,8 +26,8 @@ #include #include "compiler/nir/nir.h" -#include "util/u_dynarray.h" #include "util/hash_table.h" +#include "util/u_dynarray.h" /* On Valhall, the driver gives the hardware a table of resource tables. * Resources are addressed as the index of the table together with the index of @@ -38,14 +38,14 @@ * Gallium driver and the Valhall compiler. */ enum pan_resource_table { - PAN_TABLE_UBO = 0, - PAN_TABLE_ATTRIBUTE, - PAN_TABLE_ATTRIBUTE_BUFFER, - PAN_TABLE_SAMPLER, - PAN_TABLE_TEXTURE, - PAN_TABLE_IMAGE, + PAN_TABLE_UBO = 0, + PAN_TABLE_ATTRIBUTE, + PAN_TABLE_ATTRIBUTE_BUFFER, + PAN_TABLE_SAMPLER, + PAN_TABLE_TEXTURE, + PAN_TABLE_IMAGE, - PAN_NUM_RESOURCE_TABLES + PAN_NUM_RESOURCE_TABLES }; /* Indices for named (non-XFB) varyings that are present. These are packed @@ -61,15 +61,15 @@ enum pan_resource_table { */ enum pan_special_varying { - PAN_VARY_GENERAL = 0, - PAN_VARY_POSITION = 1, - PAN_VARY_PSIZ = 2, - PAN_VARY_PNTCOORD = 3, - PAN_VARY_FACE = 4, - PAN_VARY_FRAGCOORD = 5, + PAN_VARY_GENERAL = 0, + PAN_VARY_POSITION = 1, + PAN_VARY_PSIZ = 2, + PAN_VARY_PNTCOORD = 3, + PAN_VARY_FACE = 4, + PAN_VARY_FRAGCOORD = 5, - /* Keep last */ - PAN_VARY_MAX, + /* Keep last */ + PAN_VARY_MAX, }; /* Maximum number of attribute descriptors required for varyings. These include @@ -84,53 +84,49 @@ enum pan_special_varying { /* Allow 2D of sysval IDs, while allowing nonparametric sysvals to equal * their class for equal comparison */ -#define PAN_SYSVAL(type, no) (((no) << 16) | PAN_SYSVAL_##type) -#define PAN_SYSVAL_TYPE(sysval) ((sysval) & 0xffff) -#define PAN_SYSVAL_ID(sysval) ((sysval) >> 16) +#define PAN_SYSVAL(type, no) (((no) << 16) | PAN_SYSVAL_##type) +#define PAN_SYSVAL_TYPE(sysval) ((sysval)&0xffff) +#define PAN_SYSVAL_ID(sysval) ((sysval) >> 16) /* Define some common types. We start at one for easy indexing of hash * tables internal to the compiler */ enum { - PAN_SYSVAL_VIEWPORT_SCALE = 1, - PAN_SYSVAL_VIEWPORT_OFFSET = 2, - PAN_SYSVAL_TEXTURE_SIZE = 3, - PAN_SYSVAL_SSBO = 4, - PAN_SYSVAL_NUM_WORK_GROUPS = 5, - PAN_SYSVAL_SAMPLER = 7, - PAN_SYSVAL_LOCAL_GROUP_SIZE = 8, - PAN_SYSVAL_WORK_DIM = 9, - PAN_SYSVAL_IMAGE_SIZE = 10, - PAN_SYSVAL_SAMPLE_POSITIONS = 11, - PAN_SYSVAL_MULTISAMPLED = 12, - PAN_SYSVAL_RT_CONVERSION = 13, - PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS = 14, - PAN_SYSVAL_DRAWID = 15, - PAN_SYSVAL_BLEND_CONSTANTS = 16, - PAN_SYSVAL_XFB = 17, - PAN_SYSVAL_NUM_VERTICES = 18, + PAN_SYSVAL_VIEWPORT_SCALE = 1, + PAN_SYSVAL_VIEWPORT_OFFSET = 2, + PAN_SYSVAL_TEXTURE_SIZE = 3, + PAN_SYSVAL_SSBO = 4, + PAN_SYSVAL_NUM_WORK_GROUPS = 5, + PAN_SYSVAL_SAMPLER = 7, + PAN_SYSVAL_LOCAL_GROUP_SIZE = 8, + PAN_SYSVAL_WORK_DIM = 9, + PAN_SYSVAL_IMAGE_SIZE = 10, + PAN_SYSVAL_SAMPLE_POSITIONS = 11, + PAN_SYSVAL_MULTISAMPLED = 12, + PAN_SYSVAL_RT_CONVERSION = 13, + PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS = 14, + PAN_SYSVAL_DRAWID = 15, + PAN_SYSVAL_BLEND_CONSTANTS = 16, + PAN_SYSVAL_XFB = 17, + PAN_SYSVAL_NUM_VERTICES = 18, }; -#define PAN_TXS_SYSVAL_ID(texidx, dim, is_array) \ - ((texidx) | ((dim) << 7) | ((is_array) ? (1 << 9) : 0)) +#define PAN_TXS_SYSVAL_ID(texidx, dim, is_array) \ + ((texidx) | ((dim) << 7) | ((is_array) ? 
(1 << 9) : 0)) -#define PAN_SYSVAL_ID_TO_TXS_TEX_IDX(id) ((id) & 0x7f) -#define PAN_SYSVAL_ID_TO_TXS_DIM(id) (((id) >> 7) & 0x3) -#define PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(id) !!((id) & (1 << 9)) +#define PAN_SYSVAL_ID_TO_TXS_TEX_IDX(id) ((id)&0x7f) +#define PAN_SYSVAL_ID_TO_TXS_DIM(id) (((id) >> 7) & 0x3) +#define PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(id) !!((id) & (1 << 9)) /* Special attribute slots for vertex builtins. Sort of arbitrary but let's be * consistent with the blob so we can compare traces easier. */ -enum { - PAN_VERTEX_ID = 16, - PAN_INSTANCE_ID = 17, - PAN_MAX_ATTRIBUTE -}; +enum { PAN_VERTEX_ID = 16, PAN_INSTANCE_ID = 17, PAN_MAX_ATTRIBUTE }; struct panfrost_sysvals { - /* The mapping of sysvals to uniforms, the count, and the off-by-one inverse */ - unsigned sysvals[MAX_SYSVAL_COUNT]; - unsigned sysval_count; + /* The mapping of sysvals to uniforms, the count, and the off-by-one inverse */ + unsigned sysvals[MAX_SYSVAL_COUNT]; + unsigned sysval_count; }; /* Architecturally, Bifrost/Valhall can address 128 FAU slots of 64-bits each. @@ -149,82 +145,78 @@ struct panfrost_sysvals { * an offset to a word must be < 2^16. There are less than 2^8 UBOs */ struct panfrost_ubo_word { - uint16_t ubo; - uint16_t offset; + uint16_t ubo; + uint16_t offset; }; struct panfrost_ubo_push { - unsigned count; - struct panfrost_ubo_word words[PAN_MAX_PUSH]; + unsigned count; + struct panfrost_ubo_word words[PAN_MAX_PUSH]; }; /* Helper for searching the above. Note this is O(N) to the number of pushed * constants, do not run in the draw call hot path */ -unsigned -pan_lookup_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned offs); +unsigned pan_lookup_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, + unsigned offs); struct hash_table_u64 * panfrost_init_sysvals(struct panfrost_sysvals *sysvals, - struct panfrost_sysvals *fixed_sysvals, - void *memctx); + struct panfrost_sysvals *fixed_sysvals, void *memctx); -unsigned -pan_lookup_sysval(struct hash_table_u64 *sysval_to_id, - struct panfrost_sysvals *sysvals, - int sysval); +unsigned pan_lookup_sysval(struct hash_table_u64 *sysval_to_id, + struct panfrost_sysvals *sysvals, int sysval); -int -panfrost_sysval_for_instr(nir_instr *instr, nir_dest *dest); +int panfrost_sysval_for_instr(nir_instr *instr, nir_dest *dest); struct panfrost_compile_inputs { - struct util_debug_callback *debug; + struct util_debug_callback *debug; - unsigned gpu_id; - bool is_blend, is_blit; - struct { - unsigned rt; - unsigned nr_samples; - uint64_t bifrost_blend_desc; - } blend; - int fixed_sysval_ubo; - struct panfrost_sysvals *fixed_sysval_layout; - bool no_idvs; - bool no_ubo_to_push; + unsigned gpu_id; + bool is_blend, is_blit; + struct { + unsigned rt; + unsigned nr_samples; + uint64_t bifrost_blend_desc; + } blend; + int fixed_sysval_ubo; + struct panfrost_sysvals *fixed_sysval_layout; + bool no_idvs; + bool no_ubo_to_push; - enum pipe_format rt_formats[8]; - uint8_t raw_fmt_mask; - unsigned nr_cbufs; + enum pipe_format rt_formats[8]; + uint8_t raw_fmt_mask; + unsigned nr_cbufs; - /* Used on Valhall. - * - * Bit mask of special desktop-only varyings (e.g VARYING_SLOT_TEX0) - * written by the previous stage (fragment shader) or written by this - * stage (vertex shader). Bits are slots from gl_varying_slot. - * - * For modern APIs (GLES or VK), this should be 0. - */ - uint32_t fixed_varying_mask; + /* Used on Valhall. 
+ * + * Bit mask of special desktop-only varyings (e.g VARYING_SLOT_TEX0) + * written by the previous stage (fragment shader) or written by this + * stage (vertex shader). Bits are slots from gl_varying_slot. + * + * For modern APIs (GLES or VK), this should be 0. + */ + uint32_t fixed_varying_mask; - union { - struct { - bool static_rt_conv; - uint32_t rt_conv[8]; - } bifrost; - }; + union { + struct { + bool static_rt_conv; + uint32_t rt_conv[8]; + } bifrost; + }; }; struct pan_shader_varying { - gl_varying_slot location; - enum pipe_format format; + gl_varying_slot location; + enum pipe_format format; }; struct bifrost_shader_blend_info { - nir_alu_type type; - uint32_t return_offset; + nir_alu_type type; + uint32_t return_offset; - /* mali_bifrost_register_file_format corresponding to nir_alu_type */ - unsigned format; + /* mali_bifrost_register_file_format corresponding to nir_alu_type */ + unsigned format; }; /* @@ -234,227 +226,226 @@ struct bifrost_shader_blend_info { * GenXML. */ struct bifrost_message_preload { - /* Whether to preload this message */ - bool enabled; + /* Whether to preload this message */ + bool enabled; - /* Varying to load from */ - unsigned varying_index; + /* Varying to load from */ + unsigned varying_index; - /* Register type, FP32 otherwise */ - bool fp16; + /* Register type, FP32 otherwise */ + bool fp16; - /* Number of components, ignored if texturing */ - unsigned num_components; + /* Number of components, ignored if texturing */ + unsigned num_components; - /* If texture is set, performs a texture instruction according to - * texture_index, skip, and zero_lod. If texture is unset, only the - * varying load is performed. - */ - bool texture, skip, zero_lod; - unsigned texture_index; + /* If texture is set, performs a texture instruction according to + * texture_index, skip, and zero_lod. If texture is unset, only the + * varying load is performed. + */ + bool texture, skip, zero_lod; + unsigned texture_index; }; struct bifrost_shader_info { - struct bifrost_shader_blend_info blend[8]; - nir_alu_type blend_src1_type; - bool wait_6, wait_7; - struct bifrost_message_preload messages[2]; + struct bifrost_shader_blend_info blend[8]; + nir_alu_type blend_src1_type; + bool wait_6, wait_7; + struct bifrost_message_preload messages[2]; - /* Whether any flat varyings are loaded. This may disable optimizations - * that change the provoking vertex, since that would load incorrect - * values for flat varyings. - */ - bool uses_flat_shading; + /* Whether any flat varyings are loaded. This may disable optimizations + * that change the provoking vertex, since that would load incorrect + * values for flat varyings. 
+ */ + bool uses_flat_shading; }; struct midgard_shader_info { - unsigned first_tag; + unsigned first_tag; }; struct pan_shader_info { - gl_shader_stage stage; - unsigned work_reg_count; - unsigned tls_size; - unsigned wls_size; + gl_shader_stage stage; + unsigned work_reg_count; + unsigned tls_size; + unsigned wls_size; - /* Bit mask of preloaded registers */ - uint64_t preload; + /* Bit mask of preloaded registers */ + uint64_t preload; - union { - struct { - bool reads_frag_coord; - bool reads_point_coord; - bool reads_face; - bool can_discard; - bool writes_depth; - bool writes_stencil; - bool writes_coverage; - bool sidefx; - bool sample_shading; - bool early_fragment_tests; - bool can_early_z, can_fpk; - bool untyped_color_outputs; - BITSET_WORD outputs_read; - BITSET_WORD outputs_written; - } fs; + union { + struct { + bool reads_frag_coord; + bool reads_point_coord; + bool reads_face; + bool can_discard; + bool writes_depth; + bool writes_stencil; + bool writes_coverage; + bool sidefx; + bool sample_shading; + bool early_fragment_tests; + bool can_early_z, can_fpk; + bool untyped_color_outputs; + BITSET_WORD outputs_read; + BITSET_WORD outputs_written; + } fs; - struct { - bool writes_point_size; + struct { + bool writes_point_size; - /* If the primary shader writes point size, the Valhall - * driver may need a variant that does not write point - * size. Offset to such a shader in the program binary. - * - * Zero if no such variant is required. - * - * Only used with IDVS on Valhall. - */ - unsigned no_psiz_offset; + /* If the primary shader writes point size, the Valhall + * driver may need a variant that does not write point + * size. Offset to such a shader in the program binary. + * + * Zero if no such variant is required. + * + * Only used with IDVS on Valhall. + */ + unsigned no_psiz_offset; - /* Set if Index-Driven Vertex Shading is in use */ - bool idvs; + /* Set if Index-Driven Vertex Shading is in use */ + bool idvs; - /* If IDVS is used, whether a varying shader is used */ - bool secondary_enable; + /* If IDVS is used, whether a varying shader is used */ + bool secondary_enable; - /* If a varying shader is used, the varying shader's - * offset in the program binary - */ - unsigned secondary_offset; + /* If a varying shader is used, the varying shader's + * offset in the program binary + */ + unsigned secondary_offset; - /* If IDVS is in use, number of work registers used by - * the varying shader - */ - unsigned secondary_work_reg_count; + /* If IDVS is in use, number of work registers used by + * the varying shader + */ + unsigned secondary_work_reg_count; - /* If IDVS is in use, bit mask of preloaded registers - * used by the varying shader - */ - uint64_t secondary_preload; - } vs; + /* If IDVS is in use, bit mask of preloaded registers + * used by the varying shader + */ + uint64_t secondary_preload; + } vs; - struct { - /* Is it legal to merge workgroups? This is true if the - * shader uses neither barriers nor shared memory. This - * requires caution: if the API allows specifying shared - * memory at launch time (instead of compile time), that - * memory will not be accounted for by the compiler. - * - * Used by the Valhall hardware. - */ - bool allow_merging_workgroups; - } cs; - }; + struct { + /* Is it legal to merge workgroups? This is true if the + * shader uses neither barriers nor shared memory. 
This + * requires caution: if the API allows specifying shared + * memory at launch time (instead of compile time), that + * memory will not be accounted for by the compiler. + * + * Used by the Valhall hardware. + */ + bool allow_merging_workgroups; + } cs; + }; - /* Does the shader contains a barrier? or (for fragment shaders) does it - * require helper invocations, which demand the same ordering guarantees - * of the hardware? These notions are unified in the hardware, so we - * unify them here as well. - */ - bool contains_barrier; - bool separable; - bool writes_global; - uint64_t outputs_written; + /* Does the shader contains a barrier? or (for fragment shaders) does it + * require helper invocations, which demand the same ordering guarantees + * of the hardware? These notions are unified in the hardware, so we + * unify them here as well. + */ + bool contains_barrier; + bool separable; + bool writes_global; + uint64_t outputs_written; - /* Floating point controls that the driver should try to honour */ - bool ftz_fp16, ftz_fp32; + /* Floating point controls that the driver should try to honour */ + bool ftz_fp16, ftz_fp32; - unsigned sampler_count; - unsigned texture_count; - unsigned ubo_count; - unsigned attributes_read_count; - unsigned attribute_count; - unsigned attributes_read; + unsigned sampler_count; + unsigned texture_count; + unsigned ubo_count; + unsigned attributes_read_count; + unsigned attribute_count; + unsigned attributes_read; - struct { - unsigned input_count; - struct pan_shader_varying input[PAN_MAX_VARYINGS]; - unsigned output_count; - struct pan_shader_varying output[PAN_MAX_VARYINGS]; - } varyings; + struct { + unsigned input_count; + struct pan_shader_varying input[PAN_MAX_VARYINGS]; + unsigned output_count; + struct pan_shader_varying output[PAN_MAX_VARYINGS]; + } varyings; - struct panfrost_sysvals sysvals; + struct panfrost_sysvals sysvals; - /* UBOs to push to Register Mapped Uniforms (Midgard) or Fast Access - * Uniforms (Bifrost) */ - struct panfrost_ubo_push push; + /* UBOs to push to Register Mapped Uniforms (Midgard) or Fast Access + * Uniforms (Bifrost) */ + struct panfrost_ubo_push push; - uint32_t ubo_mask; + uint32_t ubo_mask; - union { - struct bifrost_shader_info bifrost; - struct midgard_shader_info midgard; - }; + union { + struct bifrost_shader_info bifrost; + struct midgard_shader_info midgard; + }; }; typedef struct pan_block { - /* Link to next block. Must be first for mir_get_block */ - struct list_head link; + /* Link to next block. Must be first for mir_get_block */ + struct list_head link; - /* List of instructions emitted for the current block */ - struct list_head instructions; + /* List of instructions emitted for the current block */ + struct list_head instructions; - /* Index of the block in source order */ - unsigned name; + /* Index of the block in source order */ + unsigned name; - /* Control flow graph */ - struct pan_block *successors[2]; - struct set *predecessors; - bool unconditional_jumps; + /* Control flow graph */ + struct pan_block *successors[2]; + struct set *predecessors; + bool unconditional_jumps; - /* In liveness analysis, these are live masks (per-component) for - * indices for the block. Scalar compilers have the luxury of using - * simple bit fields, but for us, liveness is a vector idea. */ - uint16_t *live_in; - uint16_t *live_out; + /* In liveness analysis, these are live masks (per-component) for + * indices for the block. 
Scalar compilers have the luxury of using + * simple bit fields, but for us, liveness is a vector idea. */ + uint16_t *live_in; + uint16_t *live_out; } pan_block; struct pan_instruction { - struct list_head link; + struct list_head link; }; -#define pan_foreach_instr_in_block_rev(block, v) \ - list_for_each_entry_rev(struct pan_instruction, v, &block->instructions, link) +#define pan_foreach_instr_in_block_rev(block, v) \ + list_for_each_entry_rev(struct pan_instruction, v, &block->instructions, \ + link) -#define pan_foreach_successor(blk, v) \ - pan_block *v; \ - pan_block **_v; \ - for (_v = (pan_block **) &blk->successors[0], \ - v = *_v; \ - v != NULL && _v < (pan_block **) &blk->successors[2]; \ - _v++, v = *_v) \ +#define pan_foreach_successor(blk, v) \ + pan_block *v; \ + pan_block **_v; \ + for (_v = (pan_block **)&blk->successors[0], v = *_v; \ + v != NULL && _v < (pan_block **)&blk->successors[2]; _v++, v = *_v) -#define pan_foreach_predecessor(blk, v) \ - struct set_entry *_entry_##v; \ - struct pan_block *v; \ - for (_entry_##v = _mesa_set_next_entry(blk->predecessors, NULL), \ - v = (struct pan_block *) (_entry_##v ? _entry_##v->key : NULL); \ - _entry_##v != NULL; \ - _entry_##v = _mesa_set_next_entry(blk->predecessors, _entry_##v), \ - v = (struct pan_block *) (_entry_##v ? _entry_##v->key : NULL)) +#define pan_foreach_predecessor(blk, v) \ + struct set_entry *_entry_##v; \ + struct pan_block *v; \ + for (_entry_##v = _mesa_set_next_entry(blk->predecessors, NULL), \ + v = (struct pan_block *)(_entry_##v ? _entry_##v->key : NULL); \ + _entry_##v != NULL; \ + _entry_##v = _mesa_set_next_entry(blk->predecessors, _entry_##v), \ + v = (struct pan_block *)(_entry_##v ? _entry_##v->key : NULL)) static inline pan_block * pan_exit_block(struct list_head *blocks) { - pan_block *last = list_last_entry(blocks, pan_block, link); - assert(!last->successors[0] && !last->successors[1]); - return last; + pan_block *last = list_last_entry(blocks, pan_block, link); + assert(!last->successors[0] && !last->successors[1]); + return last; } typedef void (*pan_liveness_update)(uint16_t *, void *, unsigned max); -void pan_liveness_gen(uint16_t *live, unsigned node, unsigned max, uint16_t mask); -void pan_liveness_kill(uint16_t *live, unsigned node, unsigned max, uint16_t mask); +void pan_liveness_gen(uint16_t *live, unsigned node, unsigned max, + uint16_t mask); +void pan_liveness_kill(uint16_t *live, unsigned node, unsigned max, + uint16_t mask); bool pan_liveness_get(uint16_t *live, unsigned node, uint16_t max); -void pan_compute_liveness(struct list_head *blocks, - unsigned temp_count, - pan_liveness_update callback); +void pan_compute_liveness(struct list_head *blocks, unsigned temp_count, + pan_liveness_update callback); void pan_free_liveness(struct list_head *blocks); -uint16_t -pan_to_bytemask(unsigned bytes, unsigned mask); +uint16_t pan_to_bytemask(unsigned bytes, unsigned mask); void pan_block_add_successor(pan_block *block, pan_block *successor); @@ -464,30 +455,30 @@ void pan_block_add_successor(pan_block *block, pan_block *successor); static inline unsigned pan_ssa_index(nir_ssa_def *ssa) { - /* Off-by-one ensures BIR_NO_ARG is skipped */ - return ((ssa->index + 1) << 1) | 0; + /* Off-by-one ensures BIR_NO_ARG is skipped */ + return ((ssa->index + 1) << 1) | 0; } static inline unsigned pan_src_index(nir_src *src) { - if (src->is_ssa) - return pan_ssa_index(src->ssa); - else { - assert(!src->reg.indirect); - return (src->reg.reg->index << 1) | PAN_IS_REG; - } + if (src->is_ssa) + 
return pan_ssa_index(src->ssa); + else { + assert(!src->reg.indirect); + return (src->reg.reg->index << 1) | PAN_IS_REG; + } } static inline unsigned pan_dest_index(nir_dest *dst) { - if (dst->is_ssa) - return pan_ssa_index(&dst->ssa); - else { - assert(!dst->reg.indirect); - return (dst->reg.reg->index << 1) | PAN_IS_REG; - } + if (dst->is_ssa) + return pan_ssa_index(&dst->ssa); + else { + assert(!dst->reg.indirect); + return (dst->reg.reg->index << 1) | PAN_IS_REG; + } } /* IR printing helpers */ @@ -523,14 +514,14 @@ void pan_nir_collect_varyings(nir_shader *s, struct pan_shader_info *info); static inline unsigned pan_subgroup_size(unsigned arch) { - if (arch >= 9) - return 16; - else if (arch >= 7) - return 8; - else if (arch >= 6) - return 4; - else - return 1; + if (arch >= 9) + return 16; + else if (arch >= 7) + return 8; + else if (arch >= 6) + return 4; + else + return 1; } #endif diff --git a/src/panfrost/util/pan_liveness.c b/src/panfrost/util/pan_liveness.c index 0ec9652b59d..e299bc29d75 100644 --- a/src/panfrost/util/pan_liveness.c +++ b/src/panfrost/util/pan_liveness.c @@ -21,10 +21,10 @@ * SOFTWARE. */ -#include "pan_ir.h" -#include "util/u_memory.h" #include "util/list.h" #include "util/set.h" +#include "util/u_memory.h" +#include "pan_ir.h" /* Routines for liveness analysis. Liveness is tracked per byte per node. Per * byte granularity is necessary for proper handling of int8 */ @@ -32,28 +32,28 @@ void pan_liveness_gen(uint16_t *live, unsigned node, unsigned max, uint16_t mask) { - if (node >= max) - return; + if (node >= max) + return; - live[node] |= mask; + live[node] |= mask; } void pan_liveness_kill(uint16_t *live, unsigned node, unsigned max, uint16_t mask) { - if (node >= max) - return; + if (node >= max) + return; - live[node] &= ~mask; + live[node] &= ~mask; } bool pan_liveness_get(uint16_t *live, unsigned node, uint16_t max) { - if (node >= max) - return false; + if (node >= max) + return false; - return live[node]; + return live[node]; } /* live_out[s] = sum { p in succ[s] } ( live_in[p] ) */ @@ -61,10 +61,10 @@ pan_liveness_get(uint16_t *live, unsigned node, uint16_t max) static void liveness_block_live_out(pan_block *blk, unsigned temp_count) { - pan_foreach_successor(blk, succ) { - for (unsigned i = 0; i < temp_count; ++i) - blk->live_out[i] |= succ->live_in[i]; - } + pan_foreach_successor(blk, succ) { + for (unsigned i = 0; i < temp_count; ++i) + blk->live_out[i] |= succ->live_in[i]; + } } /* Liveness analysis is a backwards-may dataflow analysis pass. Within a block, @@ -72,32 +72,30 @@ liveness_block_live_out(pan_block *blk, unsigned temp_count) * returns whether progress was made. 
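To show how a backend plugs into the helpers above, a minimal sketch (not part of the patch); the instruction type and its byte masks are invented for illustration:

struct example_ins {
   unsigned dest, src[2];
   uint16_t dest_bytemask, src_bytemask[2];
};

static void
example_liveness_update(uint16_t *live, void *_ins, unsigned max)
{
   struct example_ins *ins = _ins;

   /* Walking backwards through the block: the write kills the
    * destination's bytes... */
   pan_liveness_kill(live, ins->dest, max, ins->dest_bytemask);

   /* ...then each read makes the corresponding source bytes live */
   for (unsigned i = 0; i < 2; ++i)
      pan_liveness_gen(live, ins->src[i], max, ins->src_bytemask[i]);
}

A driver would then run pan_compute_liveness(blocks, node_count, example_liveness_update) to iterate this to a fixed point.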
*/ static bool -liveness_block_update( - pan_block *blk, unsigned temp_count, - pan_liveness_update callback) +liveness_block_update(pan_block *blk, unsigned temp_count, + pan_liveness_update callback) { - bool progress = false; + bool progress = false; - liveness_block_live_out(blk, temp_count); + liveness_block_live_out(blk, temp_count); - uint16_t *live = ralloc_array(blk, uint16_t, temp_count); - memcpy(live, blk->live_out, temp_count * sizeof(uint16_t)); + uint16_t *live = ralloc_array(blk, uint16_t, temp_count); + memcpy(live, blk->live_out, temp_count * sizeof(uint16_t)); - pan_foreach_instr_in_block_rev(blk, ins) - callback(live, (void *) ins, temp_count); + pan_foreach_instr_in_block_rev(blk, ins) + callback(live, (void *)ins, temp_count); - /* To figure out progress, diff live_in */ + /* To figure out progress, diff live_in */ - for (unsigned i = 0; (i < temp_count) && !progress; ++i) - progress |= (blk->live_in[i] != live[i]); + for (unsigned i = 0; (i < temp_count) && !progress; ++i) + progress |= (blk->live_in[i] != live[i]); - ralloc_free(blk->live_in); - blk->live_in = live; + ralloc_free(blk->live_in); + blk->live_in = live; - return progress; + return progress; } - /* Globally, liveness analysis uses a fixed-point algorithm based on a * worklist. We initialize a work list with the exit block. We iterate the work * list to compute live_in from live_out for each block on the work list, @@ -105,70 +103,66 @@ liveness_block_update( */ void -pan_compute_liveness( - struct list_head *blocks, - unsigned temp_count, - pan_liveness_update callback) +pan_compute_liveness(struct list_head *blocks, unsigned temp_count, + pan_liveness_update callback) { - /* Set of pan_block */ - struct set *work_list = _mesa_set_create(NULL, - _mesa_hash_pointer, - _mesa_key_pointer_equal); + /* Set of pan_block */ + struct set *work_list = + _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); - struct set *visited = _mesa_set_create(NULL, - _mesa_hash_pointer, - _mesa_key_pointer_equal); + struct set *visited = + _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); - /* Free any previous liveness, and allocate */ + /* Free any previous liveness, and allocate */ - pan_free_liveness(blocks); + pan_free_liveness(blocks); - list_for_each_entry(pan_block, block, blocks, link) { - block->live_in = rzalloc_array(block, uint16_t, temp_count); - block->live_out = rzalloc_array(block, uint16_t, temp_count); - } + list_for_each_entry(pan_block, block, blocks, link) { + block->live_in = rzalloc_array(block, uint16_t, temp_count); + block->live_out = rzalloc_array(block, uint16_t, temp_count); + } - /* Initialize the work list with the exit block */ - struct set_entry *cur; + /* Initialize the work list with the exit block */ + struct set_entry *cur; - cur = _mesa_set_add(work_list, pan_exit_block(blocks)); + cur = _mesa_set_add(work_list, pan_exit_block(blocks)); - /* Iterate the work list */ + /* Iterate the work list */ - do { - /* Pop off a block */ - pan_block *blk = (struct pan_block *) cur->key; - _mesa_set_remove(work_list, cur); + do { + /* Pop off a block */ + pan_block *blk = (struct pan_block *)cur->key; + _mesa_set_remove(work_list, cur); - /* Update its liveness information */ - bool progress = liveness_block_update(blk, temp_count, callback); + /* Update its liveness information */ + bool progress = liveness_block_update(blk, temp_count, callback); - /* If we made progress, we need to process the predecessors */ + /* If we made progress, we need to process the 
predecessors */ - if (progress || !_mesa_set_search(visited, blk)) { - pan_foreach_predecessor(blk, pred) - _mesa_set_add(work_list, pred); - } + if (progress || !_mesa_set_search(visited, blk)) { + pan_foreach_predecessor(blk, pred) + _mesa_set_add(work_list, pred); + } - _mesa_set_add(visited, blk); - } while((cur = _mesa_set_next_entry(work_list, NULL)) != NULL); + _mesa_set_add(visited, blk); + } while ((cur = _mesa_set_next_entry(work_list, NULL)) != NULL); - _mesa_set_destroy(visited, NULL); - _mesa_set_destroy(work_list, NULL); + _mesa_set_destroy(visited, NULL); + _mesa_set_destroy(work_list, NULL); } void pan_free_liveness(struct list_head *blocks) { - list_for_each_entry(pan_block, block, blocks, link) { - if (block->live_in) - ralloc_free(block->live_in); + list_for_each_entry(pan_block, block, blocks, link) { + if (block->live_in) + ralloc_free(block->live_in); - if (block->live_out) - ralloc_free(block->live_out); + if (block->live_out) + ralloc_free(block->live_out); - block->live_in = NULL; - block->live_out = NULL; - } + block->live_in = NULL; + block->live_out = NULL; + } } diff --git a/src/panfrost/util/pan_lower_64bit_intrin.c b/src/panfrost/util/pan_lower_64bit_intrin.c index 7c4edcfa9d7..3730e7660c8 100644 --- a/src/panfrost/util/pan_lower_64bit_intrin.c +++ b/src/panfrost/util/pan_lower_64bit_intrin.c @@ -21,8 +21,8 @@ * SOFTWARE. */ -#include "pan_ir.h" #include "compiler/nir/nir_builder.h" +#include "pan_ir.h" /* OpenCL uses 64-bit types for some intrinsic functions, including * global_invocation_id(). This could be worked around during conversion to @@ -36,43 +36,41 @@ static bool nir_lower_64bit_intrin_instr(nir_builder *b, nir_instr *instr, void *data) { - if (instr->type != nir_instr_type_intrinsic) - return false; + if (instr->type != nir_instr_type_intrinsic) + return false; - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - switch (intr->intrinsic) { - case nir_intrinsic_load_global_invocation_id: - case nir_intrinsic_load_global_invocation_id_zero_base: - case nir_intrinsic_load_workgroup_id: - case nir_intrinsic_load_num_workgroups: - break; + switch (intr->intrinsic) { + case nir_intrinsic_load_global_invocation_id: + case nir_intrinsic_load_global_invocation_id_zero_base: + case nir_intrinsic_load_workgroup_id: + case nir_intrinsic_load_num_workgroups: + break; - default: - return false; - } + default: + return false; + } - if (nir_dest_bit_size(intr->dest) != 64) - return false; + if (nir_dest_bit_size(intr->dest) != 64) + return false; - b->cursor = nir_after_instr(instr); + b->cursor = nir_after_instr(instr); - assert(intr->dest.is_ssa); - intr->dest.ssa.bit_size = 32; + assert(intr->dest.is_ssa); + intr->dest.ssa.bit_size = 32; - nir_ssa_def *conv = nir_u2u64(b, &intr->dest.ssa); + nir_ssa_def *conv = nir_u2u64(b, &intr->dest.ssa); - nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, conv, - conv->parent_instr); + nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, conv, conv->parent_instr); - return true; + return true; } bool pan_nir_lower_64bit_intrin(nir_shader *shader) { - return nir_shader_instructions_pass(shader, - nir_lower_64bit_intrin_instr, - nir_metadata_block_index | nir_metadata_dominance, - NULL); + return nir_shader_instructions_pass( + shader, nir_lower_64bit_intrin_instr, + nir_metadata_block_index | nir_metadata_dominance, NULL); } diff --git a/src/panfrost/util/pan_lower_framebuffer.c b/src/panfrost/util/pan_lower_framebuffer.c index b03090202f3..aacb9ce30d4 100644 
--- a/src/panfrost/util/pan_lower_framebuffer.c +++ b/src/panfrost/util/pan_lower_framebuffer.c @@ -47,11 +47,11 @@ * smallest precision necessary to store the pixel losslessly. */ +#include "pan_lower_framebuffer.h" #include "compiler/nir/nir.h" #include "compiler/nir/nir_builder.h" #include "compiler/nir/nir_format_convert.h" #include "util/format/u_format.h" -#include "pan_lower_framebuffer.h" /* Determines the unpacked type best suiting a given format, so the rest of the * pipeline may be adjusted accordingly */ @@ -59,54 +59,54 @@ nir_alu_type pan_unpacked_type_for_format(const struct util_format_description *desc) { - int c = util_format_get_first_non_void_channel(desc->format); + int c = util_format_get_first_non_void_channel(desc->format); - if (c == -1) - unreachable("Void format not renderable"); + if (c == -1) + unreachable("Void format not renderable"); - bool large = (desc->channel[c].size > 16); - bool large_norm = (desc->channel[c].size > 8); - bool bit8 = (desc->channel[c].size == 8); - assert(desc->channel[c].size <= 32); + bool large = (desc->channel[c].size > 16); + bool large_norm = (desc->channel[c].size > 8); + bool bit8 = (desc->channel[c].size == 8); + assert(desc->channel[c].size <= 32); - if (desc->channel[c].normalized) - return large_norm ? nir_type_float32 : nir_type_float16; + if (desc->channel[c].normalized) + return large_norm ? nir_type_float32 : nir_type_float16; - switch (desc->channel[c].type) { - case UTIL_FORMAT_TYPE_UNSIGNED: - return bit8 ? nir_type_uint8 : - large ? nir_type_uint32 : nir_type_uint16; - case UTIL_FORMAT_TYPE_SIGNED: - return bit8 ? nir_type_int8 : - large ? nir_type_int32 : nir_type_int16; - case UTIL_FORMAT_TYPE_FLOAT: - return large ? nir_type_float32 : nir_type_float16; - default: - unreachable("Format not renderable"); - } + switch (desc->channel[c].type) { + case UTIL_FORMAT_TYPE_UNSIGNED: + return bit8 ? nir_type_uint8 : large ? nir_type_uint32 : nir_type_uint16; + case UTIL_FORMAT_TYPE_SIGNED: + return bit8 ? nir_type_int8 : large ? nir_type_int32 : nir_type_int16; + case UTIL_FORMAT_TYPE_FLOAT: + return large ? nir_type_float32 : nir_type_float16; + default: + unreachable("Format not renderable"); + } } static bool -pan_is_format_native(const struct util_format_description *desc, bool broken_ld_special, bool is_store) +pan_is_format_native(const struct util_format_description *desc, + bool broken_ld_special, bool is_store) { - if (is_store || broken_ld_special) - return false; + if (is_store || broken_ld_special) + return false; - if (util_format_is_pure_integer(desc->format) || util_format_is_float(desc->format)) - return false; + if (util_format_is_pure_integer(desc->format) || + util_format_is_float(desc->format)) + return false; - /* Some formats are missing as typed but have unpacks */ - if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) - return false; + /* Some formats are missing as typed but have unpacks */ + if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) + return false; - if (desc->is_array) { - int c = util_format_get_first_non_void_channel(desc->format); - assert(c >= 0); - if (desc->channel[c].size > 8) - return false; - } + if (desc->is_array) { + int c = util_format_get_first_non_void_channel(desc->format); + assert(c >= 0); + if (desc->channel[c].size > 8) + return false; + } - return true; + return true; } /* Software packs/unpacks, by format class. 
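A few concrete outcomes of the rules above, as an illustrative aside (not part of the patch):

static void
example_unpacked_types(void)
{
   /* 8-bit normalized: large_norm is false, so it unpacks as fp16 */
   assert(pan_unpacked_type_for_format(
             util_format_description(PIPE_FORMAT_R8G8B8A8_UNORM)) ==
          nir_type_float16);

   /* 16-bit pure integer stays at 16 bits */
   assert(pan_unpacked_type_for_format(
             util_format_description(PIPE_FORMAT_R16G16_UINT)) ==
          nir_type_uint16);

   /* 32-bit float unpacks as fp32 */
   assert(pan_unpacked_type_for_format(
             util_format_description(PIPE_FORMAT_R32G32B32A32_FLOAT)) ==
          nir_type_float32);
}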
Packs take in the pixel value typed @@ -117,12 +117,12 @@ pan_is_format_native(const struct util_format_description *desc, bool broken_ld_ static nir_ssa_def * pan_replicate(nir_builder *b, nir_ssa_def *v, unsigned num_components) { - nir_ssa_def *replicated[4]; + nir_ssa_def *replicated[4]; - for (unsigned i = 0; i < 4; ++i) - replicated[i] = nir_channel(b, v, i % num_components); + for (unsigned i = 0; i < 4; ++i) + replicated[i] = nir_channel(b, v, i % num_components); - return nir_vec(b, replicated, 4); + return nir_vec(b, replicated, 4); } /* Pure x16 formats are x16 unpacked, so it's similar, but we need to pack @@ -131,135 +131,129 @@ pan_replicate(nir_builder *b, nir_ssa_def *v, unsigned num_components) static nir_ssa_def * pan_pack_pure_16(nir_builder *b, nir_ssa_def *v, unsigned num_components) { - nir_ssa_def *v4 = pan_replicate(b, v, num_components); + nir_ssa_def *v4 = pan_replicate(b, v, num_components); - nir_ssa_def *lo = nir_pack_32_2x16(b, nir_channels(b, v4, 0x3 << 0)); - nir_ssa_def *hi = nir_pack_32_2x16(b, nir_channels(b, v4, 0x3 << 2)); + nir_ssa_def *lo = nir_pack_32_2x16(b, nir_channels(b, v4, 0x3 << 0)); + nir_ssa_def *hi = nir_pack_32_2x16(b, nir_channels(b, v4, 0x3 << 2)); - return nir_vec4(b, lo, hi, lo, hi); + return nir_vec4(b, lo, hi, lo, hi); } static nir_ssa_def * pan_unpack_pure_16(nir_builder *b, nir_ssa_def *pack, unsigned num_components) { - nir_ssa_def *unpacked[4]; + nir_ssa_def *unpacked[4]; - assert(num_components <= 4); + assert(num_components <= 4); - for (unsigned i = 0; i < num_components; i += 2) { - nir_ssa_def *halves = - nir_unpack_32_2x16(b, nir_channel(b, pack, i >> 1)); + for (unsigned i = 0; i < num_components; i += 2) { + nir_ssa_def *halves = nir_unpack_32_2x16(b, nir_channel(b, pack, i >> 1)); - unpacked[i + 0] = nir_channel(b, halves, 0); - unpacked[i + 1] = nir_channel(b, halves, 1); - } + unpacked[i + 0] = nir_channel(b, halves, 0); + unpacked[i + 1] = nir_channel(b, halves, 1); + } - return nir_pad_vec4(b, nir_vec(b, unpacked, num_components)); + return nir_pad_vec4(b, nir_vec(b, unpacked, num_components)); } static nir_ssa_def * -pan_pack_reorder(nir_builder *b, - const struct util_format_description *desc, +pan_pack_reorder(nir_builder *b, const struct util_format_description *desc, nir_ssa_def *v) { - unsigned swizzle[4] = { 0, 1, 2, 3 }; + unsigned swizzle[4] = {0, 1, 2, 3}; - for (unsigned i = 0; i < v->num_components; i++) { - if (desc->swizzle[i] <= PIPE_SWIZZLE_W) - swizzle[i] = desc->swizzle[i]; - } + for (unsigned i = 0; i < v->num_components; i++) { + if (desc->swizzle[i] <= PIPE_SWIZZLE_W) + swizzle[i] = desc->swizzle[i]; + } - return nir_swizzle(b, v, swizzle, v->num_components); + return nir_swizzle(b, v, swizzle, v->num_components); } static nir_ssa_def * -pan_unpack_reorder(nir_builder *b, - const struct util_format_description *desc, +pan_unpack_reorder(nir_builder *b, const struct util_format_description *desc, nir_ssa_def *v) { - unsigned swizzle[4] = { 0, 1, 2, 3 }; + unsigned swizzle[4] = {0, 1, 2, 3}; - for (unsigned i = 0; i < v->num_components; i++) { - if (desc->swizzle[i] <= PIPE_SWIZZLE_W) - swizzle[desc->swizzle[i]] = i; - } + for (unsigned i = 0; i < v->num_components; i++) { + if (desc->swizzle[i] <= PIPE_SWIZZLE_W) + swizzle[desc->swizzle[i]] = i; + } - return nir_swizzle(b, v, swizzle, v->num_components); + return nir_swizzle(b, v, swizzle, v->num_components); } static nir_ssa_def * pan_replicate_4(nir_builder *b, nir_ssa_def *v) { - return nir_vec4(b, v, v, v, v); + return nir_vec4(b, v, v, v, v); 
} static nir_ssa_def * pan_pack_pure_8(nir_builder *b, nir_ssa_def *v, unsigned num_components) { - return pan_replicate_4(b, nir_pack_32_4x8(b, pan_replicate(b, v, num_components))); + return pan_replicate_4( + b, nir_pack_32_4x8(b, pan_replicate(b, v, num_components))); } static nir_ssa_def * pan_unpack_pure_8(nir_builder *b, nir_ssa_def *pack, unsigned num_components) { - nir_ssa_def *unpacked = nir_unpack_32_4x8(b, nir_channel(b, pack, 0)); - return nir_channels(b, unpacked, (1 << num_components) - 1); + nir_ssa_def *unpacked = nir_unpack_32_4x8(b, nir_channel(b, pack, 0)); + return nir_channels(b, unpacked, (1 << num_components) - 1); } static nir_ssa_def * pan_fsat(nir_builder *b, nir_ssa_def *v, bool is_signed) { - if (is_signed) - return nir_fsat_signed_mali(b, v); - else - return nir_fsat(b, v); + if (is_signed) + return nir_fsat_signed_mali(b, v); + else + return nir_fsat(b, v); } static float norm_scale(bool snorm, unsigned bits) { - if (snorm) - return (1 << (bits - 1)) - 1; - else - return (1 << bits) - 1; + if (snorm) + return (1 << (bits - 1)) - 1; + else + return (1 << bits) - 1; } /* For <= 8-bits per channel, [U,S]NORM formats are packed like [U,S]NORM 8, * with zeroes spacing out each component as needed */ static nir_ssa_def * -pan_pack_norm(nir_builder *b, nir_ssa_def *v, - unsigned x, unsigned y, unsigned z, unsigned w, - bool is_signed) +pan_pack_norm(nir_builder *b, nir_ssa_def *v, unsigned x, unsigned y, + unsigned z, unsigned w, bool is_signed) { - /* If a channel has N bits, 1.0 is encoded as 2^N - 1 for UNORMs and - * 2^(N-1) - 1 for SNORMs */ - nir_ssa_def *scales = - is_signed ? - nir_imm_vec4_16(b, - (1 << (x - 1)) - 1, (1 << (y - 1)) - 1, - (1 << (z - 1)) - 1, (1 << (w - 1)) - 1) : - nir_imm_vec4_16(b, - (1 << x) - 1, (1 << y) - 1, - (1 << z) - 1, (1 << w) - 1); + /* If a channel has N bits, 1.0 is encoded as 2^N - 1 for UNORMs and + * 2^(N-1) - 1 for SNORMs */ + nir_ssa_def *scales = + is_signed ? 
nir_imm_vec4_16(b, (1 << (x - 1)) - 1, (1 << (y - 1)) - 1, + (1 << (z - 1)) - 1, (1 << (w - 1)) - 1) + : nir_imm_vec4_16(b, (1 << x) - 1, (1 << y) - 1, (1 << z) - 1, + (1 << w) - 1); - /* If a channel has N bits, we pad out to the byte by (8 - N) bits */ - nir_ssa_def *shifts = nir_imm_ivec4(b, 8 - x, 8 - y, 8 - z, 8 - w); - nir_ssa_def *clamped = pan_fsat(b, nir_pad_vec4(b, v), is_signed); + /* If a channel has N bits, we pad out to the byte by (8 - N) bits */ + nir_ssa_def *shifts = nir_imm_ivec4(b, 8 - x, 8 - y, 8 - z, 8 - w); + nir_ssa_def *clamped = pan_fsat(b, nir_pad_vec4(b, v), is_signed); - nir_ssa_def *f = nir_fmul(b, clamped, scales); - nir_ssa_def *u8 = nir_f2u8(b, nir_fround_even(b, f)); - nir_ssa_def *s = nir_ishl(b, u8, shifts); - nir_ssa_def *repl = nir_pack_32_4x8(b, s); + nir_ssa_def *f = nir_fmul(b, clamped, scales); + nir_ssa_def *u8 = nir_f2u8(b, nir_fround_even(b, f)); + nir_ssa_def *s = nir_ishl(b, u8, shifts); + nir_ssa_def *repl = nir_pack_32_4x8(b, s); - return pan_replicate_4(b, repl); + return pan_replicate_4(b, repl); } static nir_ssa_def * -pan_pack_unorm(nir_builder *b, nir_ssa_def *v, - unsigned x, unsigned y, unsigned z, unsigned w) +pan_pack_unorm(nir_builder *b, nir_ssa_def *v, unsigned x, unsigned y, + unsigned z, unsigned w) { - return pan_pack_norm(b, v, x, y, z, w, false); + return pan_pack_norm(b, v, x, y, z, w, false); } /* RGB10_A2 is packed in the tilebuffer as the bottom 3 bytes being the top @@ -269,25 +263,26 @@ pan_pack_unorm(nir_builder *b, nir_ssa_def *v, static nir_ssa_def * pan_pack_unorm_1010102(nir_builder *b, nir_ssa_def *v) { - nir_ssa_def *scale = nir_imm_vec4(b, 1023.0, 1023.0, 1023.0, 3.0); - nir_ssa_def *s = nir_f2u32(b, nir_fround_even(b, nir_fmul(b, nir_fsat(b, v), scale))); + nir_ssa_def *scale = nir_imm_vec4(b, 1023.0, 1023.0, 1023.0, 3.0); + nir_ssa_def *s = + nir_f2u32(b, nir_fround_even(b, nir_fmul(b, nir_fsat(b, v), scale))); - nir_ssa_def *top8 = nir_ushr(b, s, nir_imm_ivec4(b, 0x2, 0x2, 0x2, 0x2)); - nir_ssa_def *top8_rgb = nir_pack_32_4x8(b, nir_u2u8(b, top8)); + nir_ssa_def *top8 = nir_ushr(b, s, nir_imm_ivec4(b, 0x2, 0x2, 0x2, 0x2)); + nir_ssa_def *top8_rgb = nir_pack_32_4x8(b, nir_u2u8(b, top8)); - nir_ssa_def *bottom2 = nir_iand(b, s, nir_imm_ivec4(b, 0x3, 0x3, 0x3, 0x3)); + nir_ssa_def *bottom2 = nir_iand(b, s, nir_imm_ivec4(b, 0x3, 0x3, 0x3, 0x3)); - nir_ssa_def *top = - nir_ior(b, - nir_ior(b, - nir_ishl(b, nir_channel(b, bottom2, 0), nir_imm_int(b, 24 + 0)), - nir_ishl(b, nir_channel(b, bottom2, 1), nir_imm_int(b, 24 + 2))), - nir_ior(b, - nir_ishl(b, nir_channel(b, bottom2, 2), nir_imm_int(b, 24 + 4)), - nir_ishl(b, nir_channel(b, bottom2, 3), nir_imm_int(b, 24 + 6)))); + nir_ssa_def *top = nir_ior( + b, + nir_ior(b, + nir_ishl(b, nir_channel(b, bottom2, 0), nir_imm_int(b, 24 + 0)), + nir_ishl(b, nir_channel(b, bottom2, 1), nir_imm_int(b, 24 + 2))), + nir_ior(b, + nir_ishl(b, nir_channel(b, bottom2, 2), nir_imm_int(b, 24 + 4)), + nir_ishl(b, nir_channel(b, bottom2, 3), nir_imm_int(b, 24 + 6)))); - nir_ssa_def *p = nir_ior(b, top, top8_rgb); - return pan_replicate_4(b, p); + nir_ssa_def *p = nir_ior(b, top, top8_rgb); + return pan_replicate_4(b, p); } /* On the other hand, the pure int RGB10_A2 is identical to the spec */ @@ -295,41 +290,40 @@ pan_pack_unorm_1010102(nir_builder *b, nir_ssa_def *v) static nir_ssa_def * pan_pack_int_1010102(nir_builder *b, nir_ssa_def *v, bool is_signed) { - v = nir_u2u32(b, v); + v = nir_u2u32(b, v); - /* Clamp the values */ - if (is_signed) { - v = nir_imin(b, v, nir_imm_ivec4(b, 
511, 511, 511, 1)); - v = nir_imax(b, v, nir_imm_ivec4(b, -512, -512, -512, -2)); - } else { - v = nir_umin(b, v, nir_imm_ivec4(b, 1023, 1023, 1023, 3)); - } + /* Clamp the values */ + if (is_signed) { + v = nir_imin(b, v, nir_imm_ivec4(b, 511, 511, 511, 1)); + v = nir_imax(b, v, nir_imm_ivec4(b, -512, -512, -512, -2)); + } else { + v = nir_umin(b, v, nir_imm_ivec4(b, 1023, 1023, 1023, 3)); + } - v = nir_ishl(b, v, nir_imm_ivec4(b, 0, 10, 20, 30)); - v = nir_ior(b, - nir_ior(b, nir_channel(b, v, 0), nir_channel(b, v, 1)), - nir_ior(b, nir_channel(b, v, 2), nir_channel(b, v, 3))); + v = nir_ishl(b, v, nir_imm_ivec4(b, 0, 10, 20, 30)); + v = nir_ior(b, nir_ior(b, nir_channel(b, v, 0), nir_channel(b, v, 1)), + nir_ior(b, nir_channel(b, v, 2), nir_channel(b, v, 3))); - return pan_replicate_4(b, v); + return pan_replicate_4(b, v); } static nir_ssa_def * pan_unpack_int_1010102(nir_builder *b, nir_ssa_def *packed, bool is_signed) { - nir_ssa_def *v = pan_replicate_4(b, nir_channel(b, packed, 0)); + nir_ssa_def *v = pan_replicate_4(b, nir_channel(b, packed, 0)); - /* Left shift all components so the sign bit is on the MSB, and - * can be extended by ishr(). The ishl()+[u,i]shr() combination - * sets all unused bits to 0 without requiring a mask. - */ - v = nir_ishl(b, v, nir_imm_ivec4(b, 22, 12, 2, 0)); + /* Left shift all components so the sign bit is on the MSB, and + * can be extended by ishr(). The ishl()+[u,i]shr() combination + * sets all unused bits to 0 without requiring a mask. + */ + v = nir_ishl(b, v, nir_imm_ivec4(b, 22, 12, 2, 0)); - if (is_signed) - v = nir_ishr(b, v, nir_imm_ivec4(b, 22, 22, 22, 30)); - else - v = nir_ushr(b, v, nir_imm_ivec4(b, 22, 22, 22, 30)); + if (is_signed) + v = nir_ishr(b, v, nir_imm_ivec4(b, 22, 22, 22, 30)); + else + v = nir_ushr(b, v, nir_imm_ivec4(b, 22, 22, 22, 30)); - return nir_i2i16(b, v); + return nir_i2i16(b, v); } /* NIR means we can *finally* catch a break */ @@ -337,25 +331,21 @@ pan_unpack_int_1010102(nir_builder *b, nir_ssa_def *packed, bool is_signed) static nir_ssa_def * pan_pack_r11g11b10(nir_builder *b, nir_ssa_def *v) { - return pan_replicate_4(b, nir_format_pack_11f11f10f(b, - nir_f2f32(b, v))); + return pan_replicate_4(b, nir_format_pack_11f11f10f(b, nir_f2f32(b, v))); } static nir_ssa_def * pan_unpack_r11g11b10(nir_builder *b, nir_ssa_def *v) { - nir_ssa_def *f32 = nir_format_unpack_11f11f10f(b, nir_channel(b, v, 0)); - nir_ssa_def *f16 = nir_f2fmp(b, f32); + nir_ssa_def *f32 = nir_format_unpack_11f11f10f(b, nir_channel(b, v, 0)); + nir_ssa_def *f16 = nir_f2fmp(b, f32); - /* Extend to vec4 with alpha */ - nir_ssa_def *components[4] = { - nir_channel(b, f16, 0), - nir_channel(b, f16, 1), - nir_channel(b, f16, 2), - nir_imm_float16(b, 1.0) - }; + /* Extend to vec4 with alpha */ + nir_ssa_def *components[4] = {nir_channel(b, f16, 0), nir_channel(b, f16, 1), + nir_channel(b, f16, 2), + nir_imm_float16(b, 1.0)}; - return nir_vec(b, components, 4); + return nir_vec(b, components, 4); } /* Wrapper around sRGB conversion */ @@ -363,300 +353,294 @@ pan_unpack_r11g11b10(nir_builder *b, nir_ssa_def *v) static nir_ssa_def * pan_linear_to_srgb(nir_builder *b, nir_ssa_def *linear) { - nir_ssa_def *rgb = nir_channels(b, linear, 0x7); + nir_ssa_def *rgb = nir_channels(b, linear, 0x7); - /* TODO: fp16 native conversion */ - nir_ssa_def *srgb = nir_f2fmp(b, - nir_format_linear_to_srgb(b, nir_f2f32(b, rgb))); + /* TODO: fp16 native conversion */ + nir_ssa_def *srgb = + nir_f2fmp(b, nir_format_linear_to_srgb(b, nir_f2f32(b, rgb))); - nir_ssa_def *comp[4] 
= { - nir_channel(b, srgb, 0), - nir_channel(b, srgb, 1), - nir_channel(b, srgb, 2), - nir_channel(b, linear, 3), - }; + nir_ssa_def *comp[4] = { + nir_channel(b, srgb, 0), + nir_channel(b, srgb, 1), + nir_channel(b, srgb, 2), + nir_channel(b, linear, 3), + }; - return nir_vec(b, comp, 4); + return nir_vec(b, comp, 4); } static nir_ssa_def * pan_unpack_pure(nir_builder *b, nir_ssa_def *packed, unsigned size, unsigned nr) { - switch (size) { - case 32: - return nir_trim_vector(b, packed, nr); - case 16: - return pan_unpack_pure_16(b, packed, nr); - case 8: - return pan_unpack_pure_8(b, packed, nr); - default: - unreachable("Unrenderable size"); - } + switch (size) { + case 32: + return nir_trim_vector(b, packed, nr); + case 16: + return pan_unpack_pure_16(b, packed, nr); + case 8: + return pan_unpack_pure_8(b, packed, nr); + default: + unreachable("Unrenderable size"); + } } /* Generic dispatches for un/pack regardless of format */ static nir_ssa_def * -pan_unpack(nir_builder *b, - const struct util_format_description *desc, - nir_ssa_def *packed) +pan_unpack(nir_builder *b, const struct util_format_description *desc, + nir_ssa_def *packed) { - if (desc->is_array) { - int c = util_format_get_first_non_void_channel(desc->format); - assert(c >= 0); - struct util_format_channel_description d = desc->channel[c]; - nir_ssa_def *unpacked = pan_unpack_pure(b, packed, d.size, desc->nr_channels); + if (desc->is_array) { + int c = util_format_get_first_non_void_channel(desc->format); + assert(c >= 0); + struct util_format_channel_description d = desc->channel[c]; + nir_ssa_def *unpacked = + pan_unpack_pure(b, packed, d.size, desc->nr_channels); - /* Normalized formats are unpacked as integers. We need to - * convert to float for the final result. - */ - if (d.normalized) { - bool snorm = desc->is_snorm; - unsigned float_sz = (d.size <= 8 ? 16 : 32); - float multiplier = norm_scale(snorm, d.size); + /* Normalized formats are unpacked as integers. We need to + * convert to float for the final result. + */ + if (d.normalized) { + bool snorm = desc->is_snorm; + unsigned float_sz = (d.size <= 8 ? 16 : 32); + float multiplier = norm_scale(snorm, d.size); - nir_ssa_def *as_float = - snorm ? nir_i2fN(b, unpacked, float_sz) : - nir_u2fN(b, unpacked, float_sz); + nir_ssa_def *as_float = snorm ? 
nir_i2fN(b, unpacked, float_sz) + : nir_u2fN(b, unpacked, float_sz); - return nir_fmul_imm(b, as_float, 1.0 / multiplier); - } else { - return unpacked; - } - } + return nir_fmul_imm(b, as_float, 1.0 / multiplier); + } else { + return unpacked; + } + } - switch (desc->format) { - case PIPE_FORMAT_R10G10B10A2_UINT: - case PIPE_FORMAT_B10G10R10A2_UINT: - return pan_unpack_int_1010102(b, packed, false); - case PIPE_FORMAT_R10G10B10A2_SINT: - case PIPE_FORMAT_B10G10R10A2_SINT: - return pan_unpack_int_1010102(b, packed, true); - case PIPE_FORMAT_R11G11B10_FLOAT: - return pan_unpack_r11g11b10(b, packed); - default: - break; - } + switch (desc->format) { + case PIPE_FORMAT_R10G10B10A2_UINT: + case PIPE_FORMAT_B10G10R10A2_UINT: + return pan_unpack_int_1010102(b, packed, false); + case PIPE_FORMAT_R10G10B10A2_SINT: + case PIPE_FORMAT_B10G10R10A2_SINT: + return pan_unpack_int_1010102(b, packed, true); + case PIPE_FORMAT_R11G11B10_FLOAT: + return pan_unpack_r11g11b10(b, packed); + default: + break; + } - fprintf(stderr, "%s\n", desc->name); - unreachable("Unknown format"); + fprintf(stderr, "%s\n", desc->name); + unreachable("Unknown format"); } -static nir_ssa_def * -pan_pack(nir_builder *b, - const struct util_format_description *desc, - nir_ssa_def *unpacked) +static nir_ssa_def *pan_pack(nir_builder *b, + const struct util_format_description *desc, + nir_ssa_def * unpacked) { - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) - unpacked = pan_linear_to_srgb(b, unpacked); + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) + unpacked = pan_linear_to_srgb(b, unpacked); - if (desc->is_array) { - int c = util_format_get_first_non_void_channel(desc->format); - assert(c >= 0); - struct util_format_channel_description d = desc->channel[c]; + if (desc->is_array) { + int c = util_format_get_first_non_void_channel(desc->format); + assert(c >= 0); + struct util_format_channel_description d = desc->channel[c]; - /* Pure formats are packed as-is */ - nir_ssa_def *raw = unpacked; + /* Pure formats are packed as-is */ + nir_ssa_def *raw = unpacked; - /* Normalized formats get normalized first */ - if (d.normalized) { - bool snorm = desc->is_snorm; - float multiplier = norm_scale(snorm, d.size); - nir_ssa_def *clamped = pan_fsat(b, unpacked, snorm); - nir_ssa_def *normed = nir_fmul_imm(b, clamped, multiplier); + /* Normalized formats get normalized first */ + if (d.normalized) { + bool snorm = desc->is_snorm; + float multiplier = norm_scale(snorm, d.size); + nir_ssa_def *clamped = pan_fsat(b, unpacked, snorm); + nir_ssa_def *normed = nir_fmul_imm(b, clamped, multiplier); - raw = nir_f2uN(b, normed, d.size); - } + raw = nir_f2uN(b, normed, d.size); + } - /* Pack the raw format */ - switch (d.size) { - case 32: - return pan_replicate(b, raw, desc->nr_channels); - case 16: - return pan_pack_pure_16(b, raw, desc->nr_channels); - case 8: - return pan_pack_pure_8(b, raw, desc->nr_channels); - default: - unreachable("Unrenderable size"); - } - } + /* Pack the raw format */ + switch (d.size) { + case 32: + return pan_replicate(b, raw, desc->nr_channels); + case 16: + return pan_pack_pure_16(b, raw, desc->nr_channels); + case 8: + return pan_pack_pure_8(b, raw, desc->nr_channels); + default: + unreachable("Unrenderable size"); + } + } - switch (desc->format) { - case PIPE_FORMAT_B4G4R4A4_UNORM: - case PIPE_FORMAT_B4G4R4X4_UNORM: - case PIPE_FORMAT_A4R4_UNORM: - case PIPE_FORMAT_R4A4_UNORM: - case PIPE_FORMAT_A4B4G4R4_UNORM: - case PIPE_FORMAT_R4G4B4A4_UNORM: - return pan_pack_unorm(b, unpacked, 4, 4, 4, 4); - case 
PIPE_FORMAT_B5G5R5A1_UNORM: - case PIPE_FORMAT_R5G5B5A1_UNORM: - return pan_pack_unorm(b, unpacked, 5, 6, 5, 1); - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_B5G6R5_UNORM: - return pan_pack_unorm(b, unpacked, 5, 6, 5, 0); - case PIPE_FORMAT_R10G10B10A2_UNORM: - case PIPE_FORMAT_B10G10R10A2_UNORM: - return pan_pack_unorm_1010102(b, unpacked); - case PIPE_FORMAT_R10G10B10A2_UINT: - case PIPE_FORMAT_B10G10R10A2_UINT: - return pan_pack_int_1010102(b, unpacked, false); - case PIPE_FORMAT_R10G10B10A2_SINT: - case PIPE_FORMAT_B10G10R10A2_SINT: - return pan_pack_int_1010102(b, unpacked, true); - case PIPE_FORMAT_R11G11B10_FLOAT: - return pan_pack_r11g11b10(b, unpacked); - default: - break; - } + switch (desc->format) { + case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_B4G4R4X4_UNORM: + case PIPE_FORMAT_A4R4_UNORM: + case PIPE_FORMAT_R4A4_UNORM: + case PIPE_FORMAT_A4B4G4R4_UNORM: + case PIPE_FORMAT_R4G4B4A4_UNORM: + return pan_pack_unorm(b, unpacked, 4, 4, 4, 4); + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_R5G5B5A1_UNORM: + return pan_pack_unorm(b, unpacked, 5, 6, 5, 1); + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_B5G6R5_UNORM: + return pan_pack_unorm(b, unpacked, 5, 6, 5, 0); + case PIPE_FORMAT_R10G10B10A2_UNORM: + case PIPE_FORMAT_B10G10R10A2_UNORM: + return pan_pack_unorm_1010102(b, unpacked); + case PIPE_FORMAT_R10G10B10A2_UINT: + case PIPE_FORMAT_B10G10R10A2_UINT: + return pan_pack_int_1010102(b, unpacked, false); + case PIPE_FORMAT_R10G10B10A2_SINT: + case PIPE_FORMAT_B10G10R10A2_SINT: + return pan_pack_int_1010102(b, unpacked, true); + case PIPE_FORMAT_R11G11B10_FLOAT: + return pan_pack_r11g11b10(b, unpacked); + default: + break; + } - fprintf(stderr, "%s\n", desc->name); - unreachable("Unknown format"); + fprintf(stderr, "%s\n", desc->name); + unreachable("Unknown format"); } static void -pan_lower_fb_store(nir_shader *shader, - nir_builder *b, - nir_intrinsic_instr *intr, - const struct util_format_description *desc, - bool reorder_comps) +pan_lower_fb_store(nir_shader *shader, nir_builder *b, + nir_intrinsic_instr *intr, + const struct util_format_description *desc, + bool reorder_comps) { - /* For stores, add conversion before */ - nir_ssa_def *unpacked = - nir_ssa_for_src(b, intr->src[1], intr->num_components); - unpacked = nir_pad_vec4(b, unpacked); + /* For stores, add conversion before */ + nir_ssa_def *unpacked = + nir_ssa_for_src(b, intr->src[1], intr->num_components); + unpacked = nir_pad_vec4(b, unpacked); - /* Re-order the components */ - if (reorder_comps) - unpacked = pan_pack_reorder(b, desc, unpacked); + /* Re-order the components */ + if (reorder_comps) + unpacked = pan_pack_reorder(b, desc, unpacked); - nir_ssa_def *packed = pan_pack(b, desc, unpacked); + nir_ssa_def *packed = pan_pack(b, desc, unpacked); - nir_store_raw_output_pan(b, packed); + nir_store_raw_output_pan(b, packed); } static nir_ssa_def * pan_sample_id(nir_builder *b, int sample) { - return (sample >= 0) ? nir_imm_int(b, sample) : nir_load_sample_id(b); + return (sample >= 0) ? 
nir_imm_int(b, sample) : nir_load_sample_id(b); } static void -pan_lower_fb_load(nir_shader *shader, - nir_builder *b, - nir_intrinsic_instr *intr, - const struct util_format_description *desc, - bool reorder_comps, - int sample) +pan_lower_fb_load(nir_shader *shader, nir_builder *b, nir_intrinsic_instr *intr, + const struct util_format_description *desc, + bool reorder_comps, int sample) { - nir_io_semantics sem = { - .location = nir_intrinsic_get_var(intr, 0)->data.location, - }; + nir_io_semantics sem = { + .location = nir_intrinsic_get_var(intr, 0)->data.location, + }; - nir_ssa_def *packed = - nir_load_raw_output_pan(b, 4, 32, pan_sample_id(b, sample), - .io_semantics = sem); + nir_ssa_def *packed = nir_load_raw_output_pan( + b, 4, 32, pan_sample_id(b, sample), .io_semantics = sem); - /* Convert the raw value */ - nir_ssa_def *unpacked = pan_unpack(b, desc, packed); + /* Convert the raw value */ + nir_ssa_def *unpacked = pan_unpack(b, desc, packed); - /* Convert to the size of the load intrinsic. - * - * We can assume that the type will match with the framebuffer format: - * - * Page 170 of the PDF of the OpenGL ES 3.0.6 spec says: - * - * If [UNORM or SNORM, convert to fixed-point]; otherwise no type - * conversion is applied. If the values written by the fragment shader - * do not match the format(s) of the corresponding color buffer(s), - * the result is undefined. - */ + /* Convert to the size of the load intrinsic. + * + * We can assume that the type will match with the framebuffer format: + * + * Page 170 of the PDF of the OpenGL ES 3.0.6 spec says: + * + * If [UNORM or SNORM, convert to fixed-point]; otherwise no type + * conversion is applied. If the values written by the fragment shader + * do not match the format(s) of the corresponding color buffer(s), + * the result is undefined. 
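Concretely, an illustrative case (not from the patch):

/*
 * Example: if RT0 is R8G8B8A8_UNORM, pan_unpack() above yields an fp16
 * vec4. When the shader reads the output at 16 bits (mediump), the
 * nir_convert_to_bit_size() below is a no-op; for a 32-bit destination it
 * inserts an f2f32 before the uses are rewritten.
 */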
+ */ - unsigned bits = nir_dest_bit_size(intr->dest); + unsigned bits = nir_dest_bit_size(intr->dest); - nir_alu_type src_type = nir_alu_type_get_base_type( - pan_unpacked_type_for_format(desc)); + nir_alu_type src_type = + nir_alu_type_get_base_type(pan_unpacked_type_for_format(desc)); - unpacked = nir_convert_to_bit_size(b, unpacked, src_type, bits); - unpacked = nir_resize_vector(b, unpacked, intr->dest.ssa.num_components); + unpacked = nir_convert_to_bit_size(b, unpacked, src_type, bits); + unpacked = nir_resize_vector(b, unpacked, intr->dest.ssa.num_components); - /* Reorder the components */ - if (reorder_comps) - unpacked = pan_unpack_reorder(b, desc, unpacked); + /* Reorder the components */ + if (reorder_comps) + unpacked = pan_unpack_reorder(b, desc, unpacked); - nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, unpacked, &intr->instr); + nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, unpacked, &intr->instr); } bool pan_lower_framebuffer(nir_shader *shader, const enum pipe_format *rt_fmts, - uint8_t raw_fmt_mask, bool is_blend, bool broken_ld_special) + uint8_t raw_fmt_mask, bool is_blend, + bool broken_ld_special) { - if (shader->info.stage != MESA_SHADER_FRAGMENT) - return false; + if (shader->info.stage != MESA_SHADER_FRAGMENT) + return false; - bool progress = false; + bool progress = false; - nir_foreach_function(func, shader) { - nir_foreach_block(block, func->impl) { - nir_foreach_instr_safe(instr, block) { - if (instr->type != nir_instr_type_intrinsic) - continue; + nir_foreach_function(func, shader) { + nir_foreach_block(block, func->impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - bool is_load = intr->intrinsic == nir_intrinsic_load_deref; - bool is_store = intr->intrinsic == nir_intrinsic_store_deref; + bool is_load = intr->intrinsic == nir_intrinsic_load_deref; + bool is_store = intr->intrinsic == nir_intrinsic_store_deref; - if (!(is_load || (is_store && is_blend))) - continue; + if (!(is_load || (is_store && is_blend))) + continue; - nir_variable *var = nir_intrinsic_get_var(intr, 0); + nir_variable *var = nir_intrinsic_get_var(intr, 0); - if (var->data.mode != nir_var_shader_out) - continue; + if (var->data.mode != nir_var_shader_out) + continue; - if (var->data.location < FRAG_RESULT_DATA0) - continue; + if (var->data.location < FRAG_RESULT_DATA0) + continue; - unsigned rt = var->data.location - FRAG_RESULT_DATA0; + unsigned rt = var->data.location - FRAG_RESULT_DATA0; - if (rt_fmts[rt] == PIPE_FORMAT_NONE) - continue; + if (rt_fmts[rt] == PIPE_FORMAT_NONE) + continue; - const struct util_format_description *desc = - util_format_description(rt_fmts[rt]); + const struct util_format_description *desc = + util_format_description(rt_fmts[rt]); - /* Don't lower */ - if (pan_is_format_native(desc, broken_ld_special, is_store)) - continue; + /* Don't lower */ + if (pan_is_format_native(desc, broken_ld_special, is_store)) + continue; - /* EXT_shader_framebuffer_fetch requires - * per-sample loads. - * MSAA blend shaders are not yet handled, so - * for now always load sample 0. */ - int sample = is_blend ? 0 : -1; - bool reorder_comps = raw_fmt_mask & BITFIELD_BIT(rt); + /* EXT_shader_framebuffer_fetch requires + * per-sample loads. + * MSAA blend shaders are not yet handled, so + * for now always load sample 0. */ + int sample = is_blend ? 
0 : -1; + bool reorder_comps = raw_fmt_mask & BITFIELD_BIT(rt); - nir_builder b; - nir_builder_init(&b, func->impl); + nir_builder b; + nir_builder_init(&b, func->impl); - if (is_store) { - b.cursor = nir_before_instr(instr); - pan_lower_fb_store(shader, &b, intr, desc, reorder_comps); - } else { - b.cursor = nir_after_instr(instr); - pan_lower_fb_load(shader, &b, intr, desc, reorder_comps, sample); - } + if (is_store) { + b.cursor = nir_before_instr(instr); + pan_lower_fb_store(shader, &b, intr, desc, reorder_comps); + } else { + b.cursor = nir_after_instr(instr); + pan_lower_fb_load(shader, &b, intr, desc, reorder_comps, sample); + } - nir_instr_remove(instr); + nir_instr_remove(instr); - progress = true; - } - } + progress = true; + } + } - nir_metadata_preserve(func->impl, nir_metadata_block_index | - nir_metadata_dominance); - } + nir_metadata_preserve(func->impl, + nir_metadata_block_index | nir_metadata_dominance); + } - return progress; + return progress; } diff --git a/src/panfrost/util/pan_lower_framebuffer.h b/src/panfrost/util/pan_lower_framebuffer.h index aab8e4bcdef..96f711a9d43 100644 --- a/src/panfrost/util/pan_lower_framebuffer.h +++ b/src/panfrost/util/pan_lower_framebuffer.h @@ -30,11 +30,11 @@ #include "compiler/nir/nir.h" #include "util/format/u_format.h" -nir_alu_type pan_unpacked_type_for_format(const struct util_format_description *desc); +nir_alu_type +pan_unpacked_type_for_format(const struct util_format_description *desc); -bool pan_lower_framebuffer(nir_shader *shader, - const enum pipe_format *rt_fmts, - uint8_t raw_fmt_mask, - bool is_blend, bool broken_ld_special); +bool pan_lower_framebuffer(nir_shader *shader, const enum pipe_format *rt_fmts, + uint8_t raw_fmt_mask, bool is_blend, + bool broken_ld_special); #endif diff --git a/src/panfrost/util/pan_lower_helper_invocation.c b/src/panfrost/util/pan_lower_helper_invocation.c index 23a37a15dd3..9e31414a262 100644 --- a/src/panfrost/util/pan_lower_helper_invocation.c +++ b/src/panfrost/util/pan_lower_helper_invocation.c @@ -21,8 +21,8 @@ * SOFTWARE. 
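For reference, a sketch of a possible driver call site; the values here are hypothetical and not taken from the patch:

/* One entry per render target; PIPE_FORMAT_NONE leaves that RT untouched */
enum pipe_format rt_fmts[PIPE_MAX_COLOR_BUFS] = {
   PIPE_FORMAT_R5G6B5_UNORM, /* RT0 gets software pack/unpack */
};

/* raw_fmt_mask = 0: no RT needs component reordering;
 * is_blend = false: lowering a regular fragment shader;
 * broken_ld_special = true on GPUs without usable typed loads. */
NIR_PASS_V(nir, pan_lower_framebuffer, rt_fmts, 0, false, true);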
*/ -#include "pan_ir.h" #include "compiler/nir/nir_builder.h" +#include "pan_ir.h" /* Lower gl_HelperInvocation to (gl_SampleMaskIn == 0), this depends on * architectural details but is required for correct operation with @@ -32,27 +32,26 @@ static bool pan_lower_helper_invocation_instr(nir_builder *b, nir_instr *instr, void *data) { - if (instr->type != nir_instr_type_intrinsic) - return false; + if (instr->type != nir_instr_type_intrinsic) + return false; - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - if (intr->intrinsic != nir_intrinsic_load_helper_invocation) - return false; + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic != nir_intrinsic_load_helper_invocation) + return false; - b->cursor = nir_before_instr(instr); + b->cursor = nir_before_instr(instr); - nir_ssa_def *mask = nir_load_sample_mask_in(b); - nir_ssa_def *eq = nir_ieq(b, mask, nir_imm_int(b, 0)); - nir_ssa_def_rewrite_uses(&intr->dest.ssa, eq); + nir_ssa_def *mask = nir_load_sample_mask_in(b); + nir_ssa_def *eq = nir_ieq(b, mask, nir_imm_int(b, 0)); + nir_ssa_def_rewrite_uses(&intr->dest.ssa, eq); - return true; + return true; } bool pan_lower_helper_invocation(nir_shader *shader) { - return nir_shader_instructions_pass(shader, - pan_lower_helper_invocation_instr, - nir_metadata_block_index | nir_metadata_dominance, - NULL); + return nir_shader_instructions_pass( + shader, pan_lower_helper_invocation_instr, + nir_metadata_block_index | nir_metadata_dominance, NULL); } diff --git a/src/panfrost/util/pan_lower_sample_position.c b/src/panfrost/util/pan_lower_sample_position.c index 12a0c47bbff..f51b9c9689c 100644 --- a/src/panfrost/util/pan_lower_sample_position.c +++ b/src/panfrost/util/pan_lower_sample_position.c @@ -21,8 +21,8 @@ * SOFTWARE. */ -#include "pan_ir.h" #include "compiler/nir/nir_builder.h" +#include "pan_ir.h" /* Sample positions are supplied in a packed 8:8 fixed-point vec2 format in GPU * memory indexed by the sample. 
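As a worked example of the 8:8 fixed-point decode this pass performs (illustrative, not from the patch):

static inline float
example_decode_sample_pos_8_8(int16_t raw)
{
   /* Mirrors the nir_i2f16 + nir_fmul_imm(..., 1.0 / 256.0) sequence below:
    * 0x0080 (128) decodes to 0.5 of a pixel, 0x00C0 (192) to 0.75. */
   return raw / 256.0f;
}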
We lower in NIR to take advantage of possible @@ -33,43 +33,42 @@ * it's a pretty trivial difference */ static bool -pan_lower_sample_pos_impl(struct nir_builder *b, - nir_instr *instr, UNUSED void *data) +pan_lower_sample_pos_impl(struct nir_builder *b, nir_instr *instr, + UNUSED void *data) { - if (instr->type != nir_instr_type_intrinsic) - return false; + if (instr->type != nir_instr_type_intrinsic) + return false; - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - if (intr->intrinsic != nir_intrinsic_load_sample_pos) - return false; + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic != nir_intrinsic_load_sample_pos) + return false; - b->cursor = nir_before_instr(instr); + b->cursor = nir_before_instr(instr); - /* Elements are 4 bytes */ - nir_ssa_def *addr = nir_iadd(b, - nir_load_sample_positions_pan(b), - nir_u2u64(b, nir_imul_imm(b, nir_load_sample_id(b), 4))); + /* Elements are 4 bytes */ + nir_ssa_def *addr = + nir_iadd(b, nir_load_sample_positions_pan(b), + nir_u2u64(b, nir_imul_imm(b, nir_load_sample_id(b), 4))); - /* Decode 8:8 fixed-point */ - nir_ssa_def *raw = nir_load_global(b, addr, 2, 2, 16); - nir_ssa_def *decoded = nir_fmul_imm(b, nir_i2f16(b, raw), 1.0 / 256.0); + /* Decode 8:8 fixed-point */ + nir_ssa_def *raw = nir_load_global(b, addr, 2, 2, 16); + nir_ssa_def *decoded = nir_fmul_imm(b, nir_i2f16(b, raw), 1.0 / 256.0); - /* Make NIR validator happy */ - if (decoded->bit_size != nir_dest_bit_size(intr->dest)) - decoded = nir_f2fN(b, decoded, nir_dest_bit_size(intr->dest)); + /* Make NIR validator happy */ + if (decoded->bit_size != nir_dest_bit_size(intr->dest)) + decoded = nir_f2fN(b, decoded, nir_dest_bit_size(intr->dest)); - nir_ssa_def_rewrite_uses(&intr->dest.ssa, decoded); - return true; + nir_ssa_def_rewrite_uses(&intr->dest.ssa, decoded); + return true; } bool pan_lower_sample_pos(nir_shader *shader) { - if (shader->info.stage != MESA_SHADER_FRAGMENT) - return false; + if (shader->info.stage != MESA_SHADER_FRAGMENT) + return false; - return nir_shader_instructions_pass(shader, - pan_lower_sample_pos_impl, - nir_metadata_block_index | nir_metadata_dominance, - NULL); + return nir_shader_instructions_pass( + shader, pan_lower_sample_pos_impl, + nir_metadata_block_index | nir_metadata_dominance, NULL); } diff --git a/src/panfrost/util/pan_lower_store_component.c b/src/panfrost/util/pan_lower_store_component.c index 5178317232b..aa00e6118e2 100644 --- a/src/panfrost/util/pan_lower_store_component.c +++ b/src/panfrost/util/pan_lower_store_component.c @@ -24,8 +24,8 @@ * Alyssa Rosenzweig */ -#include "pan_ir.h" #include "compiler/nir/nir_builder.h" +#include "pan_ir.h" /* * If the shader packs multiple varyings into the same location with different @@ -36,70 +36,69 @@ static bool lower_store_component(nir_builder *b, nir_instr *instr, void *data) { - if (instr->type != nir_instr_type_intrinsic) - return false; + if (instr->type != nir_instr_type_intrinsic) + return false; - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - if (intr->intrinsic != nir_intrinsic_store_output) - return false; + if (intr->intrinsic != nir_intrinsic_store_output) + return false; - struct hash_table_u64 *slots = data; - unsigned component = nir_intrinsic_component(intr); - nir_src *slot_src = nir_get_io_offset_src(intr); - uint64_t slot = nir_src_as_uint(*slot_src) + nir_intrinsic_base(intr); + struct hash_table_u64 *slots = data; + unsigned component = 
nir_intrinsic_component(intr); + nir_src *slot_src = nir_get_io_offset_src(intr); + uint64_t slot = nir_src_as_uint(*slot_src) + nir_intrinsic_base(intr); - nir_intrinsic_instr *prev = _mesa_hash_table_u64_search(slots, slot); - unsigned mask = (prev ? nir_intrinsic_write_mask(prev) : 0); + nir_intrinsic_instr *prev = _mesa_hash_table_u64_search(slots, slot); + unsigned mask = (prev ? nir_intrinsic_write_mask(prev) : 0); - nir_ssa_def *value = intr->src[0].ssa; - b->cursor = nir_before_instr(&intr->instr); + nir_ssa_def *value = intr->src[0].ssa; + b->cursor = nir_before_instr(&intr->instr); - nir_ssa_def *undef = nir_ssa_undef(b, 1, value->bit_size); - nir_ssa_def *channels[4] = { undef, undef, undef, undef }; + nir_ssa_def *undef = nir_ssa_undef(b, 1, value->bit_size); + nir_ssa_def *channels[4] = {undef, undef, undef, undef}; - /* Copy old */ - u_foreach_bit(i, mask) { - assert(prev != NULL); - nir_ssa_def *prev_ssa = prev->src[0].ssa; - channels[i] = nir_channel(b, prev_ssa, i); - } + /* Copy old */ + u_foreach_bit(i, mask) { + assert(prev != NULL); + nir_ssa_def *prev_ssa = prev->src[0].ssa; + channels[i] = nir_channel(b, prev_ssa, i); + } - /* Copy new */ - unsigned new_mask = nir_intrinsic_write_mask(intr); - mask |= (new_mask << component); + /* Copy new */ + unsigned new_mask = nir_intrinsic_write_mask(intr); + mask |= (new_mask << component); - u_foreach_bit(i, new_mask) { - assert(component + i < 4); - channels[component + i] = nir_channel(b, value, i); - } + u_foreach_bit(i, new_mask) { + assert(component + i < 4); + channels[component + i] = nir_channel(b, value, i); + } - intr->num_components = util_last_bit(mask); - nir_instr_rewrite_src_ssa(instr, &intr->src[0], - nir_vec(b, channels, intr->num_components)); + intr->num_components = util_last_bit(mask); + nir_instr_rewrite_src_ssa(instr, &intr->src[0], + nir_vec(b, channels, intr->num_components)); - nir_intrinsic_set_component(intr, 0); - nir_intrinsic_set_write_mask(intr, mask); + nir_intrinsic_set_component(intr, 0); + nir_intrinsic_set_write_mask(intr, mask); - if (prev) { - _mesa_hash_table_u64_remove(slots, slot); - nir_instr_remove(&prev->instr); - } + if (prev) { + _mesa_hash_table_u64_remove(slots, slot); + nir_instr_remove(&prev->instr); + } - _mesa_hash_table_u64_insert(slots, slot, intr); - return false; + _mesa_hash_table_u64_insert(slots, slot, intr); + return false; } bool pan_nir_lower_store_component(nir_shader *s) { - assert(s->info.stage == MESA_SHADER_VERTEX); + assert(s->info.stage == MESA_SHADER_VERTEX); - struct hash_table_u64 *stores = _mesa_hash_table_u64_create(NULL); - bool progress = nir_shader_instructions_pass(s, lower_store_component, - nir_metadata_block_index | - nir_metadata_dominance, - stores); - _mesa_hash_table_u64_destroy(stores); - return progress; + struct hash_table_u64 *stores = _mesa_hash_table_u64_create(NULL); + bool progress = nir_shader_instructions_pass( + s, lower_store_component, + nir_metadata_block_index | nir_metadata_dominance, stores); + _mesa_hash_table_u64_destroy(stores); + return progress; } diff --git a/src/panfrost/util/pan_lower_writeout.c b/src/panfrost/util/pan_lower_writeout.c index d4099fb5288..bbd4a1f01d9 100644 --- a/src/panfrost/util/pan_lower_writeout.c +++ b/src/panfrost/util/pan_lower_writeout.c @@ -22,8 +22,8 @@ * SOFTWARE. */ -#include "pan_ir.h" #include "compiler/nir/nir_builder.h" +#include "pan_ir.h" /* Midgard can write all of color, depth and stencil in a single writeout * operation, so we merge depth/stencil stores with color stores. 
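To illustrate the merge in simplified pseudo-NIR (not the exact syntax, and not part of the patch):

/*
 * A fragment shader writing color to RT0 and gl_FragDepth,
 *
 *    store_output(color, ...)   <- FRAG_RESULT_DATA0
 *    store_output(z, ...)       <- FRAG_RESULT_DEPTH
 *
 * becomes a single combined writeout, with zeroes feeding the unused
 * stencil and dual-source slots and the component index carrying
 * PAN_WRITEOUT_C | PAN_WRITEOUT_Z:
 *
 *    store_combined_output_pan(color, target, z, 0, zero4)
 */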
@@ -42,150 +42,153 @@ static nir_alu_type pan_nir_rt_store_type(nir_intrinsic_instr *store) { - return store ? nir_intrinsic_src_type(store) : nir_type_float32; + return store ? nir_intrinsic_src_type(store) : nir_type_float32; } static void -pan_nir_emit_combined_store(nir_builder *b, - nir_intrinsic_instr *rt0_store, - unsigned writeout, - nir_intrinsic_instr **stores) +pan_nir_emit_combined_store(nir_builder *b, nir_intrinsic_instr *rt0_store, + unsigned writeout, nir_intrinsic_instr **stores) { - nir_intrinsic_instr *intr = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_combined_output_pan); + nir_intrinsic_instr *intr = nir_intrinsic_instr_create( + b->shader, nir_intrinsic_store_combined_output_pan); - intr->num_components = rt0_store ? rt0_store->src[0].ssa->num_components : 4; + intr->num_components = rt0_store ? rt0_store->src[0].ssa->num_components : 4; - if (rt0_store) - nir_intrinsic_set_io_semantics(intr, nir_intrinsic_io_semantics(rt0_store)); - nir_intrinsic_set_src_type(intr, pan_nir_rt_store_type(rt0_store)); - nir_intrinsic_set_dest_type(intr, pan_nir_rt_store_type(stores[2])); - nir_intrinsic_set_component(intr, writeout); + if (rt0_store) + nir_intrinsic_set_io_semantics(intr, + nir_intrinsic_io_semantics(rt0_store)); + nir_intrinsic_set_src_type(intr, pan_nir_rt_store_type(rt0_store)); + nir_intrinsic_set_dest_type(intr, pan_nir_rt_store_type(stores[2])); + nir_intrinsic_set_component(intr, writeout); - nir_ssa_def *zero = nir_imm_int(b, 0); - nir_ssa_def *zero4 = nir_imm_ivec4(b, 0, 0, 0, 0); + nir_ssa_def *zero = nir_imm_int(b, 0); + nir_ssa_def *zero4 = nir_imm_ivec4(b, 0, 0, 0, 0); - nir_ssa_def *src[] = { - rt0_store ? rt0_store->src[0].ssa : zero4, - rt0_store ? rt0_store->src[1].ssa : zero, - stores[0] ? stores[0]->src[0].ssa : zero, - stores[1] ? stores[1]->src[0].ssa : zero, - stores[2] ? stores[2]->src[0].ssa : zero4, - }; + nir_ssa_def *src[] = { + rt0_store ? rt0_store->src[0].ssa : zero4, + rt0_store ? rt0_store->src[1].ssa : zero, + stores[0] ? stores[0]->src[0].ssa : zero, + stores[1] ? stores[1]->src[0].ssa : zero, + stores[2] ? 
stores[2]->src[0].ssa : zero4, + }; - for (int i = 0; i < ARRAY_SIZE(src); ++i) - intr->src[i] = nir_src_for_ssa(src[i]); + for (int i = 0; i < ARRAY_SIZE(src); ++i) + intr->src[i] = nir_src_for_ssa(src[i]); - nir_builder_instr_insert(b, &intr->instr); + nir_builder_instr_insert(b, &intr->instr); } bool pan_nir_lower_zs_store(nir_shader *nir) { - bool progress = false; + bool progress = false; - if (nir->info.stage != MESA_SHADER_FRAGMENT) - return false; + if (nir->info.stage != MESA_SHADER_FRAGMENT) + return false; - nir_foreach_function(function, nir) { - if (!function->impl) continue; + nir_foreach_function(function, nir) { + if (!function->impl) + continue; - nir_intrinsic_instr *stores[3] = { NULL }; - unsigned writeout = 0; + nir_intrinsic_instr *stores[3] = {NULL}; + unsigned writeout = 0; - nir_foreach_block(block, function->impl) { - nir_foreach_instr_safe(instr, block) { - if (instr->type != nir_instr_type_intrinsic) - continue; + nir_foreach_block(block, function->impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - if (intr->intrinsic != nir_intrinsic_store_output) - continue; + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic != nir_intrinsic_store_output) + continue; - nir_io_semantics sem = nir_intrinsic_io_semantics(intr); - if (sem.location == FRAG_RESULT_DEPTH) { - stores[0] = intr; - writeout |= PAN_WRITEOUT_Z; - } else if (sem.location == FRAG_RESULT_STENCIL) { - stores[1] = intr; - writeout |= PAN_WRITEOUT_S; - } else if (sem.dual_source_blend_index) { - stores[2] = intr; - writeout |= PAN_WRITEOUT_2; - } - } - } + nir_io_semantics sem = nir_intrinsic_io_semantics(intr); + if (sem.location == FRAG_RESULT_DEPTH) { + stores[0] = intr; + writeout |= PAN_WRITEOUT_Z; + } else if (sem.location == FRAG_RESULT_STENCIL) { + stores[1] = intr; + writeout |= PAN_WRITEOUT_S; + } else if (sem.dual_source_blend_index) { + stores[2] = intr; + writeout |= PAN_WRITEOUT_2; + } + } + } - if (!writeout) continue; + if (!writeout) + continue; - nir_block *common_block = NULL; + nir_block *common_block = NULL; - /* Ensure all stores are in the same block */ - for (unsigned i = 0; i < ARRAY_SIZE(stores); ++i) { - if (!stores[i]) - continue; + /* Ensure all stores are in the same block */ + for (unsigned i = 0; i < ARRAY_SIZE(stores); ++i) { + if (!stores[i]) + continue; - nir_block *block = stores[i]->instr.block; + nir_block *block = stores[i]->instr.block; - if (common_block) - assert(common_block == block); - else - common_block = block; - } + if (common_block) + assert(common_block == block); + else + common_block = block; + } - bool replaced = false; + bool replaced = false; - nir_foreach_block(block, function->impl) { - nir_foreach_instr_safe(instr, block) { - if (instr->type != nir_instr_type_intrinsic) - continue; + nir_foreach_block(block, function->impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - if (intr->intrinsic != nir_intrinsic_store_output) - continue; + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic != nir_intrinsic_store_output) + continue; - nir_io_semantics sem = nir_intrinsic_io_semantics(intr); + nir_io_semantics sem = nir_intrinsic_io_semantics(intr); - if (sem.location < FRAG_RESULT_DATA0) - continue; + if (sem.location < FRAG_RESULT_DATA0) + continue; - if 
(sem.dual_source_blend_index) - continue; + if (sem.dual_source_blend_index) + continue; - assert(nir_src_is_const(intr->src[1]) && "no indirect outputs"); + assert(nir_src_is_const(intr->src[1]) && "no indirect outputs"); - nir_builder b; - nir_builder_init(&b, function->impl); - b.cursor = nir_after_block_before_jump(instr->block); + nir_builder b; + nir_builder_init(&b, function->impl); + b.cursor = nir_after_block_before_jump(instr->block); - /* Trying to write depth twice results in the - * wrong blend shader being executed on - * Midgard */ - unsigned this_store = PAN_WRITEOUT_C | (replaced ? 0 : writeout); + /* Trying to write depth twice results in the + * wrong blend shader being executed on + * Midgard */ + unsigned this_store = PAN_WRITEOUT_C | (replaced ? 0 : writeout); - pan_nir_emit_combined_store(&b, intr, this_store, stores); + pan_nir_emit_combined_store(&b, intr, this_store, stores); - nir_instr_remove(instr); + nir_instr_remove(instr); - replaced = true; - } - } + replaced = true; + } + } - /* Insert a store to the depth RT (0xff) if needed */ - if (!replaced) { - nir_builder b; - nir_builder_init(&b, function->impl); - b.cursor = nir_after_block_before_jump(common_block); + /* Insert a store to the depth RT (0xff) if needed */ + if (!replaced) { + nir_builder b; + nir_builder_init(&b, function->impl); + b.cursor = nir_after_block_before_jump(common_block); - pan_nir_emit_combined_store(&b, NULL, writeout, stores); - } + pan_nir_emit_combined_store(&b, NULL, writeout, stores); + } - for (unsigned i = 0; i < ARRAY_SIZE(stores); ++i) { - if (stores[i]) - nir_instr_remove(&stores[i]->instr); - } + for (unsigned i = 0; i < ARRAY_SIZE(stores); ++i) { + if (stores[i]) + nir_instr_remove(&stores[i]->instr); + } - nir_metadata_preserve(function->impl, nir_metadata_block_index | nir_metadata_dominance); - progress = true; - } + nir_metadata_preserve(function->impl, + nir_metadata_block_index | nir_metadata_dominance); + progress = true; + } - return progress; + return progress; } diff --git a/src/panfrost/util/pan_lower_xfb.c b/src/panfrost/util/pan_lower_xfb.c index e9620b2e760..3876e60d1d3 100644 --- a/src/panfrost/util/pan_lower_xfb.c +++ b/src/panfrost/util/pan_lower_xfb.c @@ -21,81 +21,77 @@ * SOFTWARE. */ - -#include "pan_ir.h" #include "compiler/nir/nir_builder.h" +#include "pan_ir.h" static void lower_xfb_output(nir_builder *b, nir_intrinsic_instr *intr, unsigned start_component, unsigned num_components, unsigned buffer, unsigned offset_words) { - assert(buffer < MAX_XFB_BUFFERS); - assert(nir_intrinsic_component(intr) == 0); // TODO + assert(buffer < MAX_XFB_BUFFERS); + assert(nir_intrinsic_component(intr) == 0); // TODO - /* Transform feedback info in units of words, convert to bytes. */ - uint16_t stride = b->shader->info.xfb_stride[buffer] * 4; - assert(stride != 0); + /* Transform feedback info in units of words, convert to bytes. 
*/ + uint16_t stride = b->shader->info.xfb_stride[buffer] * 4; + assert(stride != 0); - uint16_t offset = offset_words * 4; + uint16_t offset = offset_words * 4; - nir_ssa_def *index = nir_iadd(b, - nir_imul(b, nir_load_instance_id(b), - nir_load_num_vertices(b)), - nir_load_vertex_id_zero_base(b)); + nir_ssa_def *index = nir_iadd( + b, nir_imul(b, nir_load_instance_id(b), nir_load_num_vertices(b)), + nir_load_vertex_id_zero_base(b)); - BITSET_SET(b->shader->info.system_values_read, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE); - BITSET_SET(b->shader->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID); + BITSET_SET(b->shader->info.system_values_read, + SYSTEM_VALUE_VERTEX_ID_ZERO_BASE); + BITSET_SET(b->shader->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID); - nir_ssa_def *buf = nir_load_xfb_address(b, 64, .base = buffer); - nir_ssa_def *addr = - nir_iadd(b, buf, nir_u2u64(b, - nir_iadd_imm(b, - nir_imul_imm(b, index, stride), - offset))); + nir_ssa_def *buf = nir_load_xfb_address(b, 64, .base = buffer); + nir_ssa_def *addr = nir_iadd( + b, buf, + nir_u2u64(b, nir_iadd_imm(b, nir_imul_imm(b, index, stride), offset))); - assert(intr->src[0].is_ssa && "must lower XFB before lowering SSA"); - nir_ssa_def *src = intr->src[0].ssa; - nir_ssa_def *value = nir_channels(b, src, BITFIELD_MASK(num_components) << start_component); - nir_store_global(b, addr, 4, value, BITFIELD_MASK(num_components)); + assert(intr->src[0].is_ssa && "must lower XFB before lowering SSA"); + nir_ssa_def *src = intr->src[0].ssa; + nir_ssa_def *value = + nir_channels(b, src, BITFIELD_MASK(num_components) << start_component); + nir_store_global(b, addr, 4, value, BITFIELD_MASK(num_components)); } static bool lower_xfb(nir_builder *b, nir_instr *instr, UNUSED void *data) { - if (instr->type != nir_instr_type_intrinsic) - return false; + if (instr->type != nir_instr_type_intrinsic) + return false; - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - if (intr->intrinsic != nir_intrinsic_store_output) - return false; + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic != nir_intrinsic_store_output) + return false; - bool progress = false; + bool progress = false; - b->cursor = nir_before_instr(&intr->instr); + b->cursor = nir_before_instr(&intr->instr); - for (unsigned i = 0; i < 2; ++i) { - nir_io_xfb xfb = i ? nir_intrinsic_io_xfb2(intr) : nir_intrinsic_io_xfb(intr); - for (unsigned j = 0; j < 2; ++j) { - if (!xfb.out[j].num_components) continue; + for (unsigned i = 0; i < 2; ++i) { + nir_io_xfb xfb = + i ? 
nir_intrinsic_io_xfb2(intr) : nir_intrinsic_io_xfb(intr); + for (unsigned j = 0; j < 2; ++j) { + if (!xfb.out[j].num_components) + continue; - lower_xfb_output(b, intr, i*2 + j, - xfb.out[j].num_components, - xfb.out[j].buffer, - xfb.out[j].offset); - progress = true; - } - } + lower_xfb_output(b, intr, i * 2 + j, xfb.out[j].num_components, + xfb.out[j].buffer, xfb.out[j].offset); + progress = true; + } + } - nir_instr_remove(instr); - return progress; + nir_instr_remove(instr); + return progress; } bool pan_lower_xfb(nir_shader *nir) { - return nir_shader_instructions_pass(nir, lower_xfb, - nir_metadata_block_index | - nir_metadata_dominance, NULL); + return nir_shader_instructions_pass( + nir, lower_xfb, nir_metadata_block_index | nir_metadata_dominance, NULL); } - diff --git a/src/panfrost/util/pan_sysval.c b/src/panfrost/util/pan_sysval.c index 78caffb8a55..001e30f9e35 100644 --- a/src/panfrost/util/pan_sysval.c +++ b/src/panfrost/util/pan_sysval.c @@ -24,171 +24,167 @@ * Alyssa Rosenzweig */ -#include "pan_ir.h" #include "compiler/nir/nir_builder.h" +#include "pan_ir.h" /* TODO: ssbo_size */ static int panfrost_sysval_for_ssbo(nir_intrinsic_instr *instr) { - nir_src index = instr->src[0]; - assert(nir_src_is_const(index)); - uint32_t uindex = nir_src_as_uint(index); + nir_src index = instr->src[0]; + assert(nir_src_is_const(index)); + uint32_t uindex = nir_src_as_uint(index); - return PAN_SYSVAL(SSBO, uindex); + return PAN_SYSVAL(SSBO, uindex); } static int panfrost_sysval_for_sampler(nir_intrinsic_instr *instr) { - /* TODO: indirect samplers !!! */ - nir_src index = instr->src[0]; - assert(nir_src_is_const(index)); - uint32_t uindex = nir_src_as_uint(index); + /* TODO: indirect samplers !!! */ + nir_src index = instr->src[0]; + assert(nir_src_is_const(index)); + uint32_t uindex = nir_src_as_uint(index); - return PAN_SYSVAL(SAMPLER, uindex); + return PAN_SYSVAL(SAMPLER, uindex); } static int panfrost_sysval_for_image_size(nir_intrinsic_instr *instr) { - nir_src index = instr->src[0]; - assert(nir_src_is_const(index)); + nir_src index = instr->src[0]; + assert(nir_src_is_const(index)); - bool is_array = nir_intrinsic_image_array(instr); - uint32_t uindex = nir_src_as_uint(index); - unsigned dim = nir_intrinsic_dest_components(instr) - is_array; + bool is_array = nir_intrinsic_image_array(instr); + uint32_t uindex = nir_src_as_uint(index); + unsigned dim = nir_intrinsic_dest_components(instr) - is_array; - return PAN_SYSVAL(IMAGE_SIZE, PAN_TXS_SYSVAL_ID(uindex, dim, is_array)); + return PAN_SYSVAL(IMAGE_SIZE, PAN_TXS_SYSVAL_ID(uindex, dim, is_array)); } static unsigned panfrost_nir_sysval_for_intrinsic(nir_intrinsic_instr *instr) { - switch (instr->intrinsic) { - case nir_intrinsic_load_viewport_scale: - return PAN_SYSVAL_VIEWPORT_SCALE; - case nir_intrinsic_load_viewport_offset: - return PAN_SYSVAL_VIEWPORT_OFFSET; - case nir_intrinsic_load_num_workgroups: - return PAN_SYSVAL_NUM_WORK_GROUPS; - case nir_intrinsic_load_workgroup_size: - return PAN_SYSVAL_LOCAL_GROUP_SIZE; - case nir_intrinsic_load_work_dim: - return PAN_SYSVAL_WORK_DIM; - case nir_intrinsic_load_sample_positions_pan: - return PAN_SYSVAL_SAMPLE_POSITIONS; - case nir_intrinsic_load_first_vertex: - case nir_intrinsic_load_base_vertex: - case nir_intrinsic_load_base_instance: - return PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS; - case nir_intrinsic_load_draw_id: - return PAN_SYSVAL_DRAWID; - case nir_intrinsic_load_ssbo_address: - case nir_intrinsic_get_ssbo_size: - return panfrost_sysval_for_ssbo(instr); - case 
nir_intrinsic_load_xfb_address: - return PAN_SYSVAL(XFB, nir_intrinsic_base(instr)); - case nir_intrinsic_load_num_vertices: - return PAN_SYSVAL_NUM_VERTICES; - case nir_intrinsic_load_sampler_lod_parameters_pan: - return panfrost_sysval_for_sampler(instr); - case nir_intrinsic_image_size: - return panfrost_sysval_for_image_size(instr); - case nir_intrinsic_load_blend_const_color_rgba: - return PAN_SYSVAL_BLEND_CONSTANTS; - default: - return ~0; - } + switch (instr->intrinsic) { + case nir_intrinsic_load_viewport_scale: + return PAN_SYSVAL_VIEWPORT_SCALE; + case nir_intrinsic_load_viewport_offset: + return PAN_SYSVAL_VIEWPORT_OFFSET; + case nir_intrinsic_load_num_workgroups: + return PAN_SYSVAL_NUM_WORK_GROUPS; + case nir_intrinsic_load_workgroup_size: + return PAN_SYSVAL_LOCAL_GROUP_SIZE; + case nir_intrinsic_load_work_dim: + return PAN_SYSVAL_WORK_DIM; + case nir_intrinsic_load_sample_positions_pan: + return PAN_SYSVAL_SAMPLE_POSITIONS; + case nir_intrinsic_load_first_vertex: + case nir_intrinsic_load_base_vertex: + case nir_intrinsic_load_base_instance: + return PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS; + case nir_intrinsic_load_draw_id: + return PAN_SYSVAL_DRAWID; + case nir_intrinsic_load_ssbo_address: + case nir_intrinsic_get_ssbo_size: + return panfrost_sysval_for_ssbo(instr); + case nir_intrinsic_load_xfb_address: + return PAN_SYSVAL(XFB, nir_intrinsic_base(instr)); + case nir_intrinsic_load_num_vertices: + return PAN_SYSVAL_NUM_VERTICES; + case nir_intrinsic_load_sampler_lod_parameters_pan: + return panfrost_sysval_for_sampler(instr); + case nir_intrinsic_image_size: + return panfrost_sysval_for_image_size(instr); + case nir_intrinsic_load_blend_const_color_rgba: + return PAN_SYSVAL_BLEND_CONSTANTS; + default: + return ~0; + } } int panfrost_sysval_for_instr(nir_instr *instr, nir_dest *dest) { - nir_intrinsic_instr *intr; - nir_dest *dst = NULL; - nir_tex_instr *tex; - unsigned sysval = ~0; + nir_intrinsic_instr *intr; + nir_dest *dst = NULL; + nir_tex_instr *tex; + unsigned sysval = ~0; - switch (instr->type) { - case nir_instr_type_intrinsic: - intr = nir_instr_as_intrinsic(instr); - sysval = panfrost_nir_sysval_for_intrinsic(intr); - dst = &intr->dest; - break; - case nir_instr_type_tex: - tex = nir_instr_as_tex(instr); - if (tex->op != nir_texop_txs) - break; + switch (instr->type) { + case nir_instr_type_intrinsic: + intr = nir_instr_as_intrinsic(instr); + sysval = panfrost_nir_sysval_for_intrinsic(intr); + dst = &intr->dest; + break; + case nir_instr_type_tex: + tex = nir_instr_as_tex(instr); + if (tex->op != nir_texop_txs) + break; - sysval = PAN_SYSVAL(TEXTURE_SIZE, - PAN_TXS_SYSVAL_ID(tex->texture_index, - nir_tex_instr_dest_size(tex) - - (tex->is_array ? 1 : 0), - tex->is_array)); - dst = &tex->dest; - break; - default: - break; - } + sysval = PAN_SYSVAL(TEXTURE_SIZE, + PAN_TXS_SYSVAL_ID(tex->texture_index, + nir_tex_instr_dest_size(tex) - + (tex->is_array ? 
1 : 0), + tex->is_array)); + dst = &tex->dest; + break; + default: + break; + } - if (dest && dst) - *dest = *dst; + if (dest && dst) + *dest = *dst; - return sysval; + return sysval; } static unsigned pan_add_sysval(struct hash_table_u64 *sysval_to_id, - struct panfrost_sysvals *sysvals, - int sysval, unsigned id) + struct panfrost_sysvals *sysvals, int sysval, unsigned id) { - assert(id < MAX_SYSVAL_COUNT); - _mesa_hash_table_u64_insert(sysval_to_id, sysval, (void *) ((uintptr_t) id + 1)); - sysvals->sysvals[id] = sysval; - return id; + assert(id < MAX_SYSVAL_COUNT); + _mesa_hash_table_u64_insert(sysval_to_id, sysval, + (void *)((uintptr_t)id + 1)); + sysvals->sysvals[id] = sysval; + return id; } unsigned pan_lookup_sysval(struct hash_table_u64 *sysval_to_id, - struct panfrost_sysvals *sysvals, - int sysval) + struct panfrost_sysvals *sysvals, int sysval) { - /* Try to lookup */ + /* Try to lookup */ - void *cached = _mesa_hash_table_u64_search(sysval_to_id, sysval); + void *cached = _mesa_hash_table_u64_search(sysval_to_id, sysval); - if (cached) { - unsigned id = ((uintptr_t) cached) - 1; - assert(id < MAX_SYSVAL_COUNT); - assert(sysvals->sysvals[id] == sysval); - return id; - } + if (cached) { + unsigned id = ((uintptr_t)cached) - 1; + assert(id < MAX_SYSVAL_COUNT); + assert(sysvals->sysvals[id] == sysval); + return id; + } - /* Else assign */ - return pan_add_sysval(sysval_to_id, sysvals, sysval, - sysvals->sysval_count++); + /* Else assign */ + return pan_add_sysval(sysval_to_id, sysvals, sysval, + sysvals->sysval_count++); } struct hash_table_u64 * panfrost_init_sysvals(struct panfrost_sysvals *sysvals, - struct panfrost_sysvals *fixed_sysvals, - void *memctx) + struct panfrost_sysvals *fixed_sysvals, void *memctx) { - memset(sysvals, 0, sizeof(*sysvals)); - struct hash_table_u64 *sysval_to_id = - _mesa_hash_table_u64_create(memctx); + memset(sysvals, 0, sizeof(*sysvals)); + struct hash_table_u64 *sysval_to_id = _mesa_hash_table_u64_create(memctx); - if (fixed_sysvals) { - for (unsigned i = 0; i < fixed_sysvals->sysval_count; i++) { - if (!fixed_sysvals->sysvals[i]) - continue; + if (fixed_sysvals) { + for (unsigned i = 0; i < fixed_sysvals->sysval_count; i++) { + if (!fixed_sysvals->sysvals[i]) + continue; - pan_add_sysval(sysval_to_id, sysvals, - fixed_sysvals->sysvals[i], i); - } - sysvals->sysval_count = fixed_sysvals->sysval_count; - } + pan_add_sysval(sysval_to_id, sysvals, fixed_sysvals->sysvals[i], i); + } + sysvals->sysval_count = fixed_sysvals->sysval_count; + } - return sysval_to_id; + return sysval_to_id; }
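
For context, the sysval helpers in the pan_sysval.c hunk above are meant to be driven by a compiler backend: panfrost_sysval_for_instr() classifies an instruction (returning ~0 when it does not read a sysval), and pan_lookup_sysval() returns a stable index into the sysval table, allocating a slot on first use. The sketch below shows that flow end to end. It is illustrative only and not part of this patch; the wrapper function collect_shader_sysvals and its bare traversal loop are assumptions, while panfrost_init_sysvals(), panfrost_sysval_for_instr() and pan_lookup_sysval() are the real helpers being reformatted here.

/* Illustrative sketch, not part of the patch: walk a shader and build its
 * sysval table using the helpers above.  Only the three pan_* calls are real;
 * the wrapper itself is hypothetical. */
#include "compiler/nir/nir.h"
#include "pan_ir.h"

static void
collect_shader_sysvals(nir_shader *nir, struct panfrost_sysvals *sysvals,
                       void *memctx)
{
   /* The lookup table is created against memctx (assumption based on the
    * memctx parameter of panfrost_init_sysvals), so it needs no explicit
    * teardown here. */
   struct hash_table_u64 *sysval_to_id =
      panfrost_init_sysvals(sysvals, NULL, memctx);

   nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            /* ~0 means "this instruction does not read a sysval" */
            int sysval = panfrost_sysval_for_instr(instr, NULL);
            if (sysval == ~0)
               continue;

            /* Stable index into sysvals->sysvals[], allocated on first use */
            unsigned id = pan_lookup_sysval(sysval_to_id, sysvals, sysval);
            assert(id < MAX_SYSVAL_COUNT);
         }
      }
   }
}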
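
Similarly, the address computation built by lower_xfb_output() in the pan_lower_xfb.c hunk earlier in this patch reduces to simple integer arithmetic: NIR supplies the transform feedback stride and offset in 32-bit words, and each vertex writes its captured output at buffer_base + (instance_id * num_vertices + vertex_id) * stride_bytes + offset_bytes. The helper below restates that math in plain C so it is easy to check against the NIR being emitted; the standalone function and its name are illustrative and do not come from the tree.

#include <stdint.h>

/* Restates the math emitted by lower_xfb_output(): the 32-bit index
 * arithmetic is widened to 64 bits only at the end, matching the
 * nir_u2u64() in the lowering.  Function name is illustrative. */
static inline uint64_t
example_xfb_store_address(uint64_t buffer_base, uint32_t instance_id,
                          uint32_t num_vertices, uint32_t vertex_id_zero_base,
                          uint16_t stride_words, uint16_t offset_words)
{
   /* One record per vertex of every instance, laid out back to back */
   uint32_t index = instance_id * num_vertices + vertex_id_zero_base;

   /* Stride and offset arrive in words; convert to bytes as the pass does */
   uint32_t byte_offset = index * (uint32_t)(stride_words * 4) +
                          (uint32_t)(offset_words * 4);

   return buffer_base + byte_offset;
}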