amd/common: switch to 3-spaces style

Follow-up of !4319 using the same clang-format config.

Acked-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Acked-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5310>
2026-06-16 04:38:30 +02:00 · 2020-09-07 09:58:36 +02:00 · 2020-09-07 09:58:36 +02:00 · e5fb9dca2a
commit e5fb9dca2a
parent 82d2d73e03
22 changed files with 7198 additions and 7379 deletions
--- a/src/amd/common/.editorconfig
+++ b/src/amd/common/.editorconfig
@ -1,3 +0,0 @@
-[*.{c,h}]
-indent_style = tab
-indent_size = tab
--- a/src/amd/common/ac_binary.c
+++ b/src/amd/common/ac_binary.c
@ -21,132 +21,129 @@
 * SOFTWARE.
 */

-#include "ac_gpu_info.h"
 #include "ac_binary.h"

+#include "ac_gpu_info.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"

 #include <gelf.h>
 #include <libelf.h>
+#include <sid.h>
 #include <stdio.h>

-#include <sid.h>
-
-#define SPILLED_SGPRS                                     0x4
-#define SPILLED_VGPRS                                     0x8
+#define SPILLED_SGPRS 0x4
+#define SPILLED_VGPRS 0x8

 /* Parse configuration data in .AMDGPU.config section format. */
-void ac_parse_shader_binary_config(const char *data, size_t nbytes,
-				   unsigned wave_size,
-				   bool really_needs_scratch,
-				   const struct radeon_info *info,
-				   struct ac_shader_config *conf)
+void ac_parse_shader_binary_config(const char *data, size_t nbytes, unsigned wave_size,
+                                   bool really_needs_scratch, const struct radeon_info *info,
+                                   struct ac_shader_config *conf)
 {
-	uint32_t scratch_size = 0;
+   uint32_t scratch_size = 0;

-	for (size_t i = 0; i < nbytes; i += 8) {
-		unsigned reg = util_le32_to_cpu(*(uint32_t*)(data + i));
-		unsigned value = util_le32_to_cpu(*(uint32_t*)(data + i + 4));
-		switch (reg) {
-		case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
-		case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
-		case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
-		case R_00B848_COMPUTE_PGM_RSRC1:
-		case R_00B428_SPI_SHADER_PGM_RSRC1_HS:
-			if (wave_size == 32)
-				conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 8);
-			else
-				conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
+   for (size_t i = 0; i < nbytes; i += 8) {
+      unsigned reg = util_le32_to_cpu(*(uint32_t *)(data + i));
+      unsigned value = util_le32_to_cpu(*(uint32_t *)(data + i + 4));
+      switch (reg) {
+      case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
+      case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
+      case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
+      case R_00B848_COMPUTE_PGM_RSRC1:
+      case R_00B428_SPI_SHADER_PGM_RSRC1_HS:
+         if (wave_size == 32)
+            conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 8);
+         else
+            conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);

-			conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
-			/* TODO: LLVM doesn't set FLOAT_MODE for non-compute shaders */
-			conf->float_mode =  G_00B028_FLOAT_MODE(value);
-			conf->rsrc1 = value;
-			break;
-		case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
-			conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
-			/* TODO: LLVM doesn't set SHARED_VGPR_CNT for all shader types */
-			conf->num_shared_vgprs = G_00B02C_SHARED_VGPR_CNT(value);
-			conf->rsrc2 = value;
-			break;
-		case R_00B12C_SPI_SHADER_PGM_RSRC2_VS:
-			conf->num_shared_vgprs = G_00B12C_SHARED_VGPR_CNT(value);
-			conf->rsrc2 = value;
-			break;
-		case R_00B22C_SPI_SHADER_PGM_RSRC2_GS:
-			conf->num_shared_vgprs = G_00B22C_SHARED_VGPR_CNT(value);
-			conf->rsrc2 = value;
-			break;
-		case R_00B42C_SPI_SHADER_PGM_RSRC2_HS:
-			conf->num_shared_vgprs = G_00B42C_SHARED_VGPR_CNT(value);
-			conf->rsrc2 = value;
-			break;
-		case R_00B84C_COMPUTE_PGM_RSRC2:
-			conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value));
-			conf->rsrc2 = value;
-			break;
-		case R_00B8A0_COMPUTE_PGM_RSRC3:
-			conf->num_shared_vgprs = G_00B8A0_SHARED_VGPR_CNT(value);
-			conf->rsrc3 = value;
-			break;
-		case R_0286CC_SPI_PS_INPUT_ENA:
-			conf->spi_ps_input_ena = value;
-			break;
-		case R_0286D0_SPI_PS_INPUT_ADDR:
-			conf->spi_ps_input_addr = value;
-			break;
-		case R_0286E8_SPI_TMPRING_SIZE:
-		case R_00B860_COMPUTE_TMPRING_SIZE:
-			/* WAVESIZE is in units of 256 dwords. */
-			scratch_size = value;
-			break;
-		case SPILLED_SGPRS:
-			conf->spilled_sgprs = value;
-			break;
-		case SPILLED_VGPRS:
-			conf->spilled_vgprs = value;
-			break;
-		default:
-			{
-				static bool printed;
+         conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
+         /* TODO: LLVM doesn't set FLOAT_MODE for non-compute shaders */
+         conf->float_mode = G_00B028_FLOAT_MODE(value);
+         conf->rsrc1 = value;
+         break;
+      case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
+         conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
+         /* TODO: LLVM doesn't set SHARED_VGPR_CNT for all shader types */
+         conf->num_shared_vgprs = G_00B02C_SHARED_VGPR_CNT(value);
+         conf->rsrc2 = value;
+         break;
+      case R_00B12C_SPI_SHADER_PGM_RSRC2_VS:
+         conf->num_shared_vgprs = G_00B12C_SHARED_VGPR_CNT(value);
+         conf->rsrc2 = value;
+         break;
+      case R_00B22C_SPI_SHADER_PGM_RSRC2_GS:
+         conf->num_shared_vgprs = G_00B22C_SHARED_VGPR_CNT(value);
+         conf->rsrc2 = value;
+         break;
+      case R_00B42C_SPI_SHADER_PGM_RSRC2_HS:
+         conf->num_shared_vgprs = G_00B42C_SHARED_VGPR_CNT(value);
+         conf->rsrc2 = value;
+         break;
+      case R_00B84C_COMPUTE_PGM_RSRC2:
+         conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value));
+         conf->rsrc2 = value;
+         break;
+      case R_00B8A0_COMPUTE_PGM_RSRC3:
+         conf->num_shared_vgprs = G_00B8A0_SHARED_VGPR_CNT(value);
+         conf->rsrc3 = value;
+         break;
+      case R_0286CC_SPI_PS_INPUT_ENA:
+         conf->spi_ps_input_ena = value;
+         break;
+      case R_0286D0_SPI_PS_INPUT_ADDR:
+         conf->spi_ps_input_addr = value;
+         break;
+      case R_0286E8_SPI_TMPRING_SIZE:
+      case R_00B860_COMPUTE_TMPRING_SIZE:
+         /* WAVESIZE is in units of 256 dwords. */
+         scratch_size = value;
+         break;
+      case SPILLED_SGPRS:
+         conf->spilled_sgprs = value;
+         break;
+      case SPILLED_VGPRS:
+         conf->spilled_vgprs = value;
+         break;
+      default: {
+         static bool printed;

-				if (!printed) {
-					fprintf(stderr, "Warning: LLVM emitted unknown "
-						"config register: 0x%x\n", reg);
-					printed = true;
-				}
-			}
-			break;
-		}
-	}
+         if (!printed) {
+            fprintf(stderr,
+                    "Warning: LLVM emitted unknown "
+                    "config register: 0x%x\n",
+                    reg);
+            printed = true;
+         }
+      } break;
+      }
+   }

-	if (!conf->spi_ps_input_addr)
-		conf->spi_ps_input_addr = conf->spi_ps_input_ena;
+   if (!conf->spi_ps_input_addr)
+      conf->spi_ps_input_addr = conf->spi_ps_input_ena;

-	if (really_needs_scratch) {
-		/* sgprs spills aren't spilling */
-	        conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(scratch_size) * 256 * 4;
-	}
+   if (really_needs_scratch) {
+      /* sgprs spills aren't spilling */
+      conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(scratch_size) * 256 * 4;
+   }

-	/* GFX 10.3 internally:
-	 * - aligns VGPRS to 16 for Wave32 and 8 for Wave64
-	 * - aligns LDS to 1024
-	 *
-	 * For shader-db stats, set num_vgprs that the hw actually uses.
-	 */
-	if (info->chip_class >= GFX10_3) {
-		conf->num_vgprs = align(conf->num_vgprs, wave_size == 32 ? 16 : 8);
-	}
+   /* GFX 10.3 internally:
+    * - aligns VGPRS to 16 for Wave32 and 8 for Wave64
+    * - aligns LDS to 1024
+    *
+    * For shader-db stats, set num_vgprs that the hw actually uses.
+    */
+   if (info->chip_class >= GFX10_3) {
+      conf->num_vgprs = align(conf->num_vgprs, wave_size == 32 ? 16 : 8);
+   }

-	/* Enable 64-bit and 16-bit denormals, because there is no performance
-	 * cost.
-	 *
-	 * Don't enable denormals for 32-bit floats, because:
-	 * - denormals disable output modifiers
-	 * - denormals break v_mad_f32
-	 * - GFX6 & GFX7 would be very slow
-	 */
-	conf->float_mode &= ~V_00B028_FP_ALL_DENORMS;
-	conf->float_mode |= V_00B028_FP_64_DENORMS;
+   /* Enable 64-bit and 16-bit denormals, because there is no performance
+    * cost.
+    *
+    * Don't enable denormals for 32-bit floats, because:
+    * - denormals disable output modifiers
+    * - denormals break v_mad_f32
+    * - GFX6 & GFX7 would be very slow
+    */
+   conf->float_mode &= ~V_00B028_FP_ALL_DENORMS;
+   conf->float_mode |= V_00B028_FP_64_DENORMS;
 }
--- a/src/amd/common/ac_binary.h
+++ b/src/amd/common/ac_binary.h
@ -24,9 +24,9 @@
 #ifndef AC_BINARY_H
 #define AC_BINARY_H

+#include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
-#include <stdbool.h>

 #ifdef __cplusplus
 extern "C" {
@ -35,26 +35,24 @@ extern "C" {
 struct radeon_info;

 struct ac_shader_config {
-	unsigned num_sgprs;
-	unsigned num_vgprs;
-	unsigned num_shared_vgprs; /* GFX10: number of VGPRs shared between half-waves */
-	unsigned spilled_sgprs;
-	unsigned spilled_vgprs;
-	unsigned lds_size; /* in HW allocation units; i.e 256 bytes on SI, 512 bytes on CI+ */
-	unsigned spi_ps_input_ena;
-	unsigned spi_ps_input_addr;
-	unsigned float_mode;
-	unsigned scratch_bytes_per_wave;
-	unsigned rsrc1;
-	unsigned rsrc2;
-	unsigned rsrc3;
+   unsigned num_sgprs;
+   unsigned num_vgprs;
+   unsigned num_shared_vgprs; /* GFX10: number of VGPRs shared between half-waves */
+   unsigned spilled_sgprs;
+   unsigned spilled_vgprs;
+   unsigned lds_size; /* in HW allocation units; i.e 256 bytes on SI, 512 bytes on CI+ */
+   unsigned spi_ps_input_ena;
+   unsigned spi_ps_input_addr;
+   unsigned float_mode;
+   unsigned scratch_bytes_per_wave;
+   unsigned rsrc1;
+   unsigned rsrc2;
+   unsigned rsrc3;
 };

-void ac_parse_shader_binary_config(const char *data, size_t nbytes,
-				   unsigned wave_size,
-				   bool really_needs_scratch,
-				   const struct radeon_info *info,
-				   struct ac_shader_config *conf);
+void ac_parse_shader_binary_config(const char *data, size_t nbytes, unsigned wave_size,
+                                   bool really_needs_scratch, const struct radeon_info *info,
+                                   struct ac_shader_config *conf);

 #ifdef __cplusplus
 }
--- a/src/amd/common/ac_debug.c
+++ b/src/amd/common/ac_debug.c
--- a/src/amd/common/ac_debug.h
+++ b/src/amd/common/ac_debug.h
@ -24,15 +24,15 @@
 #ifndef AC_DEBUG_H
 #define AC_DEBUG_H

-#include <stdint.h>
-#include <stdio.h>
-#include <stdbool.h>
-
 #include "amd_family.h"

-#define AC_ENCODE_TRACE_POINT(id)       (0xcafe0000 | ((id) & 0xffff))
-#define AC_IS_TRACE_POINT(x)            (((x) & 0xcafe0000) == 0xcafe0000)
-#define AC_GET_TRACE_POINT_ID(x)        ((x) & 0xffff)
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#define AC_ENCODE_TRACE_POINT(id) (0xcafe0000 | ((id)&0xffff))
+#define AC_IS_TRACE_POINT(x)      (((x)&0xcafe0000) == 0xcafe0000)
+#define AC_GET_TRACE_POINT_ID(x)  ((x)&0xffff)

 #define AC_MAX_WAVES_PER_CHIP (64 * 40)

@ -41,36 +41,36 @@ extern "C" {
 #endif

 struct ac_wave_info {
-	unsigned se; /* shader engine */
-	unsigned sh; /* shader array */
-	unsigned cu; /* compute unit */
-	unsigned simd;
-	unsigned wave;
-	uint32_t status;
-	uint64_t pc; /* program counter */
-	uint32_t inst_dw0;
-	uint32_t inst_dw1;
-	uint64_t exec;
-	bool matched; /* whether the wave is used by a currently-bound shader */
+   unsigned se; /* shader engine */
+   unsigned sh; /* shader array */
+   unsigned cu; /* compute unit */
+   unsigned simd;
+   unsigned wave;
+   uint32_t status;
+   uint64_t pc; /* program counter */
+   uint32_t inst_dw0;
+   uint32_t inst_dw1;
+   uint64_t exec;
+   bool matched; /* whether the wave is used by a currently-bound shader */
 };

 typedef void *(*ac_debug_addr_callback)(void *data, uint64_t addr);

 const char *ac_get_register_name(enum chip_class chip_class, unsigned offset);
-void ac_dump_reg(FILE *file, enum chip_class chip_class, unsigned offset,
-		 uint32_t value, uint32_t field_mask);
+void ac_dump_reg(FILE *file, enum chip_class chip_class, unsigned offset, uint32_t value,
+                 uint32_t field_mask);
 void ac_parse_ib_chunk(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids,
-		       unsigned trace_id_count, enum chip_class chip_class,
-		       ac_debug_addr_callback addr_callback, void *addr_callback_data);
-void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids,
-		 unsigned trace_id_count, const char *name, enum chip_class chip_class,
-		 ac_debug_addr_callback addr_callback, void *addr_callback_data);
+                       unsigned trace_id_count, enum chip_class chip_class,
+                       ac_debug_addr_callback addr_callback, void *addr_callback_data);
+void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids, unsigned trace_id_count,
+                 const char *name, enum chip_class chip_class, ac_debug_addr_callback addr_callback,
+                 void *addr_callback_data);

-bool ac_vm_fault_occured(enum chip_class chip_class,
-			 uint64_t *old_dmesg_timestamp, uint64_t *out_addr);
+bool ac_vm_fault_occured(enum chip_class chip_class, uint64_t *old_dmesg_timestamp,
+                         uint64_t *out_addr);

 unsigned ac_get_wave_info(enum chip_class chip_class,
-			  struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP]);
+                          struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP]);

 #ifdef __cplusplus
 }
--- a/src/amd/common/ac_exp_param.h
+++ b/src/amd/common/ac_exp_param.h
@ -25,16 +25,17 @@
 #ifndef AC_EXP_PARAM_H
 #define AC_EXP_PARAM_H

-enum {
-	/* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */
-	AC_EXP_PARAM_OFFSET_0 = 0,
-	AC_EXP_PARAM_OFFSET_31 = 31,
-	/* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */
-	AC_EXP_PARAM_DEFAULT_VAL_0000 = 64,
-	AC_EXP_PARAM_DEFAULT_VAL_0001,
-	AC_EXP_PARAM_DEFAULT_VAL_1110,
-	AC_EXP_PARAM_DEFAULT_VAL_1111,
-	AC_EXP_PARAM_UNDEFINED = 255,
+enum
+{
+   /* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */
+   AC_EXP_PARAM_OFFSET_0 = 0,
+   AC_EXP_PARAM_OFFSET_31 = 31,
+   /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */
+   AC_EXP_PARAM_DEFAULT_VAL_0000 = 64,
+   AC_EXP_PARAM_DEFAULT_VAL_0001,
+   AC_EXP_PARAM_DEFAULT_VAL_1110,
+   AC_EXP_PARAM_DEFAULT_VAL_1111,
+   AC_EXP_PARAM_UNDEFINED = 255,
 };

 #endif
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@ -26,10 +26,11 @@
 #ifndef AC_GPU_INFO_H
 #define AC_GPU_INFO_H

+#include "amd_family.h"
+
+#include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
-#include <stdbool.h>
-#include "amd_family.h"

 #ifdef __cplusplus
 extern "C" {
@ -38,186 +39,179 @@ extern "C" {
 struct amdgpu_gpu_info;

 struct radeon_info {
-	/* PCI info: domain:bus:dev:func */
-	uint32_t                    pci_domain;
-	uint32_t                    pci_bus;
-	uint32_t                    pci_dev;
-	uint32_t                    pci_func;
+   /* PCI info: domain:bus:dev:func */
+   uint32_t pci_domain;
+   uint32_t pci_bus;
+   uint32_t pci_dev;
+   uint32_t pci_func;

-	/* Device info. */
-	const char                  *name;
-	const char                  *marketing_name;
-	bool                        is_pro_graphics;
-	uint32_t                    pci_id;
-	uint32_t                    pci_rev_id;
-	enum radeon_family          family;
-	enum chip_class             chip_class;
-	uint32_t                    family_id;
-	uint32_t                    chip_external_rev;
-	uint32_t                    clock_crystal_freq;
+   /* Device info. */
+   const char *name;
+   const char *marketing_name;
+   bool is_pro_graphics;
+   uint32_t pci_id;
+   uint32_t pci_rev_id;
+   enum radeon_family family;
+   enum chip_class chip_class;
+   uint32_t family_id;
+   uint32_t chip_external_rev;
+   uint32_t clock_crystal_freq;

-	/* Features. */
-	bool                        has_graphics; /* false if the chip is compute-only */
-	uint32_t                    num_rings[NUM_RING_TYPES];
-	uint32_t                    ib_pad_dw_mask[NUM_RING_TYPES];
-	bool                        has_clear_state;
-	bool                        has_distributed_tess;
-	bool                        has_dcc_constant_encode;
-	bool                        has_rbplus; /* if RB+ registers exist */
-	bool                        rbplus_allowed; /* if RB+ is allowed */
-	bool                        has_load_ctx_reg_pkt;
-	bool                        has_out_of_order_rast;
-	bool                        has_packed_math_16bit;
-	bool                        cpdma_prefetch_writes_memory;
-	bool                        has_gfx9_scissor_bug;
-	bool                        has_tc_compat_zrange_bug;
-	bool                        has_msaa_sample_loc_bug;
-	bool                        has_ls_vgpr_init_bug;
+   /* Features. */
+   bool has_graphics; /* false if the chip is compute-only */
+   uint32_t num_rings[NUM_RING_TYPES];
+   uint32_t ib_pad_dw_mask[NUM_RING_TYPES];
+   bool has_clear_state;
+   bool has_distributed_tess;
+   bool has_dcc_constant_encode;
+   bool has_rbplus;     /* if RB+ registers exist */
+   bool rbplus_allowed; /* if RB+ is allowed */
+   bool has_load_ctx_reg_pkt;
+   bool has_out_of_order_rast;
+   bool has_packed_math_16bit;
+   bool cpdma_prefetch_writes_memory;
+   bool has_gfx9_scissor_bug;
+   bool has_tc_compat_zrange_bug;
+   bool has_msaa_sample_loc_bug;
+   bool has_ls_vgpr_init_bug;

-	/* Display features. */
-	/* There are 2 display DCC codepaths, because display expects unaligned DCC. */
-	/* Disable RB and pipe alignment to skip the retile blit. (1 RB chips only) */
-	bool                        use_display_dcc_unaligned;
-	/* Allocate both aligned and unaligned DCC and use the retile blit. */
-	bool                        use_display_dcc_with_retile_blit;
+   /* Display features. */
+   /* There are 2 display DCC codepaths, because display expects unaligned DCC. */
+   /* Disable RB and pipe alignment to skip the retile blit. (1 RB chips only) */
+   bool use_display_dcc_unaligned;
+   /* Allocate both aligned and unaligned DCC and use the retile blit. */
+   bool use_display_dcc_with_retile_blit;

-	/* Memory info. */
-	uint32_t                    pte_fragment_size;
-	uint32_t                    gart_page_size;
-	uint64_t                    gart_size;
-	uint64_t                    vram_size;
-	uint64_t                    vram_vis_size;
-	uint32_t                    vram_bit_width;
-	uint32_t                    vram_type;
-	unsigned                    gds_size;
-	unsigned                    gds_gfx_partition_size;
-	uint64_t                    max_alloc_size;
-	uint32_t                    min_alloc_size;
-	uint32_t                    address32_hi;
-	bool                        has_dedicated_vram;
-	bool                        has_l2_uncached;
-	bool                        r600_has_virtual_memory;
-	uint32_t                    num_sdp_interfaces;
-	uint32_t                    num_tcc_blocks;
-	uint32_t                    tcc_cache_line_size;
-	bool			    tcc_harvested;
-	unsigned                    pc_lines;
-	uint32_t                    lds_size_per_workgroup;
-	uint32_t                    lds_granularity;
-	uint32_t                    max_memory_clock;
-	uint32_t                    ce_ram_size;
-	uint32_t                    l1_cache_size;
-	uint32_t                    l2_cache_size;
+   /* Memory info. */
+   uint32_t pte_fragment_size;
+   uint32_t gart_page_size;
+   uint64_t gart_size;
+   uint64_t vram_size;
+   uint64_t vram_vis_size;
+   uint32_t vram_bit_width;
+   uint32_t vram_type;
+   unsigned gds_size;
+   unsigned gds_gfx_partition_size;
+   uint64_t max_alloc_size;
+   uint32_t min_alloc_size;
+   uint32_t address32_hi;
+   bool has_dedicated_vram;
+   bool has_l2_uncached;
+   bool r600_has_virtual_memory;
+   uint32_t num_sdp_interfaces;
+   uint32_t num_tcc_blocks;
+   uint32_t tcc_cache_line_size;
+   bool tcc_harvested;
+   unsigned pc_lines;
+   uint32_t lds_size_per_workgroup;
+   uint32_t lds_granularity;
+   uint32_t max_memory_clock;
+   uint32_t ce_ram_size;
+   uint32_t l1_cache_size;
+   uint32_t l2_cache_size;

-	/* CP info. */
-	bool                        gfx_ib_pad_with_type2;
-	unsigned                    ib_alignment; /* both start and size alignment */
-	uint32_t                    me_fw_version;
-	uint32_t                    me_fw_feature;
-	uint32_t                    pfp_fw_version;
-	uint32_t                    pfp_fw_feature;
-	uint32_t                    ce_fw_version;
-	uint32_t                    ce_fw_feature;
+   /* CP info. */
+   bool gfx_ib_pad_with_type2;
+   unsigned ib_alignment; /* both start and size alignment */
+   uint32_t me_fw_version;
+   uint32_t me_fw_feature;
+   uint32_t pfp_fw_version;
+   uint32_t pfp_fw_feature;
+   uint32_t ce_fw_version;
+   uint32_t ce_fw_feature;

-	/* Multimedia info. */
-	bool                        has_hw_decode;
-	bool                        uvd_enc_supported;
-	uint32_t                    uvd_fw_version;
-	uint32_t                    vce_fw_version;
-	uint32_t                    vce_harvest_config;
+   /* Multimedia info. */
+   bool has_hw_decode;
+   bool uvd_enc_supported;
+   uint32_t uvd_fw_version;
+   uint32_t vce_fw_version;
+   uint32_t vce_harvest_config;

-	/* Kernel & winsys capabilities. */
-	uint32_t                    drm_major; /* version */
-	uint32_t                    drm_minor;
-	uint32_t                    drm_patchlevel;
-	bool                        is_amdgpu;
-	bool                        has_userptr;
-	bool                        has_syncobj;
-	bool                        has_syncobj_wait_for_submit;
-	bool                        has_timeline_syncobj;
-	bool                        has_fence_to_handle;
-	bool                        has_ctx_priority;
-	bool                        has_local_buffers;
-	bool                        kernel_flushes_hdp_before_ib;
-	bool                        htile_cmask_support_1d_tiling;
-	bool                        si_TA_CS_BC_BASE_ADDR_allowed;
-	bool                        has_bo_metadata;
-	bool                        has_gpu_reset_status_query;
-	bool                        has_eqaa_surface_allocator;
-	bool                        has_format_bc1_through_bc7;
-	bool                        kernel_flushes_tc_l2_after_ib;
-	bool                        has_indirect_compute_dispatch;
-	bool                        has_unaligned_shader_loads;
-	bool                        has_sparse_vm_mappings;
-	bool                        has_2d_tiling;
-	bool                        has_read_registers_query;
-	bool                        has_gds_ordered_append;
-	bool                        has_scheduled_fence_dependency;
-	/* Whether SR-IOV is enabled or amdgpu.mcbp=1 was set on the kernel command line. */
-	bool                        mid_command_buffer_preemption_enabled;
+   /* Kernel & winsys capabilities. */
+   uint32_t drm_major; /* version */
+   uint32_t drm_minor;
+   uint32_t drm_patchlevel;
+   bool is_amdgpu;
+   bool has_userptr;
+   bool has_syncobj;
+   bool has_syncobj_wait_for_submit;
+   bool has_timeline_syncobj;
+   bool has_fence_to_handle;
+   bool has_ctx_priority;
+   bool has_local_buffers;
+   bool kernel_flushes_hdp_before_ib;
+   bool htile_cmask_support_1d_tiling;
+   bool si_TA_CS_BC_BASE_ADDR_allowed;
+   bool has_bo_metadata;
+   bool has_gpu_reset_status_query;
+   bool has_eqaa_surface_allocator;
+   bool has_format_bc1_through_bc7;
+   bool kernel_flushes_tc_l2_after_ib;
+   bool has_indirect_compute_dispatch;
+   bool has_unaligned_shader_loads;
+   bool has_sparse_vm_mappings;
+   bool has_2d_tiling;
+   bool has_read_registers_query;
+   bool has_gds_ordered_append;
+   bool has_scheduled_fence_dependency;
+   /* Whether SR-IOV is enabled or amdgpu.mcbp=1 was set on the kernel command line. */
+   bool mid_command_buffer_preemption_enabled;

-	/* Shader cores. */
-	uint32_t                    cu_mask[4][2];
-	uint32_t                    r600_max_quad_pipes; /* wave size / 16 */
-	uint32_t                    max_shader_clock;
-	uint32_t                    num_good_compute_units;
-	uint32_t                    max_good_cu_per_sa;
-	uint32_t                    min_good_cu_per_sa; /* min != max if SAs have different # of CUs */
-	uint32_t                    max_se; /* shader engines */
-	uint32_t                    max_sh_per_se; /* shader arrays per shader engine */
-	uint32_t                    max_wave64_per_simd;
-	uint32_t                    num_physical_sgprs_per_simd;
-	uint32_t                    num_physical_wave64_vgprs_per_simd;
-	uint32_t                    num_simd_per_compute_unit;
-	uint32_t                    min_sgpr_alloc;
-	uint32_t                    max_sgpr_alloc;
-	uint32_t                    sgpr_alloc_granularity;
-	uint32_t                    min_wave64_vgpr_alloc;
-	uint32_t                    max_vgpr_alloc;
-	uint32_t                    wave64_vgpr_alloc_granularity;
-	bool                        use_late_alloc; /* VS and GS: late pos/param allocation */
+   /* Shader cores. */
+   uint32_t cu_mask[4][2];
+   uint32_t r600_max_quad_pipes; /* wave size / 16 */
+   uint32_t max_shader_clock;
+   uint32_t num_good_compute_units;
+   uint32_t max_good_cu_per_sa;
+   uint32_t min_good_cu_per_sa; /* min != max if SAs have different # of CUs */
+   uint32_t max_se;             /* shader engines */
+   uint32_t max_sh_per_se;      /* shader arrays per shader engine */
+   uint32_t max_wave64_per_simd;
+   uint32_t num_physical_sgprs_per_simd;
+   uint32_t num_physical_wave64_vgprs_per_simd;
+   uint32_t num_simd_per_compute_unit;
+   uint32_t min_sgpr_alloc;
+   uint32_t max_sgpr_alloc;
+   uint32_t sgpr_alloc_granularity;
+   uint32_t min_wave64_vgpr_alloc;
+   uint32_t max_vgpr_alloc;
+   uint32_t wave64_vgpr_alloc_granularity;
+   bool use_late_alloc; /* VS and GS: late pos/param allocation */

-	/* Render backends (color + depth blocks). */
-	uint32_t                    r300_num_gb_pipes;
-	uint32_t                    r300_num_z_pipes;
-	uint32_t                    r600_gb_backend_map; /* R600 harvest config */
-	bool                        r600_gb_backend_map_valid;
-	uint32_t                    r600_num_banks;
-	uint32_t                    gb_addr_config;
-	uint32_t                    pa_sc_tile_steering_override; /* CLEAR_STATE also sets this */
-	uint32_t                    num_render_backends;
-	uint32_t                    num_tile_pipes; /* pipe count from PIPE_CONFIG */
-	uint32_t                    pipe_interleave_bytes;
-	uint32_t                    enabled_rb_mask; /* GCN harvest config */
-	uint64_t                    max_alignment; /* from addrlib */
-	uint32_t                    pbb_max_alloc_count;
+   /* Render backends (color + depth blocks). */
+   uint32_t r300_num_gb_pipes;
+   uint32_t r300_num_z_pipes;
+   uint32_t r600_gb_backend_map; /* R600 harvest config */
+   bool r600_gb_backend_map_valid;
+   uint32_t r600_num_banks;
+   uint32_t gb_addr_config;
+   uint32_t pa_sc_tile_steering_override; /* CLEAR_STATE also sets this */
+   uint32_t num_render_backends;
+   uint32_t num_tile_pipes; /* pipe count from PIPE_CONFIG */
+   uint32_t pipe_interleave_bytes;
+   uint32_t enabled_rb_mask; /* GCN harvest config */
+   uint64_t max_alignment;   /* from addrlib */
+   uint32_t pbb_max_alloc_count;

-	/* Tile modes. */
-	uint32_t                    si_tile_mode_array[32];
-	uint32_t                    cik_macrotile_mode_array[16];
+   /* Tile modes. */
+   uint32_t si_tile_mode_array[32];
+   uint32_t cik_macrotile_mode_array[16];
 };

-bool ac_query_gpu_info(int fd, void *dev_p,
-		       struct radeon_info *info,
-		       struct amdgpu_gpu_info *amdinfo);
+bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
+                       struct amdgpu_gpu_info *amdinfo);

 void ac_compute_driver_uuid(char *uuid, size_t size);

 void ac_compute_device_uuid(struct radeon_info *info, char *uuid, size_t size);
 void ac_print_gpu_info(struct radeon_info *info);
 int ac_get_gs_table_depth(enum chip_class chip_class, enum radeon_family family);
-void ac_get_raster_config(struct radeon_info *info,
-			  uint32_t *raster_config_p,
-			  uint32_t *raster_config_1_p,
-			  uint32_t *se_tile_repeat_p);
-void ac_get_harvested_configs(struct radeon_info *info,
-			      unsigned raster_config,
-			      unsigned *cik_raster_config_1_p,
-			      unsigned *raster_config_se);
-unsigned ac_get_compute_resource_limits(struct radeon_info *info,
-					unsigned waves_per_threadgroup,
-					unsigned max_waves_per_sh,
-					unsigned threadgroups_per_cu);
+void ac_get_raster_config(struct radeon_info *info, uint32_t *raster_config_p,
+                          uint32_t *raster_config_1_p, uint32_t *se_tile_repeat_p);
+void ac_get_harvested_configs(struct radeon_info *info, unsigned raster_config,
+                              unsigned *cik_raster_config_1_p, unsigned *raster_config_se);
+unsigned ac_get_compute_resource_limits(struct radeon_info *info, unsigned waves_per_threadgroup,
+                                        unsigned max_waves_per_sh, unsigned threadgroups_per_cu);

 #ifdef __cplusplus
 }
--- a/src/amd/common/ac_rtld.c
+++ b/src/amd/common/ac_rtld.c
--- a/src/amd/common/ac_rtld.h
+++ b/src/amd/common/ac_rtld.h
@ -24,12 +24,12 @@
 #ifndef AC_RTLD_H
 #define AC_RTLD_H

-#include <stdbool.h>
-#include <stdint.h>
-#include <stddef.h>
-
-#include "util/u_dynarray.h"
 #include "compiler/shader_enums.h"
+#include "util/u_dynarray.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>

 #ifdef __cplusplus
 extern "C" {
@ -40,37 +40,37 @@ struct ac_shader_config;
 struct radeon_info;

 struct ac_rtld_symbol {
-	const char *name;
-	uint32_t size;
-	uint32_t align;
-	uint64_t offset; /* filled in by ac_rtld_open */
-	unsigned part_idx; /* shader part in which this symbol appears */
+   const char *name;
+   uint32_t size;
+   uint32_t align;
+   uint64_t offset;   /* filled in by ac_rtld_open */
+   unsigned part_idx; /* shader part in which this symbol appears */
 };

 struct ac_rtld_options {
-	/* Loader will insert an s_sethalt 1 instruction as the
-	 * first instruction. */
-	bool halt_at_entry:1;
+   /* Loader will insert an s_sethalt 1 instruction as the
+    * first instruction. */
+   bool halt_at_entry : 1;
 };

 /* Lightweight wrapper around underlying ELF objects. */
 struct ac_rtld_binary {
-	struct ac_rtld_options options;
-	unsigned wave_size;
+   struct ac_rtld_options options;
+   unsigned wave_size;

-	/* Required buffer sizes, currently read/executable only. */
-	uint64_t rx_size;
+   /* Required buffer sizes, currently read/executable only. */
+   uint64_t rx_size;

-	/* Size of executable code, for reporting purposes. */
-	uint64_t exec_size;
+   /* Size of executable code, for reporting purposes. */
+   uint64_t exec_size;

-	uint64_t rx_end_markers;
+   uint64_t rx_end_markers;

-	unsigned num_parts;
-	struct ac_rtld_part *parts;
+   unsigned num_parts;
+   struct ac_rtld_part *parts;

-	struct util_dynarray lds_symbols;
-	uint32_t lds_size;
+   struct util_dynarray lds_symbols;
+   uint32_t lds_size;
 };

 /**
@ -82,8 +82,7 @@ struct ac_rtld_binary {
 * \param value to be filled in by the callback
 * \return whether the symbol was found successfully
 */
-typedef bool (*ac_rtld_get_external_symbol_cb)(
-	void *cb_data, const char *symbol, uint64_t *value);
+typedef bool (*ac_rtld_get_external_symbol_cb)(void *cb_data, const char *symbol, uint64_t *value);

 /**
 * Lifetimes of \ref info, in-memory ELF objects, and the names of
@ -91,50 +90,48 @@ typedef bool (*ac_rtld_get_external_symbol_cb)(
 * the opened binary.
 */
 struct ac_rtld_open_info {
-	const struct radeon_info *info;
-	struct ac_rtld_options options;
-	gl_shader_stage shader_type;
-	unsigned wave_size;
+   const struct radeon_info *info;
+   struct ac_rtld_options options;
+   gl_shader_stage shader_type;
+   unsigned wave_size;

-	unsigned num_parts;
-	const char * const *elf_ptrs; /* in-memory ELF objects of each part */
-	const size_t *elf_sizes; /* sizes of corresponding in-memory ELF objects in bytes */
+   unsigned num_parts;
+   const char *const *elf_ptrs; /* in-memory ELF objects of each part */
+   const size_t *elf_sizes;     /* sizes of corresponding in-memory ELF objects in bytes */

-	/* Shared LDS symbols are layouted such that they are accessible from
-	 * all shader parts. Non-shared (private) LDS symbols of one part may
-	 * overlap private LDS symbols of another shader part.
-	 */
-	unsigned num_shared_lds_symbols;
-	const struct ac_rtld_symbol *shared_lds_symbols;
+   /* Shared LDS symbols are laid out such that they are accessible from
+    * all shader parts. Non-shared (private) LDS symbols of one part may
+    * overlap private LDS symbols of another shader part.
+    */
+   unsigned num_shared_lds_symbols;
+   const struct ac_rtld_symbol *shared_lds_symbols;
 };

-bool ac_rtld_open(struct ac_rtld_binary *binary,
-		  struct ac_rtld_open_info i);
+bool ac_rtld_open(struct ac_rtld_binary *binary, struct ac_rtld_open_info i);

 void ac_rtld_close(struct ac_rtld_binary *binary);

-bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name,
-				 const char **data, size_t *nbytes);
+bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name, const char **data,
+                                 size_t *nbytes);

-bool ac_rtld_read_config(const struct radeon_info *info,
-			 struct ac_rtld_binary *binary,
-			 struct ac_shader_config *config);
+bool ac_rtld_read_config(const struct radeon_info *info, struct ac_rtld_binary *binary,
+                         struct ac_shader_config *config);

 struct ac_rtld_upload_info {
-	struct ac_rtld_binary *binary;
+   struct ac_rtld_binary *binary;

-	/** GPU mapping of the read/executable buffer. */
-	uint64_t rx_va;
+   /** GPU mapping of the read/executable buffer. */
+   uint64_t rx_va;

-	/** CPU mapping of the read/executable buffer */
-	char *rx_ptr;
+   /** CPU mapping of the read/executable buffer */
+   char *rx_ptr;

-	/** Optional callback function that will be queried for symbols not
-	 * defined in any of the binary's parts. */
-	ac_rtld_get_external_symbol_cb get_external_symbol;
+   /** Optional callback function that will be queried for symbols not
+    * defined in any of the binary's parts. */
+   ac_rtld_get_external_symbol_cb get_external_symbol;

-	/** Caller-defined data that will be passed to callback functions. */
-	void *cb_data;
+   /** Caller-defined data that will be passed to callback functions. */
+   void *cb_data;
 };

 bool ac_rtld_upload(struct ac_rtld_upload_info *u);
--- a/src/amd/common/ac_shader_args.c
+++ b/src/amd/common/ac_shader_args.c
@ -22,34 +22,33 @@
 */

 #include "ac_shader_args.h"
+
 #include "nir/nir_builder.h"

-void
-ac_add_arg(struct ac_shader_args *info, enum ac_arg_regfile regfile,
-	   unsigned size, enum ac_arg_type type, struct ac_arg *arg)
+void ac_add_arg(struct ac_shader_args *info, enum ac_arg_regfile regfile, unsigned size,
+                enum ac_arg_type type, struct ac_arg *arg)
 {
-	assert(info->arg_count < AC_MAX_ARGS);
+   assert(info->arg_count < AC_MAX_ARGS);

-	unsigned offset;
-	if (regfile == AC_ARG_SGPR) {
-		offset = info->num_sgprs_used;
-		info->num_sgprs_used += size;
-	} else {
-		assert(regfile == AC_ARG_VGPR);
-		offset = info->num_vgprs_used;
-		info->num_vgprs_used += size;
-	}
+   unsigned offset;
+   if (regfile == AC_ARG_SGPR) {
+      offset = info->num_sgprs_used;
+      info->num_sgprs_used += size;
+   } else {
+      assert(regfile == AC_ARG_VGPR);
+      offset = info->num_vgprs_used;
+      info->num_vgprs_used += size;
+   }

-	info->args[info->arg_count].file = regfile;
-	info->args[info->arg_count].offset = offset;
-	info->args[info->arg_count].size = size;
-	info->args[info->arg_count].type = type;
+   info->args[info->arg_count].file = regfile;
+   info->args[info->arg_count].offset = offset;
+   info->args[info->arg_count].size = size;
+   info->args[info->arg_count].type = type;

-	if (arg) {
-		arg->arg_index = info->arg_count;
-		arg->used = true;
-	}
+   if (arg) {
+      arg->arg_index = info->arg_count;
+      arg->used = true;
+   }

-	info->arg_count++;
+   info->arg_count++;
 }
-
--- a/src/amd/common/ac_shader_args.h
+++ b/src/amd/common/ac_shader_args.h
@ -24,91 +24,90 @@
 #ifndef AC_SHADER_ARGS_H
 #define AC_SHADER_ARGS_H

-#include <stdint.h>
 #include <stdbool.h>
+#include <stdint.h>

 #define AC_MAX_INLINE_PUSH_CONSTS 8

-enum ac_arg_regfile {
-	AC_ARG_SGPR,
-	AC_ARG_VGPR,
+enum ac_arg_regfile
+{
+   AC_ARG_SGPR,
+   AC_ARG_VGPR,
 };

-enum ac_arg_type {
-	AC_ARG_FLOAT,
-	AC_ARG_INT,
-	AC_ARG_CONST_PTR, /* Pointer to i8 array */
-	AC_ARG_CONST_FLOAT_PTR, /* Pointer to f32 array */
-	AC_ARG_CONST_PTR_PTR, /* Pointer to pointer to i8 array */
-	AC_ARG_CONST_DESC_PTR, /* Pointer to v4i32 array */
-	AC_ARG_CONST_IMAGE_PTR, /* Pointer to v8i32 array */
+enum ac_arg_type
+{
+   AC_ARG_FLOAT,
+   AC_ARG_INT,
+   AC_ARG_CONST_PTR,       /* Pointer to i8 array */
+   AC_ARG_CONST_FLOAT_PTR, /* Pointer to f32 array */
+   AC_ARG_CONST_PTR_PTR,   /* Pointer to pointer to i8 array */
+   AC_ARG_CONST_DESC_PTR,  /* Pointer to v4i32 array */
+   AC_ARG_CONST_IMAGE_PTR, /* Pointer to v8i32 array */
 };

 struct ac_arg {
-	uint8_t arg_index;
-	bool used;
+   uint8_t arg_index;
+   bool used;
 };

-
 #define AC_MAX_ARGS 128

 struct ac_shader_args {
-	/* Info on how to declare arguments */
-	struct {
-		enum ac_arg_type type;
-		enum ac_arg_regfile file;
-		uint8_t offset;
-		uint8_t size;
-		bool skip;
-	} args[AC_MAX_ARGS];
+   /* Info on how to declare arguments */
+   struct {
+      enum ac_arg_type type;
+      enum ac_arg_regfile file;
+      uint8_t offset;
+      uint8_t size;
+      bool skip;
+   } args[AC_MAX_ARGS];

-	uint8_t arg_count;
-	uint8_t sgpr_count;
-	uint8_t num_sgprs_used;
-	uint8_t num_vgprs_used;
+   uint8_t arg_count;
+   uint8_t sgpr_count;
+   uint8_t num_sgprs_used;
+   uint8_t num_vgprs_used;

-	struct ac_arg base_vertex;
-	struct ac_arg start_instance;
-	struct ac_arg draw_id;
-	struct ac_arg vertex_id;
-	struct ac_arg instance_id;
-	struct ac_arg tcs_patch_id;
-	struct ac_arg tcs_rel_ids;
-	struct ac_arg tes_patch_id;
-	struct ac_arg gs_prim_id;
-	struct ac_arg gs_invocation_id;
+   struct ac_arg base_vertex;
+   struct ac_arg start_instance;
+   struct ac_arg draw_id;
+   struct ac_arg vertex_id;
+   struct ac_arg instance_id;
+   struct ac_arg tcs_patch_id;
+   struct ac_arg tcs_rel_ids;
+   struct ac_arg tes_patch_id;
+   struct ac_arg gs_prim_id;
+   struct ac_arg gs_invocation_id;

-	/* PS */
-	struct ac_arg frag_pos[4];
-	struct ac_arg front_face;
-	struct ac_arg ancillary;
-	struct ac_arg sample_coverage;
-	struct ac_arg prim_mask;
-	struct ac_arg persp_sample;
-	struct ac_arg persp_center;
-	struct ac_arg persp_centroid;
-	struct ac_arg pull_model;
-	struct ac_arg linear_sample;
-	struct ac_arg linear_center;
-	struct ac_arg linear_centroid;
+   /* PS */
+   struct ac_arg frag_pos[4];
+   struct ac_arg front_face;
+   struct ac_arg ancillary;
+   struct ac_arg sample_coverage;
+   struct ac_arg prim_mask;
+   struct ac_arg persp_sample;
+   struct ac_arg persp_center;
+   struct ac_arg persp_centroid;
+   struct ac_arg pull_model;
+   struct ac_arg linear_sample;
+   struct ac_arg linear_center;
+   struct ac_arg linear_centroid;

-	/* CS */
-	struct ac_arg local_invocation_ids;
-	struct ac_arg num_work_groups;
-	struct ac_arg workgroup_ids[3];
-	struct ac_arg tg_size;
+   /* CS */
+   struct ac_arg local_invocation_ids;
+   struct ac_arg num_work_groups;
+   struct ac_arg workgroup_ids[3];
+   struct ac_arg tg_size;

-	/* Vulkan only */
-	struct ac_arg push_constants;
-	struct ac_arg inline_push_consts[AC_MAX_INLINE_PUSH_CONSTS];
-	unsigned num_inline_push_consts;
-	unsigned base_inline_push_consts;
-	struct ac_arg view_index;
+   /* Vulkan only */
+   struct ac_arg push_constants;
+   struct ac_arg inline_push_consts[AC_MAX_INLINE_PUSH_CONSTS];
+   unsigned num_inline_push_consts;
+   unsigned base_inline_push_consts;
+   struct ac_arg view_index;
 };

-void ac_add_arg(struct ac_shader_args *info, enum ac_arg_regfile regfile,
-		unsigned registers, enum ac_arg_type type,
-		struct ac_arg *arg);
+void ac_add_arg(struct ac_shader_args *info, enum ac_arg_regfile regfile, unsigned registers,
+                enum ac_arg_type type, struct ac_arg *arg);

 #endif
-
--- a/src/amd/common/ac_shader_util.c
+++ b/src/amd/common/ac_shader_util.c
@ -21,277 +21,303 @@
 * IN THE SOFTWARE.
 */

+#include "ac_shader_util.h"
+
+#include "sid.h"
+
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>

-#include "ac_shader_util.h"
-#include "sid.h"
-
-unsigned
-ac_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
-			   bool writes_samplemask)
+unsigned ac_get_spi_shader_z_format(bool writes_z, bool writes_stencil, bool writes_samplemask)
 {
-	if (writes_z) {
-		/* Z needs 32 bits. */
-		if (writes_samplemask)
-			return V_028710_SPI_SHADER_32_ABGR;
-		else if (writes_stencil)
-			return V_028710_SPI_SHADER_32_GR;
-		else
-			return V_028710_SPI_SHADER_32_R;
-	} else if (writes_stencil || writes_samplemask) {
-		/* Both stencil and sample mask need only 16 bits. */
-		return V_028710_SPI_SHADER_UINT16_ABGR;
-	} else {
-		return V_028710_SPI_SHADER_ZERO;
-	}
+   if (writes_z) {
+      /* Z needs 32 bits. */
+      if (writes_samplemask)
+         return V_028710_SPI_SHADER_32_ABGR;
+      else if (writes_stencil)
+         return V_028710_SPI_SHADER_32_GR;
+      else
+         return V_028710_SPI_SHADER_32_R;
+   } else if (writes_stencil || writes_samplemask) {
+      /* Both stencil and sample mask need only 16 bits. */
+      return V_028710_SPI_SHADER_UINT16_ABGR;
+   } else {
+      return V_028710_SPI_SHADER_ZERO;
+   }
 }

-unsigned
-ac_get_cb_shader_mask(unsigned spi_shader_col_format)
+unsigned ac_get_cb_shader_mask(unsigned spi_shader_col_format)
 {
-	unsigned i, cb_shader_mask = 0;
+   unsigned i, cb_shader_mask = 0;

-	for (i = 0; i < 8; i++) {
-		switch ((spi_shader_col_format >> (i * 4)) & 0xf) {
-		case V_028714_SPI_SHADER_ZERO:
-			break;
-		case V_028714_SPI_SHADER_32_R:
-			cb_shader_mask |= 0x1 << (i * 4);
-			break;
-		case V_028714_SPI_SHADER_32_GR:
-			cb_shader_mask |= 0x3 << (i * 4);
-			break;
-		case V_028714_SPI_SHADER_32_AR:
-			cb_shader_mask |= 0x9u << (i * 4);
-			break;
-		case V_028714_SPI_SHADER_FP16_ABGR:
-		case V_028714_SPI_SHADER_UNORM16_ABGR:
-		case V_028714_SPI_SHADER_SNORM16_ABGR:
-		case V_028714_SPI_SHADER_UINT16_ABGR:
-		case V_028714_SPI_SHADER_SINT16_ABGR:
-		case V_028714_SPI_SHADER_32_ABGR:
-			cb_shader_mask |= 0xfu << (i * 4);
-			break;
-		default:
-			assert(0);
-		}
-	}
-	return cb_shader_mask;
+   for (i = 0; i < 8; i++) {
+      switch ((spi_shader_col_format >> (i * 4)) & 0xf) {
+      case V_028714_SPI_SHADER_ZERO:
+         break;
+      case V_028714_SPI_SHADER_32_R:
+         cb_shader_mask |= 0x1 << (i * 4);
+         break;
+      case V_028714_SPI_SHADER_32_GR:
+         cb_shader_mask |= 0x3 << (i * 4);
+         break;
+      case V_028714_SPI_SHADER_32_AR:
+         cb_shader_mask |= 0x9u << (i * 4);
+         break;
+      case V_028714_SPI_SHADER_FP16_ABGR:
+      case V_028714_SPI_SHADER_UNORM16_ABGR:
+      case V_028714_SPI_SHADER_SNORM16_ABGR:
+      case V_028714_SPI_SHADER_UINT16_ABGR:
+      case V_028714_SPI_SHADER_SINT16_ABGR:
+      case V_028714_SPI_SHADER_32_ABGR:
+         cb_shader_mask |= 0xfu << (i * 4);
+         break;
+      default:
+         assert(0);
+      }
+   }
+   return cb_shader_mask;
 }

 /**
 * Calculate the appropriate setting of VGT_GS_MODE when \p shader is a
 * geometry shader.
 */
-uint32_t
-ac_vgt_gs_mode(unsigned gs_max_vert_out, enum chip_class chip_class)
+uint32_t ac_vgt_gs_mode(unsigned gs_max_vert_out, enum chip_class chip_class)
 {
-	unsigned cut_mode;
+   unsigned cut_mode;

-	if (gs_max_vert_out <= 128) {
-		cut_mode = V_028A40_GS_CUT_128;
-	} else if (gs_max_vert_out <= 256) {
-		cut_mode = V_028A40_GS_CUT_256;
-	} else if (gs_max_vert_out <= 512) {
-		cut_mode = V_028A40_GS_CUT_512;
-	} else {
-		assert(gs_max_vert_out <= 1024);
-		cut_mode = V_028A40_GS_CUT_1024;
-	}
+   if (gs_max_vert_out <= 128) {
+      cut_mode = V_028A40_GS_CUT_128;
+   } else if (gs_max_vert_out <= 256) {
+      cut_mode = V_028A40_GS_CUT_256;
+   } else if (gs_max_vert_out <= 512) {
+      cut_mode = V_028A40_GS_CUT_512;
+   } else {
+      assert(gs_max_vert_out <= 1024);
+      cut_mode = V_028A40_GS_CUT_1024;
+   }

-	return S_028A40_MODE(V_028A40_GS_SCENARIO_G) |
-	       S_028A40_CUT_MODE(cut_mode)|
-	       S_028A40_ES_WRITE_OPTIMIZE(chip_class <= GFX8) |
-	       S_028A40_GS_WRITE_OPTIMIZE(1) |
-	       S_028A40_ONCHIP(chip_class >= GFX9 ? 1 : 0);
+   return S_028A40_MODE(V_028A40_GS_SCENARIO_G) | S_028A40_CUT_MODE(cut_mode) |
+          S_028A40_ES_WRITE_OPTIMIZE(chip_class <= GFX8) | S_028A40_GS_WRITE_OPTIMIZE(1) |
+          S_028A40_ONCHIP(chip_class >= GFX9 ? 1 : 0);
 }

 /// Translate a (dfmt, nfmt) pair into a chip-appropriate combined format
 /// value for LLVM8+ tbuffer intrinsics.
-unsigned
-ac_get_tbuffer_format(enum chip_class chip_class,
-		      unsigned dfmt, unsigned nfmt)
+unsigned ac_get_tbuffer_format(enum chip_class chip_class, unsigned dfmt, unsigned nfmt)
 {
-	// Some games try to access vertex buffers without a valid format.
-	// This is a game bug, but we should still handle it gracefully.
-	if (dfmt == V_008F0C_IMG_FORMAT_INVALID)
-		return V_008F0C_IMG_FORMAT_INVALID;
+   // Some games try to access vertex buffers without a valid format.
+   // This is a game bug, but we should still handle it gracefully.
+   if (dfmt == V_008F0C_IMG_FORMAT_INVALID)
+      return V_008F0C_IMG_FORMAT_INVALID;

-	if (chip_class >= GFX10) {
-		unsigned format;
-		switch (dfmt) {
-		default: unreachable("bad dfmt");
-		case V_008F0C_BUF_DATA_FORMAT_INVALID: format = V_008F0C_IMG_FORMAT_INVALID; break;
-		case V_008F0C_BUF_DATA_FORMAT_8: format = V_008F0C_IMG_FORMAT_8_UINT; break;
-		case V_008F0C_BUF_DATA_FORMAT_8_8: format = V_008F0C_IMG_FORMAT_8_8_UINT; break;
-		case V_008F0C_BUF_DATA_FORMAT_8_8_8_8: format = V_008F0C_IMG_FORMAT_8_8_8_8_UINT; break;
-		case V_008F0C_BUF_DATA_FORMAT_16: format = V_008F0C_IMG_FORMAT_16_UINT; break;
-		case V_008F0C_BUF_DATA_FORMAT_16_16: format = V_008F0C_IMG_FORMAT_16_16_UINT; break;
-		case V_008F0C_BUF_DATA_FORMAT_16_16_16_16: format = V_008F0C_IMG_FORMAT_16_16_16_16_UINT; break;
-		case V_008F0C_BUF_DATA_FORMAT_32: format = V_008F0C_IMG_FORMAT_32_UINT; break;
-		case V_008F0C_BUF_DATA_FORMAT_32_32: format = V_008F0C_IMG_FORMAT_32_32_UINT; break;
-		case V_008F0C_BUF_DATA_FORMAT_32_32_32: format = V_008F0C_IMG_FORMAT_32_32_32_UINT; break;
-		case V_008F0C_BUF_DATA_FORMAT_32_32_32_32: format = V_008F0C_IMG_FORMAT_32_32_32_32_UINT; break;
-		case V_008F0C_BUF_DATA_FORMAT_2_10_10_10: format = V_008F0C_IMG_FORMAT_2_10_10_10_UINT; break;
-		}
+   if (chip_class >= GFX10) {
+      unsigned format;
+      switch (dfmt) {
+      default:
+         unreachable("bad dfmt");
+      case V_008F0C_BUF_DATA_FORMAT_INVALID:
+         format = V_008F0C_IMG_FORMAT_INVALID;
+         break;
+      case V_008F0C_BUF_DATA_FORMAT_8:
+         format = V_008F0C_IMG_FORMAT_8_UINT;
+         break;
+      case V_008F0C_BUF_DATA_FORMAT_8_8:
+         format = V_008F0C_IMG_FORMAT_8_8_UINT;
+         break;
+      case V_008F0C_BUF_DATA_FORMAT_8_8_8_8:
+         format = V_008F0C_IMG_FORMAT_8_8_8_8_UINT;
+         break;
+      case V_008F0C_BUF_DATA_FORMAT_16:
+         format = V_008F0C_IMG_FORMAT_16_UINT;
+         break;
+      case V_008F0C_BUF_DATA_FORMAT_16_16:
+         format = V_008F0C_IMG_FORMAT_16_16_UINT;
+         break;
+      case V_008F0C_BUF_DATA_FORMAT_16_16_16_16:
+         format = V_008F0C_IMG_FORMAT_16_16_16_16_UINT;
+         break;
+      case V_008F0C_BUF_DATA_FORMAT_32:
+         format = V_008F0C_IMG_FORMAT_32_UINT;
+         break;
+      case V_008F0C_BUF_DATA_FORMAT_32_32:
+         format = V_008F0C_IMG_FORMAT_32_32_UINT;
+         break;
+      case V_008F0C_BUF_DATA_FORMAT_32_32_32:
+         format = V_008F0C_IMG_FORMAT_32_32_32_UINT;
+         break;
+      case V_008F0C_BUF_DATA_FORMAT_32_32_32_32:
+         format = V_008F0C_IMG_FORMAT_32_32_32_32_UINT;
+         break;
+      case V_008F0C_BUF_DATA_FORMAT_2_10_10_10:
+         format = V_008F0C_IMG_FORMAT_2_10_10_10_UINT;
+         break;
+      }

-		// Use the regularity properties of the combined format enum.
-		//
-		// Note: float is incompatible with 8-bit data formats,
-		//       [us]{norm,scaled} are incomparible with 32-bit data formats.
-		//       [us]scaled are not writable.
-		switch (nfmt) {
-		case V_008F0C_BUF_NUM_FORMAT_UNORM: format -= 4; break;
-		case V_008F0C_BUF_NUM_FORMAT_SNORM: format -= 3; break;
-		case V_008F0C_BUF_NUM_FORMAT_USCALED: format -= 2; break;
-		case V_008F0C_BUF_NUM_FORMAT_SSCALED: format -= 1; break;
-		default: unreachable("bad nfmt");
-		case V_008F0C_BUF_NUM_FORMAT_UINT: break;
-		case V_008F0C_BUF_NUM_FORMAT_SINT: format += 1; break;
-		case V_008F0C_BUF_NUM_FORMAT_FLOAT: format += 2; break;
-		}
+      // Use the regularity properties of the combined format enum.
+      //
+      // Note: float is incompatible with 8-bit data formats,
+      //       [us]{norm,scaled} are incompatible with 32-bit data formats.
+      //       [us]scaled are not writable.
+      switch (nfmt) {
+      case V_008F0C_BUF_NUM_FORMAT_UNORM:
+         format -= 4;
+         break;
+      case V_008F0C_BUF_NUM_FORMAT_SNORM:
+         format -= 3;
+         break;
+      case V_008F0C_BUF_NUM_FORMAT_USCALED:
+         format -= 2;
+         break;
+      case V_008F0C_BUF_NUM_FORMAT_SSCALED:
+         format -= 1;
+         break;
+      default:
+         unreachable("bad nfmt");
+      case V_008F0C_BUF_NUM_FORMAT_UINT:
+         break;
+      case V_008F0C_BUF_NUM_FORMAT_SINT:
+         format += 1;
+         break;
+      case V_008F0C_BUF_NUM_FORMAT_FLOAT:
+         format += 2;
+         break;
+      }

-		return format;
-	} else {
-		return dfmt | (nfmt << 4);
-	}
+      return format;
+   } else {
+      return dfmt | (nfmt << 4);
+   }
 }

 static const struct ac_data_format_info data_format_table[] = {
-	[V_008F0C_BUF_DATA_FORMAT_INVALID]     = {  0, 4, 0, V_008F0C_BUF_DATA_FORMAT_INVALID	 },
-	[V_008F0C_BUF_DATA_FORMAT_8]           = {  1, 1, 1, V_008F0C_BUF_DATA_FORMAT_8		 },
-	[V_008F0C_BUF_DATA_FORMAT_16]          = {  2, 1, 2, V_008F0C_BUF_DATA_FORMAT_16	 },
-	[V_008F0C_BUF_DATA_FORMAT_8_8]         = {  2, 2, 1, V_008F0C_BUF_DATA_FORMAT_8		 },
-	[V_008F0C_BUF_DATA_FORMAT_32]          = {  4, 1, 4, V_008F0C_BUF_DATA_FORMAT_32	 },
-	[V_008F0C_BUF_DATA_FORMAT_16_16]       = {  4, 2, 2, V_008F0C_BUF_DATA_FORMAT_16         },
-	[V_008F0C_BUF_DATA_FORMAT_10_11_11]    = {  4, 3, 0, V_008F0C_BUF_DATA_FORMAT_10_11_11	 },
-	[V_008F0C_BUF_DATA_FORMAT_11_11_10]    = {  4, 3, 0, V_008F0C_BUF_DATA_FORMAT_11_11_10	 },
-	[V_008F0C_BUF_DATA_FORMAT_10_10_10_2]  = {  4, 4, 0, V_008F0C_BUF_DATA_FORMAT_10_10_10_2 },
-	[V_008F0C_BUF_DATA_FORMAT_2_10_10_10]  = {  4, 4, 0, V_008F0C_BUF_DATA_FORMAT_2_10_10_10 },
-	[V_008F0C_BUF_DATA_FORMAT_8_8_8_8]     = {  4, 4, 1, V_008F0C_BUF_DATA_FORMAT_8		 },
-	[V_008F0C_BUF_DATA_FORMAT_32_32]       = {  8, 2, 4, V_008F0C_BUF_DATA_FORMAT_32	 },
-	[V_008F0C_BUF_DATA_FORMAT_16_16_16_16] = {  8, 4, 2, V_008F0C_BUF_DATA_FORMAT_16	 },
-	[V_008F0C_BUF_DATA_FORMAT_32_32_32]    = { 12, 3, 4, V_008F0C_BUF_DATA_FORMAT_32	 },
-	[V_008F0C_BUF_DATA_FORMAT_32_32_32_32] = { 16, 4, 4, V_008F0C_BUF_DATA_FORMAT_32	 },
+   [V_008F0C_BUF_DATA_FORMAT_INVALID] = {0, 4, 0, V_008F0C_BUF_DATA_FORMAT_INVALID},
+   [V_008F0C_BUF_DATA_FORMAT_8] = {1, 1, 1, V_008F0C_BUF_DATA_FORMAT_8},
+   [V_008F0C_BUF_DATA_FORMAT_16] = {2, 1, 2, V_008F0C_BUF_DATA_FORMAT_16},
+   [V_008F0C_BUF_DATA_FORMAT_8_8] = {2, 2, 1, V_008F0C_BUF_DATA_FORMAT_8},
+   [V_008F0C_BUF_DATA_FORMAT_32] = {4, 1, 4, V_008F0C_BUF_DATA_FORMAT_32},
+   [V_008F0C_BUF_DATA_FORMAT_16_16] = {4, 2, 2, V_008F0C_BUF_DATA_FORMAT_16},
+   [V_008F0C_BUF_DATA_FORMAT_10_11_11] = {4, 3, 0, V_008F0C_BUF_DATA_FORMAT_10_11_11},
+   [V_008F0C_BUF_DATA_FORMAT_11_11_10] = {4, 3, 0, V_008F0C_BUF_DATA_FORMAT_11_11_10},
+   [V_008F0C_BUF_DATA_FORMAT_10_10_10_2] = {4, 4, 0, V_008F0C_BUF_DATA_FORMAT_10_10_10_2},
+   [V_008F0C_BUF_DATA_FORMAT_2_10_10_10] = {4, 4, 0, V_008F0C_BUF_DATA_FORMAT_2_10_10_10},
+   [V_008F0C_BUF_DATA_FORMAT_8_8_8_8] = {4, 4, 1, V_008F0C_BUF_DATA_FORMAT_8},
+   [V_008F0C_BUF_DATA_FORMAT_32_32] = {8, 2, 4, V_008F0C_BUF_DATA_FORMAT_32},
+   [V_008F0C_BUF_DATA_FORMAT_16_16_16_16] = {8, 4, 2, V_008F0C_BUF_DATA_FORMAT_16},
+   [V_008F0C_BUF_DATA_FORMAT_32_32_32] = {12, 3, 4, V_008F0C_BUF_DATA_FORMAT_32},
+   [V_008F0C_BUF_DATA_FORMAT_32_32_32_32] = {16, 4, 4, V_008F0C_BUF_DATA_FORMAT_32},
 };

-const struct ac_data_format_info *
-ac_get_data_format_info(unsigned dfmt)
+const struct ac_data_format_info *ac_get_data_format_info(unsigned dfmt)
 {
-	assert(dfmt < ARRAY_SIZE(data_format_table));
-	return &data_format_table[dfmt];
+   assert(dfmt < ARRAY_SIZE(data_format_table));
+   return &data_format_table[dfmt];
 }

-enum ac_image_dim
-ac_get_sampler_dim(enum chip_class chip_class, enum glsl_sampler_dim dim,
-		   bool is_array)
+enum ac_image_dim ac_get_sampler_dim(enum chip_class chip_class, enum glsl_sampler_dim dim,
+                                     bool is_array)
 {
-	switch (dim) {
-	case GLSL_SAMPLER_DIM_1D:
-		if (chip_class == GFX9)
-			return is_array ? ac_image_2darray : ac_image_2d;
-		return is_array ? ac_image_1darray : ac_image_1d;
-	case GLSL_SAMPLER_DIM_2D:
-	case GLSL_SAMPLER_DIM_RECT:
-	case GLSL_SAMPLER_DIM_EXTERNAL:
-		return is_array ? ac_image_2darray : ac_image_2d;
-	case GLSL_SAMPLER_DIM_3D:
-		return ac_image_3d;
-	case GLSL_SAMPLER_DIM_CUBE:
-		return ac_image_cube;
-	case GLSL_SAMPLER_DIM_MS:
-		return is_array ? ac_image_2darraymsaa : ac_image_2dmsaa;
-	case GLSL_SAMPLER_DIM_SUBPASS:
-		return ac_image_2darray;
-	case GLSL_SAMPLER_DIM_SUBPASS_MS:
-		return ac_image_2darraymsaa;
-	default:
-		unreachable("bad sampler dim");
-	}
+   switch (dim) {
+   case GLSL_SAMPLER_DIM_1D:
+      if (chip_class == GFX9)
+         return is_array ? ac_image_2darray : ac_image_2d;
+      return is_array ? ac_image_1darray : ac_image_1d;
+   case GLSL_SAMPLER_DIM_2D:
+   case GLSL_SAMPLER_DIM_RECT:
+   case GLSL_SAMPLER_DIM_EXTERNAL:
+      return is_array ? ac_image_2darray : ac_image_2d;
+   case GLSL_SAMPLER_DIM_3D:
+      return ac_image_3d;
+   case GLSL_SAMPLER_DIM_CUBE:
+      return ac_image_cube;
+   case GLSL_SAMPLER_DIM_MS:
+      return is_array ? ac_image_2darraymsaa : ac_image_2dmsaa;
+   case GLSL_SAMPLER_DIM_SUBPASS:
+      return ac_image_2darray;
+   case GLSL_SAMPLER_DIM_SUBPASS_MS:
+      return ac_image_2darraymsaa;
+   default:
+      unreachable("bad sampler dim");
+   }
 }

-enum ac_image_dim
-ac_get_image_dim(enum chip_class chip_class, enum glsl_sampler_dim sdim,
-		 bool is_array)
+enum ac_image_dim ac_get_image_dim(enum chip_class chip_class, enum glsl_sampler_dim sdim,
+                                   bool is_array)
 {
-	enum ac_image_dim dim = ac_get_sampler_dim(chip_class, sdim, is_array);
+   enum ac_image_dim dim = ac_get_sampler_dim(chip_class, sdim, is_array);

-	/* Match the resource type set in the descriptor. */
-	if (dim == ac_image_cube ||
-	    (chip_class <= GFX8 && dim == ac_image_3d))
-		dim = ac_image_2darray;
-	else if (sdim == GLSL_SAMPLER_DIM_2D && !is_array && chip_class == GFX9) {
-		/* When a single layer of a 3D texture is bound, the shader
-		 * will refer to a 2D target, but the descriptor has a 3D type.
-		 * Since the HW ignores BASE_ARRAY in this case, we need to
-		 * send 3 coordinates. This doesn't hurt when the underlying
-		 * texture is non-3D.
-		 */
-		dim = ac_image_3d;
-	}
+   /* Match the resource type set in the descriptor. */
+   if (dim == ac_image_cube || (chip_class <= GFX8 && dim == ac_image_3d))
+      dim = ac_image_2darray;
+   else if (sdim == GLSL_SAMPLER_DIM_2D && !is_array && chip_class == GFX9) {
+      /* When a single layer of a 3D texture is bound, the shader
+       * will refer to a 2D target, but the descriptor has a 3D type.
+       * Since the HW ignores BASE_ARRAY in this case, we need to
+       * send 3 coordinates. This doesn't hurt when the underlying
+       * texture is non-3D.
+       */
+      dim = ac_image_3d;
+   }

-	return dim;
+   return dim;
 }

-unsigned
-ac_get_fs_input_vgpr_cnt(const struct ac_shader_config *config,
-			 signed char *face_vgpr_index_ptr,
-			 signed char *ancillary_vgpr_index_ptr)
+unsigned ac_get_fs_input_vgpr_cnt(const struct ac_shader_config *config,
+                                  signed char *face_vgpr_index_ptr,
+                                  signed char *ancillary_vgpr_index_ptr)
 {
-	unsigned num_input_vgprs = 0;
-	signed char face_vgpr_index = -1;
-	signed char ancillary_vgpr_index = -1;
+   unsigned num_input_vgprs = 0;
+   signed char face_vgpr_index = -1;
+   signed char ancillary_vgpr_index = -1;

-	if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr))
-		num_input_vgprs += 2;
-	if (G_0286CC_PERSP_CENTER_ENA(config->spi_ps_input_addr))
-		num_input_vgprs += 2;
-	if (G_0286CC_PERSP_CENTROID_ENA(config->spi_ps_input_addr))
-		num_input_vgprs += 2;
-	if (G_0286CC_PERSP_PULL_MODEL_ENA(config->spi_ps_input_addr))
-		num_input_vgprs += 3;
-	if (G_0286CC_LINEAR_SAMPLE_ENA(config->spi_ps_input_addr))
-		num_input_vgprs += 2;
-	if (G_0286CC_LINEAR_CENTER_ENA(config->spi_ps_input_addr))
-		num_input_vgprs += 2;
-	if (G_0286CC_LINEAR_CENTROID_ENA(config->spi_ps_input_addr))
-		num_input_vgprs += 2;
-	if (G_0286CC_LINE_STIPPLE_TEX_ENA(config->spi_ps_input_addr))
-		num_input_vgprs += 1;
-	if (G_0286CC_POS_X_FLOAT_ENA(config->spi_ps_input_addr))
-		num_input_vgprs += 1;
-	if (G_0286CC_POS_Y_FLOAT_ENA(config->spi_ps_input_addr))
-		num_input_vgprs += 1;
-	if (G_0286CC_POS_Z_FLOAT_ENA(config->spi_ps_input_addr))
-		num_input_vgprs += 1;
-	if (G_0286CC_POS_W_FLOAT_ENA(config->spi_ps_input_addr))
-		num_input_vgprs += 1;
-	if (G_0286CC_FRONT_FACE_ENA(config->spi_ps_input_addr)) {
-		face_vgpr_index = num_input_vgprs;
-		num_input_vgprs += 1;
-	}
-	if (G_0286CC_ANCILLARY_ENA(config->spi_ps_input_addr)) {
-		ancillary_vgpr_index = num_input_vgprs;
-		num_input_vgprs += 1;
-	}
-	if (G_0286CC_SAMPLE_COVERAGE_ENA(config->spi_ps_input_addr))
-		num_input_vgprs += 1;
-	if (G_0286CC_POS_FIXED_PT_ENA(config->spi_ps_input_addr))
-		num_input_vgprs += 1;
+   if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr))
+      num_input_vgprs += 2;
+   if (G_0286CC_PERSP_CENTER_ENA(config->spi_ps_input_addr))
+      num_input_vgprs += 2;
+   if (G_0286CC_PERSP_CENTROID_ENA(config->spi_ps_input_addr))
+      num_input_vgprs += 2;
+   if (G_0286CC_PERSP_PULL_MODEL_ENA(config->spi_ps_input_addr))
+      num_input_vgprs += 3;
+   if (G_0286CC_LINEAR_SAMPLE_ENA(config->spi_ps_input_addr))
+      num_input_vgprs += 2;
+   if (G_0286CC_LINEAR_CENTER_ENA(config->spi_ps_input_addr))
+      num_input_vgprs += 2;
+   if (G_0286CC_LINEAR_CENTROID_ENA(config->spi_ps_input_addr))
+      num_input_vgprs += 2;
+   if (G_0286CC_LINE_STIPPLE_TEX_ENA(config->spi_ps_input_addr))
+      num_input_vgprs += 1;
+   if (G_0286CC_POS_X_FLOAT_ENA(config->spi_ps_input_addr))
+      num_input_vgprs += 1;
+   if (G_0286CC_POS_Y_FLOAT_ENA(config->spi_ps_input_addr))
+      num_input_vgprs += 1;
+   if (G_0286CC_POS_Z_FLOAT_ENA(config->spi_ps_input_addr))
+      num_input_vgprs += 1;
+   if (G_0286CC_POS_W_FLOAT_ENA(config->spi_ps_input_addr))
+      num_input_vgprs += 1;
+   if (G_0286CC_FRONT_FACE_ENA(config->spi_ps_input_addr)) {
+      face_vgpr_index = num_input_vgprs;
+      num_input_vgprs += 1;
+   }
+   if (G_0286CC_ANCILLARY_ENA(config->spi_ps_input_addr)) {
+      ancillary_vgpr_index = num_input_vgprs;
+      num_input_vgprs += 1;
+   }
+   if (G_0286CC_SAMPLE_COVERAGE_ENA(config->spi_ps_input_addr))
+      num_input_vgprs += 1;
+   if (G_0286CC_POS_FIXED_PT_ENA(config->spi_ps_input_addr))
+      num_input_vgprs += 1;

-	if (face_vgpr_index_ptr)
-		*face_vgpr_index_ptr = face_vgpr_index;
-	if (ancillary_vgpr_index_ptr)
-		*ancillary_vgpr_index_ptr = ancillary_vgpr_index;
+   if (face_vgpr_index_ptr)
+      *face_vgpr_index_ptr = face_vgpr_index;
+   if (ancillary_vgpr_index_ptr)
+      *ancillary_vgpr_index_ptr = ancillary_vgpr_index;

-	return num_input_vgprs;
+   return num_input_vgprs;
 }

-void ac_choose_spi_color_formats(unsigned format, unsigned swap,
-				 unsigned ntype, bool is_depth,
-				 struct ac_spi_color_formats *formats)
+void ac_choose_spi_color_formats(unsigned format, unsigned swap, unsigned ntype, bool is_depth,
+                                 struct ac_spi_color_formats *formats)
 {
   /* Alpha is needed for alpha-to-coverage.
    * Blending may be with or without alpha.
--- a/src/amd/common/ac_shader_util.h
+++ b/src/amd/common/ac_shader_util.h
@ -24,75 +24,64 @@
 #ifndef AC_SHADER_UTIL_H
 #define AC_SHADER_UTIL_H

+#include "ac_binary.h"
+#include "amd_family.h"
+#include "compiler/nir/nir.h"
+
 #include <stdbool.h>
 #include <stdint.h>

-#include "amd_family.h"
-#include "ac_binary.h"
-#include "compiler/nir/nir.h"
-
 #ifdef __cplusplus
 extern "C" {
 #endif

-enum ac_image_dim {
-	ac_image_1d,
-	ac_image_2d,
-	ac_image_3d,
-	ac_image_cube, // includes cube arrays
-	ac_image_1darray,
-	ac_image_2darray,
-	ac_image_2dmsaa,
-	ac_image_2darraymsaa,
+enum ac_image_dim
+{
+   ac_image_1d,
+   ac_image_2d,
+   ac_image_3d,
+   ac_image_cube, // includes cube arrays
+   ac_image_1darray,
+   ac_image_2darray,
+   ac_image_2dmsaa,
+   ac_image_2darraymsaa,
 };

 struct ac_data_format_info {
-	uint8_t element_size;
-	uint8_t num_channels;
-	uint8_t chan_byte_size;
-	uint8_t chan_format;
+   uint8_t element_size;
+   uint8_t num_channels;
+   uint8_t chan_byte_size;
+   uint8_t chan_format;
 };

 struct ac_spi_color_formats {
-	unsigned normal : 8;
-	unsigned alpha : 8;
-	unsigned blend : 8;
-	unsigned blend_alpha : 8;
+   unsigned normal : 8;
+   unsigned alpha : 8;
+   unsigned blend : 8;
+   unsigned blend_alpha : 8;
 };

-unsigned
-ac_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
-			   bool writes_samplemask);
+unsigned ac_get_spi_shader_z_format(bool writes_z, bool writes_stencil, bool writes_samplemask);

-unsigned
-ac_get_cb_shader_mask(unsigned spi_shader_col_format);
+unsigned ac_get_cb_shader_mask(unsigned spi_shader_col_format);

-uint32_t
-ac_vgt_gs_mode(unsigned gs_max_vert_out, enum chip_class chip_class);
+uint32_t ac_vgt_gs_mode(unsigned gs_max_vert_out, enum chip_class chip_class);

-unsigned
-ac_get_tbuffer_format(enum chip_class chip_class,
-		      unsigned dfmt, unsigned nfmt);
+unsigned ac_get_tbuffer_format(enum chip_class chip_class, unsigned dfmt, unsigned nfmt);

-const struct ac_data_format_info *
-ac_get_data_format_info(unsigned dfmt);
+const struct ac_data_format_info *ac_get_data_format_info(unsigned dfmt);

-enum ac_image_dim
-ac_get_sampler_dim(enum chip_class chip_class, enum glsl_sampler_dim dim,
-		   bool is_array);
+enum ac_image_dim ac_get_sampler_dim(enum chip_class chip_class, enum glsl_sampler_dim dim,
+                                     bool is_array);

-enum ac_image_dim
-ac_get_image_dim(enum chip_class chip_class, enum glsl_sampler_dim sdim,
-		 bool is_array);
+enum ac_image_dim ac_get_image_dim(enum chip_class chip_class, enum glsl_sampler_dim sdim,
+                                   bool is_array);

-unsigned
-ac_get_fs_input_vgpr_cnt(const struct ac_shader_config *config,
-			 signed char *face_vgpr_index,
-			 signed char *ancillary_vgpr_index);
+unsigned ac_get_fs_input_vgpr_cnt(const struct ac_shader_config *config,
+                                  signed char *face_vgpr_index, signed char *ancillary_vgpr_index);

-void ac_choose_spi_color_formats(unsigned format, unsigned swap,
-				 unsigned ntype, bool is_depth,
-				 struct ac_spi_color_formats *formats);
+void ac_choose_spi_color_formats(unsigned format, unsigned swap, unsigned ntype, bool is_depth,
+                                 struct ac_spi_color_formats *formats);

 #ifdef __cplusplus
 }
--- a/src/amd/common/ac_shadowed_regs.c
+++ b/src/amd/common/ac_shadowed_regs.c
--- a/src/amd/common/ac_shadowed_regs.h
+++ b/src/amd/common/ac_shadowed_regs.h
@ -35,7 +35,8 @@ struct ac_reg_range {
   unsigned size;
 };

-enum ac_reg_range_type {
+enum ac_reg_range_type
+{
   SI_REG_RANGE_UCONFIG,
   SI_REG_RANGE_CONTEXT,
   SI_REG_RANGE_SH,
@ -46,14 +47,13 @@ enum ac_reg_range_type {
   SI_NUM_ALL_REG_RANGES,
 };

-typedef void (*set_context_reg_seq_array_fn)(struct radeon_cmdbuf *cs, unsigned reg,
-                                             unsigned num, const uint32_t *values);
+typedef void (*set_context_reg_seq_array_fn)(struct radeon_cmdbuf *cs, unsigned reg, unsigned num,
+                                             const uint32_t *values);

 void ac_get_reg_ranges(enum chip_class chip_class, enum radeon_family family,
                       enum ac_reg_range_type type, unsigned *num_ranges,
                       const struct ac_reg_range **ranges);
-void ac_emulate_clear_state(const struct radeon_info *info,
-                            struct radeon_cmdbuf *cs,
+void ac_emulate_clear_state(const struct radeon_info *info, struct radeon_cmdbuf *cs,
                            set_context_reg_seq_array_fn set_context_reg_seq_array);
 void ac_check_shadowed_regs(enum chip_class chip_class, enum radeon_family family,
                            unsigned reg_offset, unsigned count);
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
--- a/src/amd/common/ac_surface.h
+++ b/src/amd/common/ac_surface.h
@ -26,11 +26,11 @@
 #ifndef AC_SURFACE_H
 #define AC_SURFACE_H

-#include <stdint.h>
-#include <stdbool.h>
-
 #include "amd_family.h"

+#include <stdbool.h>
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@ -41,280 +41,274 @@ struct ac_addrlib;
 struct amdgpu_gpu_info;
 struct radeon_info;

-#define RADEON_SURF_MAX_LEVELS                  15
+#define RADEON_SURF_MAX_LEVELS 15

-enum radeon_surf_mode {
-    RADEON_SURF_MODE_LINEAR_ALIGNED = 1,
-    RADEON_SURF_MODE_1D = 2,
-    RADEON_SURF_MODE_2D = 3,
+enum radeon_surf_mode
+{
+   RADEON_SURF_MODE_LINEAR_ALIGNED = 1,
+   RADEON_SURF_MODE_1D = 2,
+   RADEON_SURF_MODE_2D = 3,
 };

 /* This describes D/S/Z/R swizzle modes.
 * Defined in the GB_TILE_MODEn.MICRO_TILE_MODE_NEW order.
 */
-enum radeon_micro_mode {
-    RADEON_MICRO_MODE_DISPLAY = 0,
-    RADEON_MICRO_MODE_STANDARD = 1,
-    RADEON_MICRO_MODE_DEPTH = 2,
-    RADEON_MICRO_MODE_RENDER = 3, /* gfx9 and older: rotated */
+enum radeon_micro_mode
+{
+   RADEON_MICRO_MODE_DISPLAY = 0,
+   RADEON_MICRO_MODE_STANDARD = 1,
+   RADEON_MICRO_MODE_DEPTH = 2,
+   RADEON_MICRO_MODE_RENDER = 3, /* gfx9 and older: rotated */
 };

 /* the first 16 bits are reserved for libdrm_radeon, don't use them */
-#define RADEON_SURF_SCANOUT                     (1 << 16)
-#define RADEON_SURF_ZBUFFER                     (1 << 17)
-#define RADEON_SURF_SBUFFER                     (1 << 18)
-#define RADEON_SURF_Z_OR_SBUFFER                (RADEON_SURF_ZBUFFER | RADEON_SURF_SBUFFER)
+#define RADEON_SURF_SCANOUT      (1 << 16)
+#define RADEON_SURF_ZBUFFER      (1 << 17)
+#define RADEON_SURF_SBUFFER      (1 << 18)
+#define RADEON_SURF_Z_OR_SBUFFER (RADEON_SURF_ZBUFFER | RADEON_SURF_SBUFFER)
 /* bits 19 and 20 are reserved for libdrm_radeon, don't use them */
-#define RADEON_SURF_FMASK                       (1 << 21)
-#define RADEON_SURF_DISABLE_DCC                 (1 << 22)
-#define RADEON_SURF_TC_COMPATIBLE_HTILE         (1 << 23)
-#define RADEON_SURF_IMPORTED                    (1 << 24)
-#define RADEON_SURF_CONTIGUOUS_DCC_LAYERS       (1 << 25)
-#define RADEON_SURF_SHAREABLE                   (1 << 26)
-#define RADEON_SURF_NO_RENDER_TARGET            (1 << 27)
+#define RADEON_SURF_FMASK                 (1 << 21)
+#define RADEON_SURF_DISABLE_DCC           (1 << 22)
+#define RADEON_SURF_TC_COMPATIBLE_HTILE   (1 << 23)
+#define RADEON_SURF_IMPORTED              (1 << 24)
+#define RADEON_SURF_CONTIGUOUS_DCC_LAYERS (1 << 25)
+#define RADEON_SURF_SHAREABLE             (1 << 26)
+#define RADEON_SURF_NO_RENDER_TARGET      (1 << 27)
 /* Force a swizzle mode (gfx9+) or tile mode (gfx6-8).
 * If this is not set, optimize for space. */
-#define RADEON_SURF_FORCE_SWIZZLE_MODE          (1 << 28)
-#define RADEON_SURF_NO_FMASK                    (1 << 29)
-#define RADEON_SURF_NO_HTILE                    (1 << 30)
-#define RADEON_SURF_FORCE_MICRO_TILE_MODE       (1u << 31)
+#define RADEON_SURF_FORCE_SWIZZLE_MODE    (1 << 28)
+#define RADEON_SURF_NO_FMASK              (1 << 29)
+#define RADEON_SURF_NO_HTILE              (1 << 30)
+#define RADEON_SURF_FORCE_MICRO_TILE_MODE (1u << 31)

 struct legacy_surf_level {
-    uint64_t                    offset;
-    uint32_t                    slice_size_dw; /* in dwords; max = 4GB / 4. */
-    uint32_t                    dcc_offset; /* relative offset within DCC mip tree */
-    uint32_t                    dcc_fast_clear_size;
-    uint32_t                    dcc_slice_fast_clear_size;
-    unsigned                    nblk_x:15;
-    unsigned                    nblk_y:15;
-    enum radeon_surf_mode       mode:2;
+   uint64_t offset;
+   uint32_t slice_size_dw; /* in dwords; max = 4GB / 4. */
+   uint32_t dcc_offset;    /* relative offset within DCC mip tree */
+   uint32_t dcc_fast_clear_size;
+   uint32_t dcc_slice_fast_clear_size;
+   unsigned nblk_x : 15;
+   unsigned nblk_y : 15;
+   enum radeon_surf_mode mode : 2;
 };

 struct legacy_surf_fmask {
-    unsigned slice_tile_max; /* max 4M */
-    uint8_t tiling_index;    /* max 31 */
-    uint8_t bankh;           /* max 8 */
-    uint16_t pitch_in_pixels;
-    uint64_t slice_size;
+   unsigned slice_tile_max; /* max 4M */
+   uint8_t tiling_index;    /* max 31 */
+   uint8_t bankh;           /* max 8 */
+   uint16_t pitch_in_pixels;
+   uint64_t slice_size;
 };

 struct legacy_surf_layout {
-    unsigned                    bankw:4;  /* max 8 */
-    unsigned                    bankh:4;  /* max 8 */
-    unsigned                    mtilea:4; /* max 8 */
-    unsigned                    tile_split:13;         /* max 4K */
-    unsigned                    stencil_tile_split:13; /* max 4K */
-    unsigned                    pipe_config:5;      /* max 17 */
-    unsigned                    num_banks:5;        /* max 16 */
-    unsigned                    macro_tile_index:4; /* max 15 */
+   unsigned bankw : 4;               /* max 8 */
+   unsigned bankh : 4;               /* max 8 */
+   unsigned mtilea : 4;              /* max 8 */
+   unsigned tile_split : 13;         /* max 4K */
+   unsigned stencil_tile_split : 13; /* max 4K */
+   unsigned pipe_config : 5;         /* max 17 */
+   unsigned num_banks : 5;           /* max 16 */
+   unsigned macro_tile_index : 4;    /* max 15 */

-    /* Whether the depth miptree or stencil miptree as used by the DB are
-     * adjusted from their TC compatible form to ensure depth/stencil
-     * compatibility. If either is true, the corresponding plane cannot be
-     * sampled from.
-     */
-    unsigned                    depth_adjusted:1;
-    unsigned                    stencil_adjusted:1;
+   /* Whether the depth miptree or stencil miptree as used by the DB are
+    * adjusted from their TC compatible form to ensure depth/stencil
+    * compatibility. If either is true, the corresponding plane cannot be
+    * sampled from.
+    */
+   unsigned depth_adjusted : 1;
+   unsigned stencil_adjusted : 1;

-    struct legacy_surf_level    level[RADEON_SURF_MAX_LEVELS];
-    struct legacy_surf_level    stencil_level[RADEON_SURF_MAX_LEVELS];
-    uint8_t                     tiling_index[RADEON_SURF_MAX_LEVELS];
-    uint8_t                     stencil_tiling_index[RADEON_SURF_MAX_LEVELS];
-    struct legacy_surf_fmask    fmask;
-    unsigned                    cmask_slice_tile_max;
+   struct legacy_surf_level level[RADEON_SURF_MAX_LEVELS];
+   struct legacy_surf_level stencil_level[RADEON_SURF_MAX_LEVELS];
+   uint8_t tiling_index[RADEON_SURF_MAX_LEVELS];
+   uint8_t stencil_tiling_index[RADEON_SURF_MAX_LEVELS];
+   struct legacy_surf_fmask fmask;
+   unsigned cmask_slice_tile_max;
 };

 /* Same as addrlib - AddrResourceType. */
-enum gfx9_resource_type {
-    RADEON_RESOURCE_1D = 0,
-    RADEON_RESOURCE_2D,
-    RADEON_RESOURCE_3D,
+enum gfx9_resource_type
+{
+   RADEON_RESOURCE_1D = 0,
+   RADEON_RESOURCE_2D,
+   RADEON_RESOURCE_3D,
 };

 struct gfx9_surf_flags {
-    uint16_t                    swizzle_mode; /* tile mode */
-    uint16_t                    epitch; /* (pitch - 1) or (height - 1) */
+   uint16_t swizzle_mode; /* tile mode */
+   uint16_t epitch;       /* (pitch - 1) or (height - 1) */
 };

 struct gfx9_surf_meta_flags {
-    unsigned                    rb_aligned:1;   /* optimal for RBs */
-    unsigned                    pipe_aligned:1; /* optimal for TC */
-    unsigned                    independent_64B_blocks:1;
-    unsigned                    independent_128B_blocks:1;
-    unsigned                    max_compressed_block_size:2;
+   unsigned rb_aligned : 1;   /* optimal for RBs */
+   unsigned pipe_aligned : 1; /* optimal for TC */
+   unsigned independent_64B_blocks : 1;
+   unsigned independent_128B_blocks : 1;
+   unsigned max_compressed_block_size : 2;
 };

 struct gfx9_surf_layout {
-    struct gfx9_surf_flags      surf;    /* color or depth surface */
-    struct gfx9_surf_flags      fmask;   /* not added to surf_size */
-    struct gfx9_surf_flags      stencil; /* added to surf_size, use stencil_offset */
+   struct gfx9_surf_flags surf;    /* color or depth surface */
+   struct gfx9_surf_flags fmask;   /* not added to surf_size */
+   struct gfx9_surf_flags stencil; /* added to surf_size, use stencil_offset */

-    struct gfx9_surf_meta_flags dcc;   /* metadata of color */
+   struct gfx9_surf_meta_flags dcc; /* metadata of color */

-    enum gfx9_resource_type     resource_type; /* 1D, 2D or 3D */
-    uint16_t                    surf_pitch; /* in blocks */
-    uint16_t                    surf_height;
+   enum gfx9_resource_type resource_type; /* 1D, 2D or 3D */
+   uint16_t surf_pitch;                   /* in blocks */
+   uint16_t surf_height;

-    uint64_t                    surf_offset; /* 0 unless imported with an offset */
-    /* The size of the 2D plane containing all mipmap levels. */
-    uint64_t                    surf_slice_size;
-    /* Mipmap level offset within the slice in bytes. Only valid for LINEAR. */
-    uint32_t                    offset[RADEON_SURF_MAX_LEVELS];
-    /* Mipmap level pitch in elements. Only valid for LINEAR. */
-    uint16_t                    pitch[RADEON_SURF_MAX_LEVELS];
+   uint64_t surf_offset; /* 0 unless imported with an offset */
+   /* The size of the 2D plane containing all mipmap levels. */
+   uint64_t surf_slice_size;
+   /* Mipmap level offset within the slice in bytes. Only valid for LINEAR. */
+   uint32_t offset[RADEON_SURF_MAX_LEVELS];
+   /* Mipmap level pitch in elements. Only valid for LINEAR. */
+   uint16_t pitch[RADEON_SURF_MAX_LEVELS];

-    uint64_t                    stencil_offset; /* separate stencil */
+   uint64_t stencil_offset; /* separate stencil */

-    uint8_t                     dcc_block_width;
-    uint8_t                     dcc_block_height;
-    uint8_t                     dcc_block_depth;
+   uint8_t dcc_block_width;
+   uint8_t dcc_block_height;
+   uint8_t dcc_block_depth;

-    /* Displayable DCC. This is always rb_aligned=0 and pipe_aligned=0.
-     * The 3D engine doesn't support that layout except for chips with 1 RB.
-     * All other chips must set rb_aligned=1.
-     * A compute shader needs to convert from aligned DCC to unaligned.
-     */
-    uint32_t                    display_dcc_size;
-    uint32_t                    display_dcc_alignment;
-    uint16_t                    display_dcc_pitch_max;  /* (mip chain pitch - 1) */
-    bool                        dcc_retile_use_uint16; /* if all values fit into uint16_t */
-    uint32_t                    dcc_retile_num_elements;
-    void                        *dcc_retile_map;
+   /* Displayable DCC. This is always rb_aligned=0 and pipe_aligned=0.
+    * The 3D engine doesn't support that layout except for chips with 1 RB.
+    * All other chips must set rb_aligned=1.
+    * A compute shader needs to convert from aligned DCC to unaligned.
+    */
+   uint32_t display_dcc_size;
+   uint32_t display_dcc_alignment;
+   uint16_t display_dcc_pitch_max; /* (mip chain pitch - 1) */
+   bool dcc_retile_use_uint16;     /* if all values fit into uint16_t */
+   uint32_t dcc_retile_num_elements;
+   void *dcc_retile_map;
 };

 struct radeon_surf {
-    /* Format properties. */
-    unsigned                    blk_w:4;
-    unsigned                    blk_h:4;
-    unsigned                    bpe:5;
-    /* Number of mipmap levels where DCC is enabled starting from level 0.
-     * Non-zero levels may be disabled due to alignment constraints, but not
-     * the first level.
-     */
-    unsigned                    num_dcc_levels:4;
-    unsigned                    is_linear:1;
-    unsigned                    has_stencil:1;
-    /* This might be true even if micro_tile_mode isn't displayable or rotated. */
-    unsigned                    is_displayable:1;
-    /* Displayable, thin, depth, rotated. AKA D,S,Z,R swizzle modes. */
-    unsigned                    micro_tile_mode:3;
-    uint32_t                    flags;
+   /* Format properties. */
+   unsigned blk_w : 4;
+   unsigned blk_h : 4;
+   unsigned bpe : 5;
+   /* Number of mipmap levels where DCC is enabled starting from level 0.
+    * Non-zero levels may be disabled due to alignment constraints, but not
+    * the first level.
+    */
+   unsigned num_dcc_levels : 4;
+   unsigned is_linear : 1;
+   unsigned has_stencil : 1;
+   /* This might be true even if micro_tile_mode isn't displayable or rotated. */
+   unsigned is_displayable : 1;
+   /* Displayable, thin, depth, rotated. AKA D,S,Z,R swizzle modes. */
+   unsigned micro_tile_mode : 3;
+   uint32_t flags;

-    /* These are return values. Some of them can be set by the caller, but
-     * they will be treated as hints (e.g. bankw, bankh) and might be
-     * changed by the calculator.
-     */
+   /* These are return values. Some of them can be set by the caller, but
+    * they will be treated as hints (e.g. bankw, bankh) and might be
+    * changed by the calculator.
+    */

-    /* Tile swizzle can be OR'd with low bits of the BASE_256B address.
-     * The value is the same for all mipmap levels. Supported tile modes:
-     * - GFX6: Only macro tiling.
-     * - GFX9: Only *_X and *_T swizzle modes. Level 0 must not be in the mip
-     *   tail.
-     *
-     * Only these surfaces are allowed to set it:
-     * - color (if it doesn't have to be displayable)
-     * - DCC (same tile swizzle as color)
-     * - FMASK
-     * - CMASK if it's TC-compatible or if the gen is GFX9
-     * - depth/stencil if HTILE is not TC-compatible and if the gen is not GFX9
-     */
-    uint8_t                     tile_swizzle;
-    uint8_t                     fmask_tile_swizzle;
+   /* Tile swizzle can be OR'd with low bits of the BASE_256B address.
+    * The value is the same for all mipmap levels. Supported tile modes:
+    * - GFX6: Only macro tiling.
+    * - GFX9: Only *_X and *_T swizzle modes. Level 0 must not be in the mip
+    *   tail.
+    *
+    * Only these surfaces are allowed to set it:
+    * - color (if it doesn't have to be displayable)
+    * - DCC (same tile swizzle as color)
+    * - FMASK
+    * - CMASK if it's TC-compatible or if the gen is GFX9
+    * - depth/stencil if HTILE is not TC-compatible and if the gen is not GFX9
+    */
+   uint8_t tile_swizzle;
+   uint8_t fmask_tile_swizzle;

-    uint64_t                    surf_size;
-    uint64_t                    fmask_size;
-    uint32_t                    surf_alignment;
-    uint32_t                    fmask_alignment;
+   uint64_t surf_size;
+   uint64_t fmask_size;
+   uint32_t surf_alignment;
+   uint32_t fmask_alignment;

-    /* DCC and HTILE are very small. */
-    uint32_t                    dcc_size;
-    uint32_t                    dcc_slice_size;
-    uint32_t                    dcc_alignment;
+   /* DCC and HTILE are very small. */
+   uint32_t dcc_size;
+   uint32_t dcc_slice_size;
+   uint32_t dcc_alignment;

-    uint32_t                    htile_size;
-    uint32_t                    htile_slice_size;
-    uint32_t                    htile_alignment;
+   uint32_t htile_size;
+   uint32_t htile_slice_size;
+   uint32_t htile_alignment;

-    uint32_t                    cmask_size;
-    uint32_t                    cmask_slice_size;
-    uint32_t                    cmask_alignment;
+   uint32_t cmask_size;
+   uint32_t cmask_slice_size;
+   uint32_t cmask_alignment;

-    /* All buffers combined. */
-    uint64_t                    htile_offset;
-    uint64_t                    fmask_offset;
-    uint64_t                    cmask_offset;
-    uint64_t                    dcc_offset;
-    uint64_t                    display_dcc_offset;
-    uint64_t                    dcc_retile_map_offset;
-    uint64_t                    total_size;
-    uint32_t                    alignment;
+   /* All buffers combined. */
+   uint64_t htile_offset;
+   uint64_t fmask_offset;
+   uint64_t cmask_offset;
+   uint64_t dcc_offset;
+   uint64_t display_dcc_offset;
+   uint64_t dcc_retile_map_offset;
+   uint64_t total_size;
+   uint32_t alignment;

-    union {
-        /* Return values for GFX8 and older.
-         *
-         * Some of them can be set by the caller if certain parameters are
-         * desirable. The allocator will try to obey them.
-         */
-        struct legacy_surf_layout legacy;
+   union {
+      /* Return values for GFX8 and older.
+       *
+       * Some of them can be set by the caller if certain parameters are
+       * desirable. The allocator will try to obey them.
+       */
+      struct legacy_surf_layout legacy;

-        /* GFX9+ return values. */
-        struct gfx9_surf_layout gfx9;
-    } u;
+      /* GFX9+ return values. */
+      struct gfx9_surf_layout gfx9;
+   } u;
 };

 struct ac_surf_info {
-	uint32_t width;
-	uint32_t height;
-	uint32_t depth;
-	uint8_t samples; /* For Z/S: samples; For color: FMASK coverage samples */
-	uint8_t storage_samples; /* For color: allocated samples */
-	uint8_t levels;
-	uint8_t num_channels; /* heuristic for displayability */
-	uint16_t array_size;
-	uint32_t *surf_index; /* Set a monotonic counter for tile swizzling. */
-	uint32_t *fmask_surf_index;
+   uint32_t width;
+   uint32_t height;
+   uint32_t depth;
+   uint8_t samples;         /* For Z/S: samples; For color: FMASK coverage samples */
+   uint8_t storage_samples; /* For color: allocated samples */
+   uint8_t levels;
+   uint8_t num_channels; /* heuristic for displayability */
+   uint16_t array_size;
+   uint32_t *surf_index; /* Set a monotonic counter for tile swizzling. */
+   uint32_t *fmask_surf_index;
 };

 struct ac_surf_config {
-	struct ac_surf_info info;
-	unsigned is_1d : 1;
-	unsigned is_3d : 1;
-	unsigned is_cube : 1;
+   struct ac_surf_info info;
+   unsigned is_1d : 1;
+   unsigned is_3d : 1;
+   unsigned is_cube : 1;
 };

 struct ac_addrlib *ac_addrlib_create(const struct radeon_info *info,
-				     const struct amdgpu_gpu_info *amdinfo,
-				     uint64_t *max_alignment);
+                                     const struct amdgpu_gpu_info *amdinfo,
+                                     uint64_t *max_alignment);
 void ac_addrlib_destroy(struct ac_addrlib *addrlib);

 int ac_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *info,
-		       const struct ac_surf_config * config,
-		       enum radeon_surf_mode mode,
-		       struct radeon_surf *surf);
+                       const struct ac_surf_config *config, enum radeon_surf_mode mode,
+                       struct radeon_surf *surf);
 void ac_surface_zero_dcc_fields(struct radeon_surf *surf);

-void ac_surface_set_bo_metadata(const struct radeon_info *info,
-                                struct radeon_surf *surf, uint64_t tiling_flags,
-                                enum radeon_surf_mode *mode);
-void ac_surface_get_bo_metadata(const struct radeon_info *info,
-                                struct radeon_surf *surf, uint64_t *tiling_flags);
+void ac_surface_set_bo_metadata(const struct radeon_info *info, struct radeon_surf *surf,
+                                uint64_t tiling_flags, enum radeon_surf_mode *mode);
+void ac_surface_get_bo_metadata(const struct radeon_info *info, struct radeon_surf *surf,
+                                uint64_t *tiling_flags);

-bool ac_surface_set_umd_metadata(const struct radeon_info *info,
-                                 struct radeon_surf *surf,
-                                 unsigned num_storage_samples,
-                                 unsigned num_mipmap_levels,
-                                 unsigned size_metadata,
-                                 uint32_t metadata[64]);
-void ac_surface_get_umd_metadata(const struct radeon_info *info,
-                                 struct radeon_surf *surf,
-                                 unsigned num_mipmap_levels,
-                                 uint32_t desc[8],
+bool ac_surface_set_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf,
+                                 unsigned num_storage_samples, unsigned num_mipmap_levels,
+                                 unsigned size_metadata, uint32_t metadata[64]);
+void ac_surface_get_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf,
+                                 unsigned num_mipmap_levels, uint32_t desc[8],
                                 unsigned *size_metadata, uint32_t metadata[64]);

-void ac_surface_override_offset_stride(const struct radeon_info *info,
-                                       struct radeon_surf *surf,
-                                       unsigned num_mipmap_levels,
-                                       uint64_t offset, unsigned pitch);
+void ac_surface_override_offset_stride(const struct radeon_info *info, struct radeon_surf *surf,
+                                       unsigned num_mipmap_levels, uint64_t offset, unsigned pitch);

 #ifdef __cplusplus
 }
--- a/src/amd/common/amd_family.h
+++ b/src/amd/common/amd_family.h
@ -24,117 +24,120 @@
 #ifndef AMD_FAMILY_H
 #define AMD_FAMILY_H

-enum radeon_family {
-    CHIP_UNKNOWN = 0,
-    CHIP_R300,     /* R3xx-based cores. (GFX2) */
-    CHIP_R350,
-    CHIP_RV350,
-    CHIP_RV370,
-    CHIP_RV380,
-    CHIP_RS400,
-    CHIP_RC410,
-    CHIP_RS480,
-    CHIP_R420,     /* R4xx-based cores. (GFX2) */
-    CHIP_R423,
-    CHIP_R430,
-    CHIP_R480,
-    CHIP_R481,
-    CHIP_RV410,
-    CHIP_RS600,
-    CHIP_RS690,
-    CHIP_RS740,
-    CHIP_RV515,    /* R5xx-based cores. (GFX2) */
-    CHIP_R520,
-    CHIP_RV530,
-    CHIP_R580,
-    CHIP_RV560,
-    CHIP_RV570,
-    CHIP_R600,     /* GFX3 (R6xx) */
-    CHIP_RV610,
-    CHIP_RV630,
-    CHIP_RV670,
-    CHIP_RV620,
-    CHIP_RV635,
-    CHIP_RS780,
-    CHIP_RS880,
-    CHIP_RV770,    /* GFX3 (R7xx) */
-    CHIP_RV730,
-    CHIP_RV710,
-    CHIP_RV740,
-    CHIP_CEDAR,    /* GFX4 (Evergreen) */
-    CHIP_REDWOOD,
-    CHIP_JUNIPER,
-    CHIP_CYPRESS,
-    CHIP_HEMLOCK,
-    CHIP_PALM,
-    CHIP_SUMO,
-    CHIP_SUMO2,
-    CHIP_BARTS,
-    CHIP_TURKS,
-    CHIP_CAICOS,
-    CHIP_CAYMAN,   /* GFX5 (Northern Islands) */
-    CHIP_ARUBA,
-    CHIP_TAHITI,   /* GFX6 (Southern Islands) */
-    CHIP_PITCAIRN,
-    CHIP_VERDE,
-    CHIP_OLAND,
-    CHIP_HAINAN,
-    CHIP_BONAIRE,  /* GFX7 (Sea Islands) */
-    CHIP_KAVERI,
-    CHIP_KABINI,
-    CHIP_HAWAII,
-    CHIP_TONGA,    /* GFX8 (Volcanic Islands & Polaris) */
-    CHIP_ICELAND,
-    CHIP_CARRIZO,
-    CHIP_FIJI,
-    CHIP_STONEY,
-    CHIP_POLARIS10,
-    CHIP_POLARIS11,
-    CHIP_POLARIS12,
-    CHIP_VEGAM,
-    CHIP_VEGA10,   /* GFX9 (Vega) */
-    CHIP_VEGA12,
-    CHIP_VEGA20,
-    CHIP_RAVEN,
-    CHIP_RAVEN2,
-    CHIP_RENOIR,
-    CHIP_ARCTURUS,
-    CHIP_NAVI10,
-    CHIP_NAVI12,
-    CHIP_NAVI14,
-    CHIP_SIENNA_CICHLID,
-    CHIP_NAVY_FLOUNDER,
-    CHIP_LAST,
+enum radeon_family
+{
+   CHIP_UNKNOWN = 0,
+   CHIP_R300, /* R3xx-based cores. (GFX2) */
+   CHIP_R350,
+   CHIP_RV350,
+   CHIP_RV370,
+   CHIP_RV380,
+   CHIP_RS400,
+   CHIP_RC410,
+   CHIP_RS480,
+   CHIP_R420, /* R4xx-based cores. (GFX2) */
+   CHIP_R423,
+   CHIP_R430,
+   CHIP_R480,
+   CHIP_R481,
+   CHIP_RV410,
+   CHIP_RS600,
+   CHIP_RS690,
+   CHIP_RS740,
+   CHIP_RV515, /* R5xx-based cores. (GFX2) */
+   CHIP_R520,
+   CHIP_RV530,
+   CHIP_R580,
+   CHIP_RV560,
+   CHIP_RV570,
+   CHIP_R600, /* GFX3 (R6xx) */
+   CHIP_RV610,
+   CHIP_RV630,
+   CHIP_RV670,
+   CHIP_RV620,
+   CHIP_RV635,
+   CHIP_RS780,
+   CHIP_RS880,
+   CHIP_RV770, /* GFX3 (R7xx) */
+   CHIP_RV730,
+   CHIP_RV710,
+   CHIP_RV740,
+   CHIP_CEDAR, /* GFX4 (Evergreen) */
+   CHIP_REDWOOD,
+   CHIP_JUNIPER,
+   CHIP_CYPRESS,
+   CHIP_HEMLOCK,
+   CHIP_PALM,
+   CHIP_SUMO,
+   CHIP_SUMO2,
+   CHIP_BARTS,
+   CHIP_TURKS,
+   CHIP_CAICOS,
+   CHIP_CAYMAN, /* GFX5 (Northern Islands) */
+   CHIP_ARUBA,
+   CHIP_TAHITI, /* GFX6 (Southern Islands) */
+   CHIP_PITCAIRN,
+   CHIP_VERDE,
+   CHIP_OLAND,
+   CHIP_HAINAN,
+   CHIP_BONAIRE, /* GFX7 (Sea Islands) */
+   CHIP_KAVERI,
+   CHIP_KABINI,
+   CHIP_HAWAII,
+   CHIP_TONGA, /* GFX8 (Volcanic Islands & Polaris) */
+   CHIP_ICELAND,
+   CHIP_CARRIZO,
+   CHIP_FIJI,
+   CHIP_STONEY,
+   CHIP_POLARIS10,
+   CHIP_POLARIS11,
+   CHIP_POLARIS12,
+   CHIP_VEGAM,
+   CHIP_VEGA10, /* GFX9 (Vega) */
+   CHIP_VEGA12,
+   CHIP_VEGA20,
+   CHIP_RAVEN,
+   CHIP_RAVEN2,
+   CHIP_RENOIR,
+   CHIP_ARCTURUS,
+   CHIP_NAVI10,
+   CHIP_NAVI12,
+   CHIP_NAVI14,
+   CHIP_SIENNA_CICHLID,
+   CHIP_NAVY_FLOUNDER,
+   CHIP_LAST,
 };

-enum chip_class {
-    CLASS_UNKNOWN = 0,
-    R300,
-    R400,
-    R500,
-    R600,
-    R700,
-    EVERGREEN,
-    CAYMAN,
-    GFX6,
-    GFX7,
-    GFX8,
-    GFX9,
-    GFX10,
-    GFX10_3,
+enum chip_class
+{
+   CLASS_UNKNOWN = 0,
+   R300,
+   R400,
+   R500,
+   R600,
+   R700,
+   EVERGREEN,
+   CAYMAN,
+   GFX6,
+   GFX7,
+   GFX8,
+   GFX9,
+   GFX10,
+   GFX10_3,
 };

-enum ring_type {
-    RING_GFX = 0,
-    RING_COMPUTE,
-    RING_DMA,
-    RING_UVD,
-    RING_VCE,
-    RING_UVD_ENC,
-    RING_VCN_DEC,
-    RING_VCN_ENC,
-    RING_VCN_JPEG,
-    NUM_RING_TYPES,
+enum ring_type
+{
+   RING_GFX = 0,
+   RING_COMPUTE,
+   RING_DMA,
+   RING_UVD,
+   RING_VCE,
+   RING_UVD_ENC,
+   RING_VCN_DEC,
+   RING_VCN_ENC,
+   RING_VCN_JPEG,
+   NUM_RING_TYPES,
 };

 #endif
--- a/src/amd/common/amd_kernel_code_t.h
+++ b/src/amd/common/amd_kernel_code_t.h
@ -30,13 +30,12 @@
 //---------------------------------------------------------------------------//

 // Sets val bits for specified mask in specified dst packed instance.
-#define AMD_HSA_BITS_SET(dst, mask, val)                                       \
-  dst &= (~(1 << mask ## _SHIFT) & ~mask);                                     \
-  dst |= (((val) << mask ## _SHIFT) & mask)
+#define AMD_HSA_BITS_SET(dst, mask, val)                                                           \
+   dst &= (~(1 << mask##_SHIFT) & ~mask);                                                          \
+   dst |= (((val) << mask##_SHIFT) & mask)

 // Gets bits for specified mask from specified src packed instance.
-#define AMD_HSA_BITS_GET(src, mask)                                            \
-  ((src & mask) >> mask ## _SHIFT)
+#define AMD_HSA_BITS_GET(src, mask) ((src & mask) >> mask##_SHIFT)

 /* Every amd_*_code_t has the following properties, which are composed of
 * a number of bit fields. Every bit field has a mask (AMD_CODE_PROPERTY_*),
@ -47,132 +46,164 @@
 * implementation defined in the C standard and so cannot be used to
 * specify an ABI)
 */
-enum amd_code_property_mask_t {
+enum amd_code_property_mask_t
+{

-  /* Enable the setup of the SGPR user data registers
-   * (AMD_CODE_PROPERTY_ENABLE_SGPR_*), see documentation of amd_kernel_code_t
-   * for initial register state.
-   *
-   * The total number of SGPRuser data registers requested must not
-   * exceed 16. Any requests beyond 16 will be ignored.
-   *
-   * Used to set COMPUTE_PGM_RSRC2.USER_SGPR (set to total count of
-   * SGPR user data registers enabled up to 16).
-   */
+   /* Enable the setup of the SGPR user data registers
+    * (AMD_CODE_PROPERTY_ENABLE_SGPR_*), see documentation of amd_kernel_code_t
+    * for initial register state.
+    *
+    * The total number of SGPR user data registers requested must not
+    * exceed 16. Any requests beyond 16 will be ignored.
+    *
+    * Used to set COMPUTE_PGM_RSRC2.USER_SGPR (set to total count of
+    * SGPR user data registers enabled up to 16).
+    */

-  AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT = 0,
-  AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_WIDTH = 1,
-  AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT = 0,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_WIDTH = 1,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER =
+      ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_WIDTH) - 1)
+      << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,

-  AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT = 1,
-  AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_WIDTH = 1,
-  AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT = 1,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_WIDTH = 1,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR =
+      ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_WIDTH) - 1)
+      << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,

-  AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT = 2,
-  AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_WIDTH = 1,
-  AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT = 2,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_WIDTH = 1,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR =
+      ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_WIDTH) - 1)
+      << AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,

-  AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT = 3,
-  AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_WIDTH = 1,
-  AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT = 3,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_WIDTH = 1,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR =
+      ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_WIDTH) - 1)
+      << AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,

-  AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT = 4,
-  AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_WIDTH = 1,
-  AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT = 4,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_WIDTH = 1,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID =
+      ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_WIDTH) - 1)
+      << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,

-  AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT = 5,
-  AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_WIDTH = 1,
-  AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT = 5,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_WIDTH = 1,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT =
+      ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_WIDTH) - 1)
+      << AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,

-  AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT = 6,
-  AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_WIDTH = 1,
-  AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT = 6,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_WIDTH = 1,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE =
+      ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_WIDTH) - 1)
+      << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,

-  AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT = 7,
-  AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_WIDTH = 1,
-  AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT = 7,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_WIDTH = 1,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X =
+      ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_WIDTH) - 1)
+      << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT,

-  AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT = 8,
-  AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_WIDTH = 1,
-  AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT = 8,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_WIDTH = 1,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y =
+      ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_WIDTH) - 1)
+      << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT,

-  AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT = 9,
-  AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH = 1,
-  AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT = 9,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH = 1,
+   AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z =
+      ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH) - 1)
+      << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT,

-  AMD_CODE_PROPERTY_RESERVED1_SHIFT = 10,
-  AMD_CODE_PROPERTY_RESERVED1_WIDTH = 6,
-  AMD_CODE_PROPERTY_RESERVED1 = ((1 << AMD_CODE_PROPERTY_RESERVED1_WIDTH) - 1) << AMD_CODE_PROPERTY_RESERVED1_SHIFT,
+   AMD_CODE_PROPERTY_RESERVED1_SHIFT = 10,
+   AMD_CODE_PROPERTY_RESERVED1_WIDTH = 6,
+   AMD_CODE_PROPERTY_RESERVED1 = ((1 << AMD_CODE_PROPERTY_RESERVED1_WIDTH) - 1)
+                                 << AMD_CODE_PROPERTY_RESERVED1_SHIFT,

-  /* Control wave ID base counter for GDS ordered-append. Used to set
-   * COMPUTE_DISPATCH_INITIATOR.ORDERED_APPEND_ENBL. (Not sure if
-   * ORDERED_APPEND_MODE also needs to be settable)
-   */
-  AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT = 16,
-  AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_WIDTH = 1,
-  AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS = ((1 << AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT,
+   /* Control wave ID base counter for GDS ordered-append. Used to set
+    * COMPUTE_DISPATCH_INITIATOR.ORDERED_APPEND_ENBL. (Not sure if
+    * ORDERED_APPEND_MODE also needs to be settable)
+    */
+   AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT = 16,
+   AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_WIDTH = 1,
+   AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS =
+      ((1 << AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_WIDTH) - 1)
+      << AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT,

-  /* The interleave (swizzle) element size in bytes required by the
-   * code for private memory. This must be 2, 4, 8 or 16. This value
-   * is provided to the finalizer when it is invoked and is recorded
-   * here. The hardware will interleave the memory requests of each
-   * lane of a wavefront by this element size to ensure each
-   * work-item gets a distinct memory memory location. Therefore, the
-   * finalizer ensures that all load and store operations done to
-   * private memory do not exceed this size. For example, if the
-   * element size is 4 (32-bits or dword) and a 64-bit value must be
-   * loaded, the finalizer will generate two 32-bit loads. This
-   * ensures that the interleaving will get the work-item
-   * specific dword for both halves of the 64-bit value. If it just
-   * did a 64-bit load then it would get one dword which belonged to
-   * its own work-item, but the second dword would belong to the
-   * adjacent lane work-item since the interleaving is in dwords.
-   *
-   * The value used must match the value that the runtime configures
-   * the GPU flat scratch (SH_STATIC_MEM_CONFIG.ELEMENT_SIZE). This
-   * is generally DWORD.
-   *
-   * USE VALUES FROM THE AMD_ELEMENT_BYTE_SIZE_T ENUM.
-   */
-  AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT = 17,
-  AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH = 2,
-  AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE = ((1 << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH) - 1) << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT,
+   /* The interleave (swizzle) element size in bytes required by the
+    * code for private memory. This must be 2, 4, 8 or 16. This value
+    * is provided to the finalizer when it is invoked and is recorded
+    * here. The hardware will interleave the memory requests of each
+    * lane of a wavefront by this element size to ensure each
+    * work-item gets a distinct memory location. Therefore, the
+    * finalizer ensures that all load and store operations done to
+    * private memory do not exceed this size. For example, if the
+    * element size is 4 (32-bits or dword) and a 64-bit value must be
+    * loaded, the finalizer will generate two 32-bit loads. This
+    * ensures that the interleaving will get the work-item
+    * specific dword for both halves of the 64-bit value. If it just
+    * did a 64-bit load then it would get one dword which belonged to
+    * its own work-item, but the second dword would belong to the
+    * adjacent lane work-item since the interleaving is in dwords.
+    *
+    * The value used must match the value that the runtime configures
+    * the GPU flat scratch (SH_STATIC_MEM_CONFIG.ELEMENT_SIZE). This
+    * is generally DWORD.
+    *
+    * USE VALUES FROM THE AMD_ELEMENT_BYTE_SIZE_T ENUM.
+    */
+   AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT = 17,
+   AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH = 2,
+   AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE =
+      ((1 << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH) - 1)
+      << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT,

-  /* Are global memory addresses 64 bits. Must match
-   * amd_kernel_code_t.hsail_machine_model ==
-   * HSA_MACHINE_LARGE. Must also match
-   * SH_MEM_CONFIG.PTR32 (GFX6 (SI)/GFX7 (CI)),
-   * SH_MEM_CONFIG.ADDRESS_MODE (GFX8 (VI)+).
-   */
-  AMD_CODE_PROPERTY_IS_PTR64_SHIFT = 19,
-  AMD_CODE_PROPERTY_IS_PTR64_WIDTH = 1,
-  AMD_CODE_PROPERTY_IS_PTR64 = ((1 << AMD_CODE_PROPERTY_IS_PTR64_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_PTR64_SHIFT,
+   /* Whether global memory addresses are 64 bits. Must match
+    * amd_kernel_code_t.hsail_machine_model ==
+    * HSA_MACHINE_LARGE. Must also match
+    * SH_MEM_CONFIG.PTR32 (GFX6 (SI)/GFX7 (CI)),
+    * SH_MEM_CONFIG.ADDRESS_MODE (GFX8 (VI)+).
+    */
+   AMD_CODE_PROPERTY_IS_PTR64_SHIFT = 19,
+   AMD_CODE_PROPERTY_IS_PTR64_WIDTH = 1,
+   AMD_CODE_PROPERTY_IS_PTR64 = ((1 << AMD_CODE_PROPERTY_IS_PTR64_WIDTH) - 1)
+                                << AMD_CODE_PROPERTY_IS_PTR64_SHIFT,

-  /* Indicate if the generated ISA is using a dynamically sized call
-   * stack. This can happen if calls are implemented using a call
-   * stack and recursion, alloca or calls to indirect functions are
-   * present. In these cases the Finalizer cannot compute the total
-   * private segment size at compile time. In this case the
-   * workitem_private_segment_byte_size only specifies the statically
-   * know private segment size, and additional space must be added
-   * for the call stack.
-   */
-  AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT = 20,
-  AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH = 1,
-  AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK = ((1 << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT,
+   /* Indicate if the generated ISA is using a dynamically sized call
+    * stack. This can happen if calls are implemented using a call
+    * stack and recursion, alloca or calls to indirect functions are
+    * present. In these cases the Finalizer cannot compute the total
+    * private segment size at compile time. In this case the
+    * workitem_private_segment_byte_size only specifies the statically
+    * known private segment size, and additional space must be added
+    * for the call stack.
+    */
+   AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT = 20,
+   AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH = 1,
+   AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK =
+      ((1 << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH) - 1)
+      << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT,

-  /* Indicate if code generated has support for debugging. */
-  AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT = 21,
-  AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH = 1,
-  AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT,
+   /* Indicate if code generated has support for debugging. */
+   AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT = 21,
+   AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH = 1,
+   AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH) - 1)
+                                          << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT,

-  AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT = 22,
-  AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH = 1,
-  AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT,
+   AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT = 22,
+   AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH = 1,
+   AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH) - 1)
+                                          << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT,

-  AMD_CODE_PROPERTY_RESERVED2_SHIFT = 23,
-  AMD_CODE_PROPERTY_RESERVED2_WIDTH = 9,
-  AMD_CODE_PROPERTY_RESERVED2 = ((1 << AMD_CODE_PROPERTY_RESERVED2_WIDTH) - 1) << AMD_CODE_PROPERTY_RESERVED2_SHIFT
+   AMD_CODE_PROPERTY_RESERVED2_SHIFT = 23,
+   AMD_CODE_PROPERTY_RESERVED2_WIDTH = 9,
+   AMD_CODE_PROPERTY_RESERVED2 = ((1 << AMD_CODE_PROPERTY_RESERVED2_WIDTH) - 1)
+                                 << AMD_CODE_PROPERTY_RESERVED2_SHIFT
 };

 /* AMD Kernel Code Object (amd_kernel_code_t). GPU CP uses the AMD Kernel
@ -381,154 +412,154 @@ enum amd_code_property_mask_t {
 */

 typedef struct amd_kernel_code_s {
-  uint32_t amd_kernel_code_version_major;
-  uint32_t amd_kernel_code_version_minor;
-  uint16_t amd_machine_kind;
-  uint16_t amd_machine_version_major;
-  uint16_t amd_machine_version_minor;
-  uint16_t amd_machine_version_stepping;
+   uint32_t amd_kernel_code_version_major;
+   uint32_t amd_kernel_code_version_minor;
+   uint16_t amd_machine_kind;
+   uint16_t amd_machine_version_major;
+   uint16_t amd_machine_version_minor;
+   uint16_t amd_machine_version_stepping;

-  /* Byte offset (possibly negative) from start of amd_kernel_code_t
-   * object to kernel's entry point instruction. The actual code for
-   * the kernel is required to be 256 byte aligned to match hardware
-   * requirements (SQ cache line is 16). The code must be position
-   * independent code (PIC) for AMD devices to give runtime the
-   * option of copying code to discrete GPU memory or APU L2
-   * cache. The Finalizer should endeavour to allocate all kernel
-   * machine code in contiguous memory pages so that a device
-   * pre-fetcher will tend to only pre-fetch Kernel Code objects,
-   * improving cache performance.
-   */
-  int64_t kernel_code_entry_byte_offset;
+   /* Byte offset (possibly negative) from start of amd_kernel_code_t
+    * object to kernel's entry point instruction. The actual code for
+    * the kernel is required to be 256 byte aligned to match hardware
+    * requirements (SQ cache line is 16). The code must be position
+    * independent code (PIC) for AMD devices to give runtime the
+    * option of copying code to discrete GPU memory or APU L2
+    * cache. The Finalizer should endeavour to allocate all kernel
+    * machine code in contiguous memory pages so that a device
+    * pre-fetcher will tend to only pre-fetch Kernel Code objects,
+    * improving cache performance.
+    */
+   int64_t kernel_code_entry_byte_offset;

-  /* Range of bytes to consider prefetching expressed as an offset
-   * and size. The offset is from the start (possibly negative) of
-   * amd_kernel_code_t object. Set both to 0 if no prefetch
-   * information is available.
-   */
-  int64_t kernel_code_prefetch_byte_offset;
-  uint64_t kernel_code_prefetch_byte_size;
+   /* Range of bytes to consider prefetching expressed as an offset
+    * and size. The offset is from the start (possibly negative) of
+    * amd_kernel_code_t object. Set both to 0 if no prefetch
+    * information is available.
+    */
+   int64_t kernel_code_prefetch_byte_offset;
+   uint64_t kernel_code_prefetch_byte_size;

-  /* Number of bytes of scratch backing memory required for full
-   * occupancy of target chip. This takes into account the number of
-   * bytes of scratch per work-item, the wavefront size, the maximum
-   * number of wavefronts per CU, and the number of CUs. This is an
-   * upper limit on scratch. If the grid being dispatched is small it
-   * may only need less than this. If the kernel uses no scratch, or
-   * the Finalizer has not computed this value, it must be 0.
-   */
-  uint64_t max_scratch_backing_memory_byte_size;
+   /* Number of bytes of scratch backing memory required for full
+    * occupancy of target chip. This takes into account the number of
+    * bytes of scratch per work-item, the wavefront size, the maximum
+    * number of wavefronts per CU, and the number of CUs. This is an
+    * upper limit on scratch. If the grid being dispatched is small it
+    * may only need less than this. If the kernel uses no scratch, or
+    * the Finalizer has not computed this value, it must be 0.
+    */
+   uint64_t max_scratch_backing_memory_byte_size;

-  /* Shader program settings for CS. Contains COMPUTE_PGM_RSRC1 and
-   * COMPUTE_PGM_RSRC2 registers.
-   */
-  uint64_t compute_pgm_resource_registers;
+   /* Shader program settings for CS. Contains COMPUTE_PGM_RSRC1 and
+    * COMPUTE_PGM_RSRC2 registers.
+    */
+   uint64_t compute_pgm_resource_registers;

-  /* Code properties. See amd_code_property_mask_t for a full list of
-   * properties.
-   */
-  uint32_t code_properties;
+   /* Code properties. See amd_code_property_mask_t for a full list of
+    * properties.
+    */
+   uint32_t code_properties;

-  /* The amount of memory required for the combined private, spill
-   * and arg segments for a work-item in bytes. If
-   * is_dynamic_callstack is 1 then additional space must be added to
-   * this value for the call stack.
-   */
-  uint32_t workitem_private_segment_byte_size;
+   /* The amount of memory required for the combined private, spill
+    * and arg segments for a work-item in bytes. If
+    * is_dynamic_callstack is 1 then additional space must be added to
+    * this value for the call stack.
+    */
+   uint32_t workitem_private_segment_byte_size;

-  /* The amount of group segment memory required by a work-group in
-   * bytes. This does not include any dynamically allocated group
-   * segment memory that may be added when the kernel is
-   * dispatched.
-   */
-  uint32_t workgroup_group_segment_byte_size;
+   /* The amount of group segment memory required by a work-group in
+    * bytes. This does not include any dynamically allocated group
+    * segment memory that may be added when the kernel is
+    * dispatched.
+    */
+   uint32_t workgroup_group_segment_byte_size;

-  /* Number of byte of GDS required by kernel dispatch. Must be 0 if
-   * not using GDS.
-   */
-  uint32_t gds_segment_byte_size;
+   /* Number of bytes of GDS required by kernel dispatch. Must be 0 if
+    * not using GDS.
+    */
+   uint32_t gds_segment_byte_size;

-  /* The size in bytes of the kernarg segment that holds the values
-   * of the arguments to the kernel. This could be used by CP to
-   * prefetch the kernarg segment pointed to by the dispatch packet.
-   */
-  uint64_t kernarg_segment_byte_size;
+   /* The size in bytes of the kernarg segment that holds the values
+    * of the arguments to the kernel. This could be used by CP to
+    * prefetch the kernarg segment pointed to by the dispatch packet.
+    */
+   uint64_t kernarg_segment_byte_size;

-  /* Number of fbarrier's used in the kernel and all functions it
-   * calls. If the implementation uses group memory to allocate the
-   * fbarriers then that amount must already be included in the
-   * workgroup_group_segment_byte_size total.
-   */
-  uint32_t workgroup_fbarrier_count;
+   /* Number of fbarriers used in the kernel and all functions it
+    * calls. If the implementation uses group memory to allocate the
+    * fbarriers then that amount must already be included in the
+    * workgroup_group_segment_byte_size total.
+    */
+   uint32_t workgroup_fbarrier_count;

-  /* Number of scalar registers used by a wavefront. This includes
-   * the special SGPRs for VCC, Flat Scratch Base, Flat Scratch Size
-   * and XNACK (for GFX8 (VI)). It does not include the 16 SGPR added if a
-   * trap handler is enabled. Used to set COMPUTE_PGM_RSRC1.SGPRS.
-   */
-  uint16_t wavefront_sgpr_count;
+   /* Number of scalar registers used by a wavefront. This includes
+    * the special SGPRs for VCC, Flat Scratch Base, Flat Scratch Size
+    * and XNACK (for GFX8 (VI)). It does not include the 16 SGPR added if a
+    * trap handler is enabled. Used to set COMPUTE_PGM_RSRC1.SGPRS.
+    */
+   uint16_t wavefront_sgpr_count;

-  /* Number of vector registers used by each work-item. Used to set
-   * COMPUTE_PGM_RSRC1.VGPRS.
-   */
-  uint16_t workitem_vgpr_count;
+   /* Number of vector registers used by each work-item. Used to set
+    * COMPUTE_PGM_RSRC1.VGPRS.
+    */
+   uint16_t workitem_vgpr_count;

-  /* If reserved_vgpr_count is 0 then must be 0. Otherwise, this is the
-   * first fixed VGPR number reserved.
-   */
-  uint16_t reserved_vgpr_first;
+   /* If reserved_vgpr_count is 0 then must be 0. Otherwise, this is the
+    * first fixed VGPR number reserved.
+    */
+   uint16_t reserved_vgpr_first;

-  /* The number of consecutive VGPRs reserved by the client. If
-   * is_debug_supported then this count includes VGPRs reserved
-   * for debugger use.
-   */
-  uint16_t reserved_vgpr_count;
+   /* The number of consecutive VGPRs reserved by the client. If
+    * is_debug_supported then this count includes VGPRs reserved
+    * for debugger use.
+    */
+   uint16_t reserved_vgpr_count;

-  /* If reserved_sgpr_count is 0 then must be 0. Otherwise, this is the
-   * first fixed SGPR number reserved.
-   */
-  uint16_t reserved_sgpr_first;
+   /* If reserved_sgpr_count is 0 then must be 0. Otherwise, this is the
+    * first fixed SGPR number reserved.
+    */
+   uint16_t reserved_sgpr_first;

-  /* The number of consecutive SGPRs reserved by the client. If
-   * is_debug_supported then this count includes SGPRs reserved
-   * for debugger use.
-   */
-  uint16_t reserved_sgpr_count;
+   /* The number of consecutive SGPRs reserved by the client. If
+    * is_debug_supported then this count includes SGPRs reserved
+    * for debugger use.
+    */
+   uint16_t reserved_sgpr_count;

-  /* If is_debug_supported is 0 then must be 0. Otherwise, this is the
-   * fixed SGPR number used to hold the wave scratch offset for the
-   * entire kernel execution, or uint16_t(-1) if the register is not
-   * used or not known.
-   */
-  uint16_t debug_wavefront_private_segment_offset_sgpr;
+   /* If is_debug_supported is 0 then must be 0. Otherwise, this is the
+    * fixed SGPR number used to hold the wave scratch offset for the
+    * entire kernel execution, or uint16_t(-1) if the register is not
+    * used or not known.
+    */
+   uint16_t debug_wavefront_private_segment_offset_sgpr;

-  /* If is_debug_supported is 0 then must be 0. Otherwise, this is the
-   * fixed SGPR number of the first of 4 SGPRs used to hold the
-   * scratch V# used for the entire kernel execution, or uint16_t(-1)
-   * if the registers are not used or not known.
-   */
-  uint16_t debug_private_segment_buffer_sgpr;
+   /* If is_debug_supported is 0 then must be 0. Otherwise, this is the
+    * fixed SGPR number of the first of 4 SGPRs used to hold the
+    * scratch V# used for the entire kernel execution, or uint16_t(-1)
+    * if the registers are not used or not known.
+    */
+   uint16_t debug_private_segment_buffer_sgpr;

-  /* The maximum byte alignment of variables used by the kernel in
-   * the specified memory segment. Expressed as a power of two. Must
-   * be at least HSA_POWERTWO_16.
-   */
-  uint8_t kernarg_segment_alignment;
-  uint8_t group_segment_alignment;
-  uint8_t private_segment_alignment;
+   /* The maximum byte alignment of variables used by the kernel in
+    * the specified memory segment. Expressed as a power of two. Must
+    * be at least HSA_POWERTWO_16.
+    */
+   uint8_t kernarg_segment_alignment;
+   uint8_t group_segment_alignment;
+   uint8_t private_segment_alignment;

-  /* Wavefront size expressed as a power of two. Must be a power of 2
-   * in range 1..64 inclusive. Used to support runtime query that
-   * obtains wavefront size, which may be used by application to
-   * allocated dynamic group memory and set the dispatch work-group
-   * size.
-   */
-  uint8_t wavefront_size;
+   /* Wavefront size expressed as a power of two. Must be a power of 2
+    * in range 1..64 inclusive. Used to support runtime query that
+    * obtains wavefront size, which may be used by application to
+    * allocate dynamic group memory and set the dispatch work-group
+    * size.
+    */
+   uint8_t wavefront_size;

-  int32_t call_convention;
-  uint8_t reserved3[12];
-  uint64_t runtime_loader_kernel_symbol;
-  uint64_t control_directives[16];
+   int32_t call_convention;
+   uint8_t reserved3[12];
+   uint64_t runtime_loader_kernel_symbol;
+   uint64_t control_directives[16];
 } amd_kernel_code_t;

 #endif // AMDKERNELCODET_H
--- a/src/amd/common/gfx10_format_table.h
+++ b/src/amd/common/gfx10_format_table.h
@ -27,16 +27,17 @@
 #ifndef GFX10_FORMAT_TABLE_H
 #define GFX10_FORMAT_TABLE_H

-#include <stdbool.h>
 #include "pipe/p_format.h"

-struct gfx10_format {
-    unsigned img_format:9;
+#include <stdbool.h>

-    /* Various formats are only supported with workarounds for vertex fetch,
-     * and some 32_32_32 formats are supported natively, but only for buffers
-     * (possibly with some image support, actually, but no filtering). */
-    bool buffers_only:1;
+struct gfx10_format {
+   unsigned img_format : 9;
+
+   /* Various formats are only supported with workarounds for vertex fetch,
+    * and some 32_32_32 formats are supported natively, but only for buffers
+    * (possibly with some image support, actually, but no filtering). */
+   bool buffers_only : 1;
 };

 extern const struct gfx10_format gfx10_format_table[PIPE_FORMAT_COUNT];
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@ -27,227 +27,227 @@
 #include "amdgfxregs.h"

 /* si values */
-#define SI_CONFIG_REG_OFFSET                 0x00008000
-#define SI_CONFIG_REG_END                    0x0000B000
-#define SI_SH_REG_OFFSET                     0x0000B000
-#define SI_SH_REG_END                        0x0000C000
-#define SI_CONTEXT_REG_OFFSET                0x00028000
-#define SI_CONTEXT_REG_END                   0x00030000
-#define CIK_UCONFIG_REG_OFFSET               0x00030000
-#define CIK_UCONFIG_REG_END                  0x00040000
-#define SI_UCONFIG_PERF_REG_OFFSET           0x00034000
-#define SI_UCONFIG_PERF_REG_END              0x00038000
+#define SI_CONFIG_REG_OFFSET       0x00008000
+#define SI_CONFIG_REG_END          0x0000B000
+#define SI_SH_REG_OFFSET           0x0000B000
+#define SI_SH_REG_END              0x0000C000
+#define SI_CONTEXT_REG_OFFSET      0x00028000
+#define SI_CONTEXT_REG_END         0x00030000
+#define CIK_UCONFIG_REG_OFFSET     0x00030000
+#define CIK_UCONFIG_REG_END        0x00040000
+#define SI_UCONFIG_PERF_REG_OFFSET 0x00034000
+#define SI_UCONFIG_PERF_REG_END    0x00038000

 /* For register shadowing: */
-#define SI_SH_REG_SPACE_SIZE			(SI_SH_REG_END - SI_SH_REG_OFFSET)
-#define SI_CONTEXT_REG_SPACE_SIZE		(SI_CONTEXT_REG_END - SI_CONTEXT_REG_OFFSET)
-#define SI_UCONFIG_REG_SPACE_SIZE		(CIK_UCONFIG_REG_END - CIK_UCONFIG_REG_OFFSET)
-#define SI_UCONFIG_PERF_REG_SPACE_SIZE          (SI_UCONFIG_PERF_REG_END - SI_UCONFIG_PERF_REG_OFFSET)
+#define SI_SH_REG_SPACE_SIZE           (SI_SH_REG_END - SI_SH_REG_OFFSET)
+#define SI_CONTEXT_REG_SPACE_SIZE      (SI_CONTEXT_REG_END - SI_CONTEXT_REG_OFFSET)
+#define SI_UCONFIG_REG_SPACE_SIZE      (CIK_UCONFIG_REG_END - CIK_UCONFIG_REG_OFFSET)
+#define SI_UCONFIG_PERF_REG_SPACE_SIZE (SI_UCONFIG_PERF_REG_END - SI_UCONFIG_PERF_REG_OFFSET)

-#define SI_SHADOWED_SH_REG_OFFSET		0
-#define SI_SHADOWED_CONTEXT_REG_OFFSET		SI_SH_REG_SPACE_SIZE
-#define SI_SHADOWED_UCONFIG_REG_OFFSET		(SI_SH_REG_SPACE_SIZE + SI_CONTEXT_REG_SPACE_SIZE)
-#define SI_SHADOWED_REG_BUFFER_SIZE		(SI_SH_REG_SPACE_SIZE + SI_CONTEXT_REG_SPACE_SIZE + \
-						 SI_UCONFIG_REG_SPACE_SIZE)
+#define SI_SHADOWED_SH_REG_OFFSET      0
+#define SI_SHADOWED_CONTEXT_REG_OFFSET SI_SH_REG_SPACE_SIZE
+#define SI_SHADOWED_UCONFIG_REG_OFFSET (SI_SH_REG_SPACE_SIZE + SI_CONTEXT_REG_SPACE_SIZE)
+#define SI_SHADOWED_REG_BUFFER_SIZE                                                                \
+   (SI_SH_REG_SPACE_SIZE + SI_CONTEXT_REG_SPACE_SIZE + SI_UCONFIG_REG_SPACE_SIZE)

 #define EVENT_TYPE_CACHE_FLUSH                  0x6
-#define EVENT_TYPE_PS_PARTIAL_FLUSH            0x10
+#define EVENT_TYPE_PS_PARTIAL_FLUSH             0x10
 #define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14
-#define EVENT_TYPE_ZPASS_DONE                  0x15
-#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT   0x16
-#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH	0x1f
-#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS	0x20
-#define		EVENT_TYPE(x)                           ((x) << 0)
-#define		EVENT_INDEX(x)                          ((x) << 8)
-                /* 0 - any non-TS event
-		 * 1 - ZPASS_DONE
-		 * 2 - SAMPLE_PIPELINESTAT
-		 * 3 - SAMPLE_STREAMOUTSTAT*
-		 * 4 - *S_PARTIAL_FLUSH
-		 * 5 - TS events
-		 */
+#define EVENT_TYPE_ZPASS_DONE                   0x15
+#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT    0x16
+#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH        0x1f
+#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS        0x20
+#define EVENT_TYPE(x)                           ((x) << 0)
+#define EVENT_INDEX(x)                          ((x) << 8)
+/* EVENT_INDEX values: 0 - any non-TS event
+ *                      1 - ZPASS_DONE
+ *                      2 - SAMPLE_PIPELINESTAT
+ *                      3 - SAMPLE_STREAMOUTSTAT*
+ *                      4 - *S_PARTIAL_FLUSH
+ *                      5 - TS events
+ */

 /* EVENT_WRITE_EOP (SI-VI) & RELEASE_MEM (GFX9) */
-#define EVENT_TCL1_VOL_ACTION_ENA		(1 << 12)
-#define EVENT_TC_VOL_ACTION_ENA			(1 << 13)
-#define EVENT_TC_WB_ACTION_ENA			(1 << 15)
-#define EVENT_TCL1_ACTION_ENA			(1 << 16)
-#define EVENT_TC_ACTION_ENA			(1 << 17)
-#define EVENT_TC_NC_ACTION_ENA			(1 << 19) /* GFX9+ */
-#define EVENT_TC_WC_ACTION_ENA			(1 << 20) /* GFX9+ */
-#define EVENT_TC_MD_ACTION_ENA			(1 << 21) /* GFX9+ */
+#define EVENT_TCL1_VOL_ACTION_ENA (1 << 12)
+#define EVENT_TC_VOL_ACTION_ENA   (1 << 13)
+#define EVENT_TC_WB_ACTION_ENA    (1 << 15)
+#define EVENT_TCL1_ACTION_ENA     (1 << 16)
+#define EVENT_TC_ACTION_ENA       (1 << 17)
+#define EVENT_TC_NC_ACTION_ENA    (1 << 19) /* GFX9+ */
+#define EVENT_TC_WC_ACTION_ENA    (1 << 20) /* GFX9+ */
+#define EVENT_TC_MD_ACTION_ENA    (1 << 21) /* GFX9+ */

-
-#define PREDICATION_OP_CLEAR 0x0
-#define PREDICATION_OP_ZPASS 0x1
+#define PREDICATION_OP_CLEAR     0x0
+#define PREDICATION_OP_ZPASS     0x1
 #define PREDICATION_OP_PRIMCOUNT 0x2
-#define PREDICATION_OP_BOOL64 0x3
+#define PREDICATION_OP_BOOL64    0x3

 #define PRED_OP(x) ((x) << 16)

 #define PREDICATION_CONTINUE (1 << 31)

-#define PREDICATION_HINT_WAIT (0 << 12)
+#define PREDICATION_HINT_WAIT        (0 << 12)
 #define PREDICATION_HINT_NOWAIT_DRAW (1 << 12)

 #define PREDICATION_DRAW_NOT_VISIBLE (0 << 8)
-#define PREDICATION_DRAW_VISIBLE (1 << 8)
+#define PREDICATION_DRAW_VISIBLE     (1 << 8)

-#define R600_TEXEL_PITCH_ALIGNMENT_MASK        0x7
+#define R600_TEXEL_PITCH_ALIGNMENT_MASK 0x7

 /* All registers defined in this packet section don't exist and the only
 * purpose of these definitions is to define packet encoding that
 * the IB parser understands, and also to have an accurate documentation.
 */
-#define PKT3_NOP                               0x10
-#define PKT3_SET_BASE                          0x11
-#define PKT3_CLEAR_STATE                       0x12
-#define PKT3_INDEX_BUFFER_SIZE                 0x13
-#define PKT3_DISPATCH_DIRECT                   0x15
-#define PKT3_DISPATCH_INDIRECT                 0x16
-#define PKT3_OCCLUSION_QUERY                   0x1F /* new for CIK */
-#define PKT3_SET_PREDICATION                   0x20
-#define PKT3_COND_EXEC                         0x22
-#define PKT3_PRED_EXEC                         0x23
-#define PKT3_DRAW_INDIRECT                     0x24
-#define PKT3_DRAW_INDEX_INDIRECT               0x25
-#define PKT3_INDEX_BASE                        0x26
-#define PKT3_DRAW_INDEX_2                      0x27
-#define PKT3_CONTEXT_CONTROL                   0x28
-#define     CC0_LOAD_GLOBAL_CONFIG(x)          (((unsigned)(x) & 0x1) << 0)
-#define     CC0_LOAD_PER_CONTEXT_STATE(x)      (((unsigned)(x) & 0x1) << 1)
-#define     CC0_LOAD_GLOBAL_UCONFIG(x)         (((unsigned)(x) & 0x1) << 15)
-#define     CC0_LOAD_GFX_SH_REGS(x)            (((unsigned)(x) & 0x1) << 16)
-#define     CC0_LOAD_CS_SH_REGS(x)             (((unsigned)(x) & 0x1) << 24)
-#define     CC0_LOAD_CE_RAM(x)                 (((unsigned)(x) & 0x1) << 28)
-#define     CC0_UPDATE_LOAD_ENABLES(x)         (((unsigned)(x) & 0x1) << 31)
-#define     CC1_SHADOW_GLOBAL_CONFIG(x)        (((unsigned)(x) & 0x1) << 0)
-#define     CC1_SHADOW_PER_CONTEXT_STATE(x)    (((unsigned)(x) & 0x1) << 1)
-#define     CC1_SHADOW_GLOBAL_UCONFIG(x)       (((unsigned)(x) & 0x1) << 15)
-#define     CC1_SHADOW_GFX_SH_REGS(x)          (((unsigned)(x) & 0x1) << 16)
-#define     CC1_SHADOW_CS_SH_REGS(x)           (((unsigned)(x) & 0x1) << 24)
-#define     CC1_UPDATE_SHADOW_ENABLES(x)       (((unsigned)(x) & 0x1) << 31)
-#define PKT3_INDEX_TYPE                        0x2A /* not on GFX9 */
-#define PKT3_DRAW_INDIRECT_MULTI               0x2C
-#define   R_2C3_DRAW_INDEX_LOC                  0x2C3
-#define     S_2C3_COUNT_INDIRECT_ENABLE(x)      (((unsigned)(x) & 0x1) << 30)
-#define     S_2C3_DRAW_INDEX_ENABLE(x)          (((unsigned)(x) & 0x1) << 31)
-#define PKT3_DRAW_INDEX_AUTO                   0x2D
-#define PKT3_DRAW_INDEX_IMMD                   0x2E /* not on CIK */
-#define PKT3_NUM_INSTANCES                     0x2F
-#define PKT3_DRAW_INDEX_MULTI_AUTO             0x30
-#define PKT3_INDIRECT_BUFFER_SI                0x32 /* not on CIK */
-#define PKT3_INDIRECT_BUFFER_CONST             0x33
-#define PKT3_STRMOUT_BUFFER_UPDATE             0x34
-#define		STRMOUT_STORE_BUFFER_FILLED_SIZE	1
-#define		STRMOUT_OFFSET_SOURCE(x)	(((unsigned)(x) & 0x3) << 1)
-#define			STRMOUT_OFFSET_FROM_PACKET		0
-#define			STRMOUT_OFFSET_FROM_VGT_FILLED_SIZE	1
-#define			STRMOUT_OFFSET_FROM_MEM			2
-#define			STRMOUT_OFFSET_NONE			3
-#define		STRMOUT_DATA_TYPE(x)		(((unsigned)(x) & 0x1) << 7)
-#define		STRMOUT_SELECT_BUFFER(x)	(((unsigned)(x) & 0x3) << 8)
-#define PKT3_DRAW_INDEX_OFFSET_2               0x35
-#define PKT3_WRITE_DATA                        0x37
-#define PKT3_DRAW_INDEX_INDIRECT_MULTI         0x38
-#define PKT3_MEM_SEMAPHORE                     0x39
-#define PKT3_MPEG_INDEX                        0x3A /* not on CIK */
-#define PKT3_WAIT_REG_MEM                      0x3C
-#define		WAIT_REG_MEM_EQUAL		3
-#define		WAIT_REG_MEM_NOT_EQUAL		4
-#define		WAIT_REG_MEM_GREATER_OR_EQUAL   5
-#define         WAIT_REG_MEM_MEM_SPACE(x)       (((unsigned)(x) & 0x3) << 4)
-#define         WAIT_REG_MEM_PFP		(1 << 8)
-#define PKT3_MEM_WRITE                         0x3D /* not on CIK */
-#define PKT3_INDIRECT_BUFFER_CIK               0x3F /* new on CIK */
+#define PKT3_NOP                            0x10
+#define PKT3_SET_BASE                       0x11
+#define PKT3_CLEAR_STATE                    0x12
+#define PKT3_INDEX_BUFFER_SIZE              0x13
+#define PKT3_DISPATCH_DIRECT                0x15
+#define PKT3_DISPATCH_INDIRECT              0x16
+#define PKT3_OCCLUSION_QUERY                0x1F /* new for CIK */
+#define PKT3_SET_PREDICATION                0x20
+#define PKT3_COND_EXEC                      0x22
+#define PKT3_PRED_EXEC                      0x23
+#define PKT3_DRAW_INDIRECT                  0x24
+#define PKT3_DRAW_INDEX_INDIRECT            0x25
+#define PKT3_INDEX_BASE                     0x26
+#define PKT3_DRAW_INDEX_2                   0x27
+#define PKT3_CONTEXT_CONTROL                0x28
+#define CC0_LOAD_GLOBAL_CONFIG(x)           (((unsigned)(x)&0x1) << 0)
+#define CC0_LOAD_PER_CONTEXT_STATE(x)       (((unsigned)(x)&0x1) << 1)
+#define CC0_LOAD_GLOBAL_UCONFIG(x)          (((unsigned)(x)&0x1) << 15)
+#define CC0_LOAD_GFX_SH_REGS(x)             (((unsigned)(x)&0x1) << 16)
+#define CC0_LOAD_CS_SH_REGS(x)              (((unsigned)(x)&0x1) << 24)
+#define CC0_LOAD_CE_RAM(x)                  (((unsigned)(x)&0x1) << 28)
+#define CC0_UPDATE_LOAD_ENABLES(x)          (((unsigned)(x)&0x1) << 31)
+#define CC1_SHADOW_GLOBAL_CONFIG(x)         (((unsigned)(x)&0x1) << 0)
+#define CC1_SHADOW_PER_CONTEXT_STATE(x)     (((unsigned)(x)&0x1) << 1)
+#define CC1_SHADOW_GLOBAL_UCONFIG(x)        (((unsigned)(x)&0x1) << 15)
+#define CC1_SHADOW_GFX_SH_REGS(x)           (((unsigned)(x)&0x1) << 16)
+#define CC1_SHADOW_CS_SH_REGS(x)            (((unsigned)(x)&0x1) << 24)
+#define CC1_UPDATE_SHADOW_ENABLES(x)        (((unsigned)(x)&0x1) << 31)
+#define PKT3_INDEX_TYPE                     0x2A /* not on GFX9 */
+#define PKT3_DRAW_INDIRECT_MULTI            0x2C
+#define R_2C3_DRAW_INDEX_LOC                0x2C3
+#define S_2C3_COUNT_INDIRECT_ENABLE(x)      (((unsigned)(x)&0x1) << 30)
+#define S_2C3_DRAW_INDEX_ENABLE(x)          (((unsigned)(x)&0x1) << 31)
+#define PKT3_DRAW_INDEX_AUTO                0x2D
+#define PKT3_DRAW_INDEX_IMMD                0x2E /* not on CIK */
+#define PKT3_NUM_INSTANCES                  0x2F
+#define PKT3_DRAW_INDEX_MULTI_AUTO          0x30
+#define PKT3_INDIRECT_BUFFER_SI             0x32 /* not on CIK */
+#define PKT3_INDIRECT_BUFFER_CONST          0x33
+#define PKT3_STRMOUT_BUFFER_UPDATE          0x34
+#define STRMOUT_STORE_BUFFER_FILLED_SIZE    1
+#define STRMOUT_OFFSET_SOURCE(x)            (((unsigned)(x)&0x3) << 1)
+#define STRMOUT_OFFSET_FROM_PACKET          0
+#define STRMOUT_OFFSET_FROM_VGT_FILLED_SIZE 1
+#define STRMOUT_OFFSET_FROM_MEM             2
+#define STRMOUT_OFFSET_NONE                 3
+#define STRMOUT_DATA_TYPE(x)                (((unsigned)(x)&0x1) << 7)
+#define STRMOUT_SELECT_BUFFER(x)            (((unsigned)(x)&0x3) << 8)
+#define PKT3_DRAW_INDEX_OFFSET_2            0x35
+#define PKT3_WRITE_DATA                     0x37
+#define PKT3_DRAW_INDEX_INDIRECT_MULTI      0x38
+#define PKT3_MEM_SEMAPHORE                  0x39
+#define PKT3_MPEG_INDEX                     0x3A /* not on CIK */
+#define PKT3_WAIT_REG_MEM                   0x3C
+#define WAIT_REG_MEM_EQUAL                  3
+#define WAIT_REG_MEM_NOT_EQUAL              4
+#define WAIT_REG_MEM_GREATER_OR_EQUAL       5
+#define WAIT_REG_MEM_MEM_SPACE(x)           (((unsigned)(x)&0x3) << 4)
+#define WAIT_REG_MEM_PFP                    (1 << 8)
+#define PKT3_MEM_WRITE                      0x3D /* not on CIK */
+#define PKT3_INDIRECT_BUFFER_CIK            0x3F /* new on CIK */

-#define PKT3_COPY_DATA			       0x40
-#define		COPY_DATA_SRC_SEL(x)		((x) & 0xf)
-#define			COPY_DATA_REG		0
-#define			COPY_DATA_SRC_MEM	1 /* only valid as source */
-#define                 COPY_DATA_TC_L2         2
-#define                 COPY_DATA_GDS           3
-#define                 COPY_DATA_PERF          4
-#define                 COPY_DATA_IMM           5
-#define                 COPY_DATA_TIMESTAMP     9
-#define		COPY_DATA_DST_SEL(x)		(((unsigned)(x) & 0xf) << 8)
-#define                 COPY_DATA_DST_MEM_GRBM	1 /* sync across GRBM, deprecated */
-#define                 COPY_DATA_TC_L2         2
-#define                 COPY_DATA_GDS           3
-#define                 COPY_DATA_PERF          4
-#define                 COPY_DATA_DST_MEM       5
-#define		COPY_DATA_COUNT_SEL		(1 << 16)
-#define		COPY_DATA_WR_CONFIRM		(1 << 20)
-#define		COPY_DATA_ENGINE_PFP		(1 << 30)
-#define PKT3_PFP_SYNC_ME		       0x42
+#define PKT3_COPY_DATA                         0x40
+#define COPY_DATA_SRC_SEL(x)                   ((x)&0xf)
+#define COPY_DATA_REG                          0
+#define COPY_DATA_SRC_MEM                      1 /* only valid as source */
+#define COPY_DATA_TC_L2                        2
+#define COPY_DATA_GDS                          3
+#define COPY_DATA_PERF                         4
+#define COPY_DATA_IMM                          5
+#define COPY_DATA_TIMESTAMP                    9
+#define COPY_DATA_DST_SEL(x)                   (((unsigned)(x)&0xf) << 8)
+#define COPY_DATA_DST_MEM_GRBM                 1 /* sync across GRBM, deprecated */
+#define COPY_DATA_TC_L2                        2
+#define COPY_DATA_GDS                          3
+#define COPY_DATA_PERF                         4
+#define COPY_DATA_DST_MEM                      5
+#define COPY_DATA_COUNT_SEL                    (1 << 16)
+#define COPY_DATA_WR_CONFIRM                   (1 << 20)
+#define COPY_DATA_ENGINE_PFP                   (1 << 30)
+#define PKT3_PFP_SYNC_ME                       0x42
 #define PKT3_SURFACE_SYNC                      0x43 /* deprecated on CIK, use ACQUIRE_MEM */
 #define PKT3_ME_INITIALIZE                     0x44 /* not on CIK */
 #define PKT3_COND_WRITE                        0x45
 #define PKT3_EVENT_WRITE                       0x46
 #define PKT3_EVENT_WRITE_EOP                   0x47 /* not on GFX9 */
-#define         EOP_DST_SEL(x)				((x) << 16)
-#define			EOP_DST_SEL_MEM			0
-#define			EOP_DST_SEL_TC_L2		1
-#define         EOP_INT_SEL(x)                          ((x) << 24)
-#define			EOP_INT_SEL_NONE			0
-#define			EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM	3
-#define         EOP_DATA_SEL(x)                         ((x) << 29)
-#define			EOP_DATA_SEL_DISCARD		0
-#define			EOP_DATA_SEL_VALUE_32BIT	1
-#define			EOP_DATA_SEL_VALUE_64BIT	2
-#define			EOP_DATA_SEL_TIMESTAMP		3
-#define			EOP_DATA_SEL_GDS		5
-#define		EOP_DATA_GDS(dw_offset, num_dwords)	((dw_offset) | ((unsigned)(num_dwords) << 16))
+#define EOP_DST_SEL(x)                         ((x) << 16)
+#define EOP_DST_SEL_MEM                        0
+#define EOP_DST_SEL_TC_L2                      1
+#define EOP_INT_SEL(x)                         ((x) << 24)
+#define EOP_INT_SEL_NONE                       0
+#define EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM 3
+#define EOP_DATA_SEL(x)                        ((x) << 29)
+#define EOP_DATA_SEL_DISCARD                   0
+#define EOP_DATA_SEL_VALUE_32BIT               1
+#define EOP_DATA_SEL_VALUE_64BIT               2
+#define EOP_DATA_SEL_TIMESTAMP                 3
+#define EOP_DATA_SEL_GDS                       5
+#define EOP_DATA_GDS(dw_offset, num_dwords)    ((dw_offset) | ((unsigned)(num_dwords) << 16))
 /* CP DMA bug: Any use of CP_DMA.DST_SEL=TC must be avoided when EOS packets
 * are used. Use DST_SEL=MC instead. For prefetch, use SRC_SEL=TC and
 * DST_SEL=MC. Only CIK chips are affected.
 */
 /* fix CP DMA before uncommenting: */
 /*#define PKT3_EVENT_WRITE_EOS                   0x48*/ /* not on GFX9 */
-#define PKT3_RELEASE_MEM                       0x49 /* GFX9+ [any ring] or GFX8 [compute ring only] */
-#define PKT3_CONTEXT_REG_RMW                   0x51 /* older firmware versions on older chips don't have this */
-#define PKT3_ONE_REG_WRITE                     0x57 /* not on CIK */
-#define PKT3_ACQUIRE_MEM                       0x58 /* new for CIK */
-#define PKT3_REWIND                            0x59 /* VI+ [any ring] or CIK [compute ring only] */
-#define PKT3_LOAD_UCONFIG_REG                  0x5E /* GFX7+ */
-#define PKT3_LOAD_SH_REG                       0x5F
-#define PKT3_LOAD_CONTEXT_REG                  0x61
-#define PKT3_SET_CONFIG_REG                    0x68
-#define PKT3_SET_CONTEXT_REG                   0x69
-#define PKT3_SET_SH_REG                        0x76
-#define PKT3_SET_SH_REG_OFFSET                 0x77
-#define PKT3_SET_UCONFIG_REG                   0x79 /* new for CIK */
-#define PKT3_SET_UCONFIG_REG_INDEX             0x7A /* new for GFX9, CP ucode version >= 26 */
-#define PKT3_LOAD_CONST_RAM                    0x80
-#define PKT3_WRITE_CONST_RAM                   0x81
-#define PKT3_DUMP_CONST_RAM                    0x83
-#define PKT3_INCREMENT_CE_COUNTER              0x84
-#define PKT3_INCREMENT_DE_COUNTER              0x85
-#define PKT3_WAIT_ON_CE_COUNTER                0x86
-#define PKT3_SET_SH_REG_INDEX                  0x9B
-#define PKT3_LOAD_CONTEXT_REG_INDEX            0x9F /* new for VI */
+#define PKT3_RELEASE_MEM            0x49 /* GFX9+ [any ring] or GFX8 [compute ring only] */
+#define PKT3_CONTEXT_REG_RMW        0x51 /* older firmware versions on older chips don't have this */
+#define PKT3_ONE_REG_WRITE          0x57 /* not on CIK */
+#define PKT3_ACQUIRE_MEM            0x58 /* new for CIK */
+#define PKT3_REWIND                 0x59 /* VI+ [any ring] or CIK [compute ring only] */
+#define PKT3_LOAD_UCONFIG_REG       0x5E /* GFX7+ */
+#define PKT3_LOAD_SH_REG            0x5F
+#define PKT3_LOAD_CONTEXT_REG       0x61
+#define PKT3_SET_CONFIG_REG         0x68
+#define PKT3_SET_CONTEXT_REG        0x69
+#define PKT3_SET_SH_REG             0x76
+#define PKT3_SET_SH_REG_OFFSET      0x77
+#define PKT3_SET_UCONFIG_REG        0x79 /* new for CIK */
+#define PKT3_SET_UCONFIG_REG_INDEX  0x7A /* new for GFX9, CP ucode version >= 26 */
+#define PKT3_LOAD_CONST_RAM         0x80
+#define PKT3_WRITE_CONST_RAM        0x81
+#define PKT3_DUMP_CONST_RAM         0x83
+#define PKT3_INCREMENT_CE_COUNTER   0x84
+#define PKT3_INCREMENT_DE_COUNTER   0x85
+#define PKT3_WAIT_ON_CE_COUNTER     0x86
+#define PKT3_SET_SH_REG_INDEX       0x9B
+#define PKT3_LOAD_CONTEXT_REG_INDEX 0x9F /* new for VI */

-#define PKT_TYPE_S(x)                   (((unsigned)(x) & 0x3) << 30)
-#define PKT_TYPE_G(x)                   (((x) >> 30) & 0x3)
-#define PKT_TYPE_C                      0x3FFFFFFF
-#define PKT_COUNT_S(x)                  (((unsigned)(x) & 0x3FFF) << 16)
-#define PKT_COUNT_G(x)                  (((x) >> 16) & 0x3FFF)
-#define PKT_COUNT_C                     0xC000FFFF
-#define PKT0_BASE_INDEX_S(x)            (((unsigned)(x) & 0xFFFF) << 0)
-#define PKT0_BASE_INDEX_G(x)            (((x) >> 0) & 0xFFFF)
-#define PKT0_BASE_INDEX_C               0xFFFF0000
-#define PKT3_IT_OPCODE_S(x)             (((unsigned)(x) & 0xFF) << 8)
-#define PKT3_IT_OPCODE_G(x)             (((x) >> 8) & 0xFF)
-#define PKT3_IT_OPCODE_C                0xFFFF00FF
-#define PKT3_PREDICATE(x)               (((x) >> 0) & 0x1)
-#define PKT3_SHADER_TYPE_S(x)           (((unsigned)(x) & 0x1) << 1)
-#define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count))
-#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT_COUNT_S(count) | PKT3_IT_OPCODE_S(op) | PKT3_PREDICATE(predicate))
+#define PKT_TYPE_S(x)         (((unsigned)(x)&0x3) << 30)
+#define PKT_TYPE_G(x)         (((x) >> 30) & 0x3)
+#define PKT_TYPE_C            0x3FFFFFFF
+#define PKT_COUNT_S(x)        (((unsigned)(x)&0x3FFF) << 16)
+#define PKT_COUNT_G(x)        (((x) >> 16) & 0x3FFF)
+#define PKT_COUNT_C           0xC000FFFF
+#define PKT0_BASE_INDEX_S(x)  (((unsigned)(x)&0xFFFF) << 0)
+#define PKT0_BASE_INDEX_G(x)  (((x) >> 0) & 0xFFFF)
+#define PKT0_BASE_INDEX_C     0xFFFF0000
+#define PKT3_IT_OPCODE_S(x)   (((unsigned)(x)&0xFF) << 8)
+#define PKT3_IT_OPCODE_G(x)   (((x) >> 8) & 0xFF)
+#define PKT3_IT_OPCODE_C      0xFFFF00FF
+#define PKT3_PREDICATE(x)     (((x) >> 0) & 0x1)
+#define PKT3_SHADER_TYPE_S(x) (((unsigned)(x)&0x1) << 1)
+#define PKT0(index, count)    (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count))
+#define PKT3(op, count, predicate)                                                                 \
+   (PKT_TYPE_S(3) | PKT_COUNT_S(count) | PKT3_IT_OPCODE_S(op) | PKT3_PREDICATE(predicate))

-#define PKT2_NOP_PAD                    PKT_TYPE_S(2)
-#define PKT3_NOP_PAD                    PKT3(PKT3_NOP, 0x3fff, 0) /* header-only version */
+#define PKT2_NOP_PAD PKT_TYPE_S(2)
+#define PKT3_NOP_PAD PKT3(PKT3_NOP, 0x3fff, 0) /* header-only version */

-#define PKT3_CP_DMA					0x41
+#define PKT3_CP_DMA 0x41
 /* 1. header
 * 2. SRC_ADDR_LO [31:0] or DATA [31:0]
 * 3. CP_SYNC [31] | SRC_SEL [30:29] | ENGINE [27] | DST_SEL [21:20] | SRC_ADDR_HI [15:0]
@ -256,7 +256,7 @@
 * 6. COMMAND [29:22] | BYTE_COUNT [20:0]
 */

-#define PKT3_DMA_DATA					0x50 /* new for CIK */
+#define PKT3_DMA_DATA 0x50 /* new for CIK */
 /* 1. header
 * 2. CP_SYNC [31] | SRC_SEL [30:29] | DST_SEL [21:20] | ENGINE [0]
 * 2. SRC_ADDR_LO [31:0] or DATA [31:0]
@ -267,69 +267,70 @@
 */

 /* SI async DMA packets */
-#define SI_DMA_PACKET(cmd, sub_cmd, n) ((((unsigned)(cmd) & 0xF) << 28) |    \
-                                       (((unsigned)(sub_cmd) & 0xFF) << 20) |\
-                                       (((unsigned)(n) & 0xFFFFF) << 0))
+#define SI_DMA_PACKET(cmd, sub_cmd, n)                                                             \
+   ((((unsigned)(cmd)&0xF) << 28) | (((unsigned)(sub_cmd)&0xFF) << 20) |                           \
+    (((unsigned)(n)&0xFFFFF) << 0))
 /* SI async DMA Packet types */
-#define    SI_DMA_PACKET_WRITE                     0x2
-#define    SI_DMA_PACKET_COPY                      0x3
-#define    SI_DMA_COPY_MAX_BYTE_ALIGNED_SIZE       0xfffe0
+#define SI_DMA_PACKET_WRITE               0x2
+#define SI_DMA_PACKET_COPY                0x3
+#define SI_DMA_COPY_MAX_BYTE_ALIGNED_SIZE 0xfffe0
 /* The documentation says 0xffff8 is the maximum size in dwords, which is
 * 0x3fffe0 in bytes. */
-#define    SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE      0x3fffe0
-#define    SI_DMA_COPY_DWORD_ALIGNED               0x00
-#define    SI_DMA_COPY_BYTE_ALIGNED                0x40
-#define    SI_DMA_COPY_TILED                       0x8
-#define    SI_DMA_PACKET_INDIRECT_BUFFER           0x4
-#define    SI_DMA_PACKET_SEMAPHORE                 0x5
-#define    SI_DMA_PACKET_FENCE                     0x6
-#define    SI_DMA_PACKET_TRAP                      0x7
-#define    SI_DMA_PACKET_SRBM_WRITE                0x9
-#define    SI_DMA_PACKET_CONSTANT_FILL             0xd
-#define    SI_DMA_PACKET_NOP                       0xf
+#define SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE 0x3fffe0
+#define SI_DMA_COPY_DWORD_ALIGNED          0x00
+#define SI_DMA_COPY_BYTE_ALIGNED           0x40
+#define SI_DMA_COPY_TILED                  0x8
+#define SI_DMA_PACKET_INDIRECT_BUFFER      0x4
+#define SI_DMA_PACKET_SEMAPHORE            0x5
+#define SI_DMA_PACKET_FENCE                0x6
+#define SI_DMA_PACKET_TRAP                 0x7
+#define SI_DMA_PACKET_SRBM_WRITE           0x9
+#define SI_DMA_PACKET_CONSTANT_FILL        0xd
+#define SI_DMA_PACKET_NOP                  0xf

 /* CIK async DMA packets */
-#define CIK_SDMA_PACKET(op, sub_op, n)   ((((unsigned)(n) & 0xFFFF) << 16) |	\
-					 (((unsigned)(sub_op) & 0xFF) << 8) |	\
-					 (((unsigned)(op) & 0xFF) << 0))
+#define CIK_SDMA_PACKET(op, sub_op, n)                                                             \
+   ((((unsigned)(n)&0xFFFF) << 16) | (((unsigned)(sub_op)&0xFF) << 8) |                            \
+    (((unsigned)(op)&0xFF) << 0))
 /* CIK async DMA packet types */
-#define    CIK_SDMA_OPCODE_NOP                     0x0
-#define    CIK_SDMA_OPCODE_COPY                    0x1
-#define        CIK_SDMA_COPY_SUB_OPCODE_LINEAR            0x0
-#define        CIK_SDMA_COPY_SUB_OPCODE_TILED             0x1
-#define        CIK_SDMA_COPY_SUB_OPCODE_SOA               0x3
-#define        CIK_SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW 0x4
-#define        CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW  0x5
-#define        CIK_SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW    0x6
-#define    CIK_SDMA_OPCODE_WRITE                   0x2
-#define        SDMA_WRITE_SUB_OPCODE_LINEAR               0x0
-#define        SDMA_WRTIE_SUB_OPCODE_TILED                0x1
-#define    CIK_SDMA_OPCODE_INDIRECT_BUFFER         0x4
-#define    CIK_SDMA_PACKET_FENCE                   0x5
-#define    CIK_SDMA_PACKET_TRAP                    0x6
-#define    CIK_SDMA_PACKET_SEMAPHORE               0x7
-#define    CIK_SDMA_PACKET_CONSTANT_FILL           0xb
-#define    CIK_SDMA_OPCODE_TIMESTAMP               0xd
-#define        SDMA_TS_SUB_OPCODE_SET_LOCAL_TIMESTAMP     0x0
-#define        SDMA_TS_SUB_OPCODE_GET_LOCAL_TIMESTAMP     0x1
-#define        SDMA_TS_SUB_OPCODE_GET_GLOBAL_TIMESTAMP    0x2
-#define    CIK_SDMA_PACKET_SRBM_WRITE              0xe
+#define CIK_SDMA_OPCODE_NOP                        0x0
+#define CIK_SDMA_OPCODE_COPY                       0x1
+#define CIK_SDMA_COPY_SUB_OPCODE_LINEAR            0x0
+#define CIK_SDMA_COPY_SUB_OPCODE_TILED             0x1
+#define CIK_SDMA_COPY_SUB_OPCODE_SOA               0x3
+#define CIK_SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW 0x4
+#define CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW  0x5
+#define CIK_SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW    0x6
+#define CIK_SDMA_OPCODE_WRITE                      0x2
+#define SDMA_WRITE_SUB_OPCODE_LINEAR               0x0
+#define SDMA_WRTIE_SUB_OPCODE_TILED                0x1
+#define CIK_SDMA_OPCODE_INDIRECT_BUFFER            0x4
+#define CIK_SDMA_PACKET_FENCE                      0x5
+#define CIK_SDMA_PACKET_TRAP                       0x6
+#define CIK_SDMA_PACKET_SEMAPHORE                  0x7
+#define CIK_SDMA_PACKET_CONSTANT_FILL              0xb
+#define CIK_SDMA_OPCODE_TIMESTAMP                  0xd
+#define SDMA_TS_SUB_OPCODE_SET_LOCAL_TIMESTAMP     0x0
+#define SDMA_TS_SUB_OPCODE_GET_LOCAL_TIMESTAMP     0x1
+#define SDMA_TS_SUB_OPCODE_GET_GLOBAL_TIMESTAMP    0x2
+#define CIK_SDMA_PACKET_SRBM_WRITE                 0xe
 /* There is apparently an undocumented HW limitation that
   prevents the HW from copying the last 255 bytes of (1 << 22) - 1 */
-#define    CIK_SDMA_COPY_MAX_SIZE                  0x3fff00  /* almost 4 MB*/
-#define    GFX103_SDMA_COPY_MAX_SIZE               0x3fffff00 /* almost 1 GB */
+#define CIK_SDMA_COPY_MAX_SIZE    0x3fff00   /* almost 4 MB*/
+#define GFX103_SDMA_COPY_MAX_SIZE 0x3fffff00 /* almost 1 GB */

-enum amd_cmp_class_flags {
-	S_NAN = 1 << 0,        // Signaling NaN
-	Q_NAN = 1 << 1,        // Quiet NaN
-	N_INFINITY = 1 << 2,   // Negative infinity
-	N_NORMAL = 1 << 3,     // Negative normal
-	N_SUBNORMAL = 1 << 4,  // Negative subnormal
-	N_ZERO = 1 << 5,       // Negative zero
-	P_ZERO = 1 << 6,       // Positive zero
-	P_SUBNORMAL = 1 << 7,  // Positive subnormal
-	P_NORMAL = 1 << 8,     // Positive normal
-	P_INFINITY = 1 << 9    // Positive infinity
+enum amd_cmp_class_flags
+{
+   S_NAN = 1 << 0,       // Signaling NaN
+   Q_NAN = 1 << 1,       // Quiet NaN
+   N_INFINITY = 1 << 2,  // Negative infinity
+   N_NORMAL = 1 << 3,    // Negative normal
+   N_SUBNORMAL = 1 << 4, // Negative subnormal
+   N_ZERO = 1 << 5,      // Negative zero
+   P_ZERO = 1 << 6,      // Positive zero
+   P_SUBNORMAL = 1 << 7, // Positive subnormal
+   P_NORMAL = 1 << 8,    // Positive normal
+   P_INFINITY = 1 << 9   // Positive infinity
 };

 #endif /* _SID_H */