From ecb0ccf60391edd190a22cb096abd8df3fcc49b5 Mon Sep 17 00:00:00 2001
From: Yonggang Luo <luoyonggang@gmail.com>
Date: Tue, 11 Nov 2025 15:33:54 +0800
Subject: [PATCH] treewide: Replace calling to function ALIGN with align
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is done by grep ALIGN( to align(

docs,*.xml,blake3 is excluded

Signed-off-by: Yonggang Luo <luoyonggang@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38365>
---
 src/amd/common/ac_gpu_info.c                  |  4 ++--
 src/amd/common/ac_rgp.c                       |  2 +-
 src/amd/common/ac_rgp_elf_object_pack.c       |  4 ++--
 src/amd/vulkan/layers/radv_sqtt_layer.c       |  2 +-
 src/amd/vulkan/radv_acceleration_structure.c  |  2 +-
 src/amd/vulkan/radv_pipeline.c                |  2 +-
 src/amd/vulkan/radv_queue.c                   |  6 ++---
 src/amd/vulkan/radv_sdma.c                    |  2 +-
 src/amd/vulkan/radv_shader.c                  |  4 ++--
 src/amd/vulkan/radv_shader_object.c           |  2 +-
 src/asahi/compiler/agx_compile.c              |  4 ++--
 src/asahi/layout/layout.h                     |  4 ++--
 src/asahi/vulkan/hk_descriptor_set_layout.c   |  2 +-
 src/compiler/glsl/gl_nir_link_varyings.c      | 10 ++++----
 src/compiler/nir/nir_lower_shader_calls.c     |  8 +++----
 src/compiler/nir/nir_lower_task_shader.c      |  6 ++---
 src/etnaviv/drm/etnaviv_bo_cache.c            |  2 +-
 src/etnaviv/drm/etnaviv_cmd_stream.c          |  2 +-
 src/freedreno/computerator/a6xx.cc            |  2 +-
 src/freedreno/decode/pgmdump.c                |  6 ++---
 src/freedreno/decode/pgmdump2.c               |  4 ++--
 src/freedreno/drm/freedreno_bo_heap.c         |  2 +-
 src/freedreno/ir3/ir3_compiler_nir.c          |  2 +-
 .../ir3/ir3_nir_analyze_ubo_ranges.c          |  2 +-
 src/freedreno/ir3/ir3_ra.c                    |  4 ++--
 src/freedreno/ir3/ir3_shader.c                |  2 +-
 src/freedreno/ir3/ir3_shared_ra.c             |  4 ++--
 src/freedreno/vulkan/tu_descriptor_set.cc     |  2 +-
 src/freedreno/vulkan/tu_pipeline.cc           |  2 +-
 src/freedreno/vulkan/tu_shader.cc             |  6 ++---
 src/freedreno/vulkan/tu_suballoc.cc           |  2 +-
 .../auxiliary/gallivm/lp_bld_nir_soa.c        |  2 +-
 src/gallium/drivers/crocus/crocus_batch.c     |  2 +-
 src/gallium/drivers/crocus/crocus_blorp.c     |  4 ++--
 src/gallium/drivers/crocus/crocus_context.c   |  2 +-
 .../drivers/crocus/crocus_program_cache.c     |  2 +-
 src/gallium/drivers/crocus/crocus_resource.c  |  4 ++--
 src/gallium/drivers/crocus/crocus_state.c     | 14 +++++------
 src/gallium/drivers/d3d12/d3d12_resource.cpp  |  6 ++---
 ...2_video_encoder_bitstream_builder_hevc.cpp |  4 ++--
 src/gallium/drivers/ethosu/ethosu_cmd.c       |  2 +-
 src/gallium/drivers/ethosu/ethosu_coefs.c     |  2 +-
 src/gallium/drivers/ethosu/ethosu_lower.c     |  2 +-
 src/gallium/drivers/ethosu/ethosu_ml.c        |  2 +-
 src/gallium/drivers/ethosu/ethosu_sched.c     | 24 +++++++++----------
 src/gallium/drivers/etnaviv/etnaviv_ml_nn.c   |  6 ++---
 .../drivers/etnaviv/etnaviv_ml_nn_v7.c        | 10 ++++----
 .../drivers/etnaviv/etnaviv_ml_nn_v8.c        |  4 ++--
 .../drivers/freedreno/a6xx/fd6_const.cc       |  2 +-
 .../drivers/freedreno/a6xx/fd6_gmem.cc        |  4 ++--
 .../drivers/freedreno/ir3/ir3_gallium.c       |  2 +-
 .../drivers/iris/i915/iris_kmd_backend.c      |  2 +-
 src/gallium/drivers/iris/iris_bufmgr.c        |  2 +-
 src/gallium/drivers/iris/iris_indirect_gen.c  |  2 +-
 src/gallium/drivers/iris/iris_resource.c      |  2 +-
 src/gallium/drivers/iris/iris_screen.c        |  2 +-
 src/gallium/drivers/iris/iris_state.c         |  8 +++----
 src/gallium/drivers/lima/drm-shim/lima_noop.c |  2 +-
 .../drivers/nouveau/codegen/nv50_ir.cpp       |  2 +-
 .../drivers/nouveau/nv50/nv50_compute.c       |  4 ++--
 src/gallium/drivers/radeonsi/si_shader.c      |  4 ++--
 .../drivers/radeonsi/si_shader_binary.c       |  2 +-
 src/gallium/drivers/radeonsi/si_sqtt.c        |  2 +-
 src/gallium/drivers/rocket/rkt_coefs.c        |  6 ++---
 src/gallium/drivers/rocket/rkt_ml.c           |  8 +++----
 src/gallium/drivers/rocket/rkt_task.c         |  8 +++----
 .../winsys/virgl/drm/virgl_drm_winsys.c       |  4 ++--
 .../winsys/virgl/vtest/virgl_vtest_winsys.c   |  4 ++--
 src/imagination/vulkan/pvr_image.c            | 14 +++++------
 src/imagination/vulkan/pvr_robustness.c       |  2 +-
 src/intel/blorp/blorp.c                       |  4 ++--
 src/intel/blorp/blorp_blit.c                  | 12 +++++-----
 src/intel/blorp/blorp_clear.c                 | 12 +++++-----
 src/intel/blorp/blorp_genX_exec_brw.h         |  4 ++--
 src/intel/blorp/blorp_genX_exec_elk.h         |  4 ++--
 src/intel/common/intel_urb_config.c           | 12 +++++-----
 src/intel/compiler/brw/brw_compile_fs.cpp     |  2 +-
 src/intel/compiler/brw/brw_compile_gs.cpp     |  6 ++---
 src/intel/compiler/brw/brw_compile_mesh.cpp   |  6 ++---
 src/intel/compiler/brw/brw_compile_tcs.cpp    |  2 +-
 src/intel/compiler/brw/brw_compile_tes.cpp    |  2 +-
 src/intel/compiler/brw/brw_eu_emit.c          |  2 +-
 src/intel/compiler/brw/brw_from_nir.cpp       |  4 ++--
 src/intel/compiler/brw/brw_inst.h             |  2 +-
 src/intel/compiler/brw/brw_lower.cpp          |  2 +-
 .../compiler/brw/brw_nir_analyze_ubo_ranges.c |  2 +-
 .../brw/brw_opt_combine_constants.cpp         |  2 +-
 src/intel/compiler/brw/brw_reg_allocate.cpp   |  8 +++----
 src/intel/compiler/brw/brw_rt.h               |  2 +-
 src/intel/compiler/elk/elk_eu_emit.c          |  2 +-
 src/intel/compiler/elk/elk_fs.cpp             | 10 ++++----
 .../compiler/elk/elk_fs_combine_constants.cpp |  4 ++--
 src/intel/compiler/elk/elk_fs_nir.cpp         |  8 +++----
 .../compiler/elk/elk_fs_reg_allocate.cpp      |  8 +++----
 .../compiler/elk/elk_nir_analyze_ubo_ranges.c |  2 +-
 src/intel/compiler/elk/elk_shader.cpp         |  2 +-
 .../compiler/elk/elk_vec4_gs_visitor.cpp      | 10 ++++----
 src/intel/compiler/elk/elk_vec4_tcs.cpp       |  2 +-
 src/intel/isl/isl_tiled_memcpy.c              |  2 +-
 src/intel/tools/aub_write.c                   |  4 ++--
 src/intel/vulkan/anv_cmd_buffer.c             |  2 +-
 src/intel/vulkan/anv_descriptor_set.c         |  6 ++---
 .../vulkan/anv_nir_compute_push_layout.c      |  6 ++---
 src/intel/vulkan/anv_physical_device.c        |  2 +-
 src/intel/vulkan/anv_shader_compile.c         |  2 +-
 src/intel/vulkan/anv_video.c                  |  2 +-
 src/intel/vulkan/genX_cmd_video.c             | 10 ++++----
 src/intel/vulkan/genX_shader.c                |  2 +-
 src/intel/vulkan/genX_simple_shader.c         |  2 +-
 src/intel/vulkan_hasvk/anv_cmd_buffer.c       |  2 +-
 src/intel/vulkan_hasvk/anv_descriptor_set.c   |  6 ++---
 .../anv_nir_compute_push_layout.c             |  2 +-
 src/intel/vulkan_hasvk/anv_pipeline.c         |  2 +-
 src/intel/vulkan_hasvk/genX_cmd_buffer.c      |  2 +-
 src/intel/vulkan_hasvk/genX_pipeline.c        |  2 +-
 .../vulkan/kk_descriptor_set_layout.c         |  2 +-
 src/microsoft/vulkan/dzn_cmd_buffer.c         |  2 +-
 src/microsoft/vulkan/dzn_descriptor_set.c     |  4 ++--
 .../vulkan/nvk_descriptor_set_layout.c        |  2 +-
 119 files changed, 246 insertions(+), 246 deletions(-)

diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 5c1299ff311..26c3195471e 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -2317,8 +2317,8 @@ void ac_get_task_info(const struct radeon_info *info,
    /* Ensure that the addresses of each ring are 256 byte aligned. */
    task_info->payload_entry_size = payload_entry_size;
    task_info->num_entries = num_entries;
-   task_info->draw_ring_offset = ALIGN(AC_TASK_CTRLBUF_BYTES, 256);
-   task_info->payload_ring_offset = ALIGN(task_info->draw_ring_offset + draw_ring_bytes, 256);
+   task_info->draw_ring_offset = align(AC_TASK_CTRLBUF_BYTES, 256);
+   task_info->payload_ring_offset = align(task_info->draw_ring_offset + draw_ring_bytes, 256);
    task_info->bo_size_bytes = task_info->payload_ring_offset + payload_ring_bytes;
 }
 
diff --git a/src/amd/common/ac_rgp.c b/src/amd/common/ac_rgp.c
index e273d636546..107189b0e9c 100644
--- a/src/amd/common/ac_rgp.c
+++ b/src/amd/common/ac_rgp.c
@@ -1046,7 +1046,7 @@ ac_sqtt_dump_data(const struct radeon_info *rad_info, struct ac_sqtt_trace *sqtt
                                       sizeof(struct sqtt_code_object_database_record),
                                       record, &elf_size_calc, flags);
          /* Align to 4 bytes per the RGP file spec. */
-         code_object_record.size = ALIGN(elf_size_calc, 4);
+         code_object_record.size = align(elf_size_calc, 4);
          fseek(output, file_offset, SEEK_SET);
          fwrite(&code_object_record, sizeof(struct sqtt_code_object_database_record),
                 1, output);
diff --git a/src/amd/common/ac_rgp_elf_object_pack.c b/src/amd/common/ac_rgp_elf_object_pack.c
index d84d71dff76..2e0bd05acfa 100644
--- a/src/amd/common/ac_rgp_elf_object_pack.c
+++ b/src/amd/common/ac_rgp_elf_object_pack.c
@@ -245,7 +245,7 @@ ac_rgp_write_msgpack(FILE *output,
                      }
             }
    ac_msgpack_resize_if_required(&msgpack, 4 - (msgpack.offset % 4));
-   msgpack.offset = ALIGN(msgpack.offset, 4);
+   msgpack.offset = align(msgpack.offset, 4);
    fwrite(msgpack.mem, 1, msgpack.offset, output);
    *written_size = msgpack.offset;
    ac_msgpack_destroy(&msgpack);
@@ -321,7 +321,7 @@ ac_rgp_file_write_elf_text(FILE *output, uint32_t *elf_size_calc,
    }
 
    symbol_offset += rgp_shader_data->code_size;
-   uint32_t aligned = ALIGN(symbol_offset, 256) - symbol_offset;
+   uint32_t aligned = align(symbol_offset, 256) - symbol_offset;
    fseek(output, aligned, SEEK_CUR);
    *elf_size_calc += aligned;
    *text_size = symbol_offset + aligned;
diff --git a/src/amd/vulkan/layers/radv_sqtt_layer.c b/src/amd/vulkan/layers/radv_sqtt_layer.c
index 847384ae56e..245e6f6c754 100644
--- a/src/amd/vulkan/layers/radv_sqtt_layer.c
+++ b/src/amd/vulkan/layers/radv_sqtt_layer.c
@@ -1393,7 +1393,7 @@ radv_fill_code_object_record(struct radv_device *device, struct rgp_shader_data
    shader_data->vgpr_count = shader->config.num_vgprs;
    shader_data->sgpr_count = shader->config.num_sgprs;
    shader_data->scratch_memory_size = shader->config.scratch_bytes_per_wave;
-   shader_data->lds_size = ALIGN(shader->config.lds_size, lds_increment);
+   shader_data->lds_size = align(shader->config.lds_size, lds_increment);
    shader_data->wavefront_size = shader->info.wave_size;
    shader_data->base_address = va & 0xffffffffffff;
    shader_data->elf_symbol_offset = 0;
diff --git a/src/amd/vulkan/radv_acceleration_structure.c b/src/amd/vulkan/radv_acceleration_structure.c
index 447ee443365..fe753276189 100644
--- a/src/amd/vulkan/radv_acceleration_structure.c
+++ b/src/amd/vulkan/radv_acceleration_structure.c
@@ -162,7 +162,7 @@ radv_get_acceleration_structure_layout(struct radv_device *device,
       offset += bvh_size / 64 * 4;
 
    /* The BVH and hence bvh_offset needs 64 byte alignment for RT nodes. */
-   offset = ALIGN(offset, 64);
+   offset = align(offset, 64);
    accel_struct->bvh_offset = offset;
 
    /* root node */
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 44bef619563..793c652dba3 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -543,7 +543,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
 
    if (radv_shader_should_clear_lds(device, stage->nir)) {
       const unsigned chunk_size = 16; /* max single store size */
-      const unsigned shared_size = ALIGN(stage->nir->info.shared_size, chunk_size);
+      const unsigned shared_size = align(stage->nir->info.shared_size, chunk_size);
       NIR_PASS(_, stage->nir, nir_clear_shared_memory, shared_size, chunk_size);
    }
 
diff --git a/src/amd/vulkan/radv_queue.c b/src/amd/vulkan/radv_queue.c
index 6343a219304..7a03e163bc7 100644
--- a/src/amd/vulkan/radv_queue.c
+++ b/src/amd/vulkan/radv_queue.c
@@ -146,9 +146,9 @@ radv_sparse_image_bind_memory(struct radv_device *device, const VkSparseImageMem
                                         (uint64_t)bind_offset.x * surface->prt_tile_height * surface->prt_tile_depth) *
                                           bs;
 
-      uint32_t aligned_extent_width = ALIGN(bind_extent.width, surface->prt_tile_width);
-      uint32_t aligned_extent_height = ALIGN(bind_extent.height, surface->prt_tile_height);
-      uint32_t aligned_extent_depth = ALIGN(bind_extent.depth, surface->prt_tile_depth);
+      uint32_t aligned_extent_width = align(bind_extent.width, surface->prt_tile_width);
+      uint32_t aligned_extent_height = align(bind_extent.height, surface->prt_tile_height);
+      uint32_t aligned_extent_depth = align(bind_extent.depth, surface->prt_tile_depth);
 
       bool whole_subres = (bind_extent.height <= surface->prt_tile_height || aligned_extent_width == pitch) &&
                           (bind_extent.depth <= surface->prt_tile_depth ||
diff --git a/src/amd/vulkan/radv_sdma.c b/src/amd/vulkan/radv_sdma.c
index cbdc70cdd0a..e32a16de2b9 100644
--- a/src/amd/vulkan/radv_sdma.c
+++ b/src/amd/vulkan/radv_sdma.c
@@ -134,7 +134,7 @@ radv_sdma_get_chunked_copy_info(const struct radv_device *const device, const st
 {
    const unsigned extent_horizontal_blocks = DIV_ROUND_UP(extent.width * img->texel_scale, img->blk_w);
    const unsigned extent_vertical_blocks = DIV_ROUND_UP(extent.height, img->blk_h);
-   const unsigned aligned_row_pitch = ALIGN(extent_horizontal_blocks, 4);
+   const unsigned aligned_row_pitch = align(extent_horizontal_blocks, 4);
    const unsigned aligned_row_bytes = aligned_row_pitch * img->bpp;
 
    /* Assume that we can always copy at least one full row at a time. */
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index e7e70dedd81..5e807cdd19b 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -760,7 +760,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
 
       if (nir->info.zero_initialize_shared_memory && nir->info.shared_size > 0) {
          const unsigned chunk_size = 16; /* max single store size */
-         const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
+         const unsigned shared_size = align(nir->info.shared_size, chunk_size);
          NIR_PASS(_, nir, nir_zero_initialize_shared_memory, shared_size, chunk_size);
       }
    }
@@ -945,7 +945,7 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
                &ngg_stage->info.ngg_lds_scratch_size);
    } else if (nir->info.stage == MESA_SHADER_MESH) {
       /* ACO aligns the workgroup size to the wave size. */
-      unsigned hw_workgroup_size = ALIGN(info->workgroup_size, info->wave_size);
+      unsigned hw_workgroup_size = align(info->workgroup_size, info->wave_size);
 
       bool scratch_ring = false;
       NIR_PASS(_, nir, ac_nir_lower_ngg_mesh, &pdev->info, options.export_clipdist_mask, options.vs_output_param_offset,
diff --git a/src/amd/vulkan/radv_shader_object.c b/src/amd/vulkan/radv_shader_object.c
index b99384c1073..d06c971cc0f 100644
--- a/src/amd/vulkan/radv_shader_object.c
+++ b/src/amd/vulkan/radv_shader_object.c
@@ -620,7 +620,7 @@ radv_get_shader_binary_size(const struct radv_shader_binary *binary)
    size_t size = sizeof(uint32_t); /* has_binary */
 
    if (binary)
-      size += SHA1_DIGEST_LENGTH + 4 + ALIGN(binary->total_size, 4);
+      size += SHA1_DIGEST_LENGTH + 4 + align(binary->total_size, 4);
 
    return size;
 }
diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c
index 3322aabd450..c2c7c3fb65b 100644
--- a/src/asahi/compiler/agx_compile.c
+++ b/src/asahi/compiler/agx_compile.c
@@ -3450,7 +3450,7 @@ agx_compile_function_nir(nir_shader *nir, nir_function_impl *impl,
     */
    if (ctx->any_scratch) {
       assert(!ctx->is_preamble && "preambles don't use scratch");
-      ctx->scratch_size_B = ALIGN(nir->scratch_size, 16);
+      ctx->scratch_size_B = align(nir->scratch_size, 16);
    }
 
    /* Stop the main shader or preamble shader after the exit block. For real
@@ -3509,7 +3509,7 @@ agx_compile_function_nir(nir_shader *nir, nir_function_impl *impl,
 
    if (ctx->scratch_size_B > 0) {
       /* Apple always allocate 40 more bytes in the entrypoint and align to 4. */
-      uint64_t stack_size = ALIGN(DIV_ROUND_UP(ctx->scratch_size_B, 4) + 10, 4);
+      uint64_t stack_size = align(DIV_ROUND_UP(ctx->scratch_size_B, 4) + 10, 4);
 
       assert(stack_size < INT16_MAX);
 
diff --git a/src/asahi/layout/layout.h b/src/asahi/layout/layout.h
index 05d29a6a47d..d7b28d0df46 100644
--- a/src/asahi/layout/layout.h
+++ b/src/asahi/layout/layout.h
@@ -339,10 +339,10 @@ static inline bool
 ail_is_level_allocated_compressed(const struct ail_layout *layout,
                                   unsigned level)
 {
-   unsigned width_sa = ALIGN(
+   unsigned width_sa = align(
       ail_effective_width_sa(layout->width_px, layout->sample_count_sa), 16);
 
-   unsigned height_sa = ALIGN(
+   unsigned height_sa = align(
       ail_effective_height_sa(layout->height_px, layout->sample_count_sa), 16);
 
    return layout->compressed &&
diff --git a/src/asahi/vulkan/hk_descriptor_set_layout.c b/src/asahi/vulkan/hk_descriptor_set_layout.c
index 05a3c2fa2f1..d7e81c6d868 100644
--- a/src/asahi/vulkan/hk_descriptor_set_layout.c
+++ b/src/asahi/vulkan/hk_descriptor_set_layout.c
@@ -80,7 +80,7 @@ hk_descriptor_stride_align_for_type(
          *stride = MAX2(*stride, desc_stride);
          *alignment = MAX2(*alignment, desc_align);
       }
-      *stride = ALIGN(*stride, *alignment);
+      *stride = align(*stride, *alignment);
       break;
 
    default:
diff --git a/src/compiler/glsl/gl_nir_link_varyings.c b/src/compiler/glsl/gl_nir_link_varyings.c
index e1f40f415dd..bac5163ba0f 100644
--- a/src/compiler/glsl/gl_nir_link_varyings.c
+++ b/src/compiler/glsl/gl_nir_link_varyings.c
@@ -2093,7 +2093,7 @@ xfb_decl_store(struct xfb_decl *xfb_decl, const struct gl_constants *consts,
       if (max_member_alignment && has_xfb_qualifiers) {
          max_member_alignment[buffer] = MAX2(max_member_alignment[buffer],
                                              _mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1);
-         info->Buffers[buffer].Stride = ALIGN(xfb_offset,
+         info->Buffers[buffer].Stride = align(xfb_offset,
                                               max_member_alignment[buffer]);
       } else {
          info->Buffers[buffer].Stride = xfb_offset;
@@ -2926,7 +2926,7 @@ varying_matches_assign_locations(struct varying_matches *vm,
           (previous_packing_class != vm->matches[i].packing_class) ||
           (vm->matches[i].packing_order == PACKING_ORDER_VEC3 &&
            dont_pack_vec3)) {
-         *location = ALIGN(*location, 4);
+         *location = align(*location, 4);
       }
 
       previous_var_xfb = var->data.is_xfb;
@@ -2967,7 +2967,7 @@ varying_matches_assign_locations(struct varying_matches *vm,
             break;
          }
 
-         *location = ALIGN(*location + 1, 4);
+         *location = align(*location + 1, 4);
          slot_end = *location + num_components - 1;
       }
 
@@ -3229,9 +3229,9 @@ tfeedback_candidate_generator(struct tfeedback_candidate_generator_state *state,
           * (c) each double-precision variable captured must be aligned to a
           *     multiple of eight bytes relative to the beginning of a vertex.
           */
-         state->xfb_offset_floats = ALIGN(state->xfb_offset_floats, 2);
+         state->xfb_offset_floats = align(state->xfb_offset_floats, 2);
          /* 64-bit members of structs are also aligned. */
-         state->varying_floats = ALIGN(state->varying_floats, 2);
+         state->varying_floats = align(state->varying_floats, 2);
       }
 
       candidate->xfb_offset_floats = state->xfb_offset_floats;
diff --git a/src/compiler/nir/nir_lower_shader_calls.c b/src/compiler/nir/nir_lower_shader_calls.c
index afa0b6f2ca6..e445718364d 100644
--- a/src/compiler/nir/nir_lower_shader_calls.c
+++ b/src/compiler/nir/nir_lower_shader_calls.c
@@ -586,7 +586,7 @@ spill_ssa_defs_and_lower_shader_calls(nir_shader *shader, uint32_t num_calls,
                   def = nir_b2b32(&before, def);
 
                const unsigned comp_size = def->bit_size / 8;
-               offset = ALIGN(offset, comp_size);
+               offset = align(offset, comp_size);
 
                new_def = spill_fill(&before, &after, def,
                                     index, call_idx,
@@ -617,7 +617,7 @@ spill_ssa_defs_and_lower_shader_calls(nir_shader *shader, uint32_t num_calls,
 
          nir_builder *b = &before;
 
-         offset = ALIGN(offset, options->stack_alignment);
+         offset = align(offset, options->stack_alignment);
          max_scratch_size = MAX2(max_scratch_size, offset);
 
          /* First thing on the called shader's stack is the resume address
@@ -1642,11 +1642,11 @@ nir_opt_sort_and_pack_stack(nir_shader *shader,
 
       unsigned scratch_size = start_call_scratch;
       util_dynarray_foreach(&ops, struct scratch_item, item) {
-         item->new_offset = ALIGN(scratch_size, item->bit_size / 8);
+         item->new_offset = align(scratch_size, item->bit_size / 8);
          scratch_size = item->new_offset + (item->bit_size * item->num_components) / 8;
          _mesa_hash_table_u64_insert(value_id_to_item, item->value, item);
       }
-      shader->scratch_size = ALIGN(scratch_size, stack_alignment);
+      shader->scratch_size = align(scratch_size, stack_alignment);
 
       /* Update offsets in the instructions */
       nir_foreach_block_safe(block, impl) {
diff --git a/src/compiler/nir/nir_lower_task_shader.c b/src/compiler/nir/nir_lower_task_shader.c
index 0207e207749..356505bb76d 100644
--- a/src/compiler/nir/nir_lower_task_shader.c
+++ b/src/compiler/nir/nir_lower_task_shader.c
@@ -132,7 +132,7 @@ static bool
 nir_lower_nv_task_count(nir_shader *shader)
 {
    lower_task_nv_state state = {
-      .task_count_shared_addr = ALIGN(shader->info.shared_size, 4),
+      .task_count_shared_addr = align(shader->info.shared_size, 4),
    };
 
    shader->info.shared_size += 4;
@@ -277,7 +277,7 @@ emit_shared_to_payload_copy(nir_builder *b,
       off += remaining_dwords * 4;
    }
 
-   assert(s->payload_offset_in_bytes + ALIGN(payload_size, 4) == off);
+   assert(s->payload_offset_in_bytes + align(payload_size, 4) == off);
 }
 
 static bool
@@ -453,7 +453,7 @@ nir_lower_task_shader(nir_shader *shader,
                             requires_payload_in_shared(shader, atomics, small_types);
 
    lower_task_state state = {
-      .payload_shared_addr = ALIGN(shader->info.shared_size, 16),
+      .payload_shared_addr = align(shader->info.shared_size, 16),
       .payload_in_shared = payload_in_shared,
       .payload_offset_in_bytes = options.payload_offset_in_bytes,
    };
diff --git a/src/etnaviv/drm/etnaviv_bo_cache.c b/src/etnaviv/drm/etnaviv_bo_cache.c
index 29cdada33b4..21bc8949d14 100644
--- a/src/etnaviv/drm/etnaviv_bo_cache.c
+++ b/src/etnaviv/drm/etnaviv_bo_cache.c
@@ -151,7 +151,7 @@ struct etna_bo *etna_bo_cache_alloc(struct etna_bo_cache *cache, uint32_t *size,
 	struct etna_bo *bo;
 	struct etna_bo_bucket *bucket;
 
-	*size = ALIGN(*size, 4096);
+	*size = align(*size, 4096);
 	bucket = get_bucket(cache, *size);
 
 	/* see if we can be green and recycle: */
diff --git a/src/etnaviv/drm/etnaviv_cmd_stream.c b/src/etnaviv/drm/etnaviv_cmd_stream.c
index 93a7a87930d..537e02a6a41 100644
--- a/src/etnaviv/drm/etnaviv_cmd_stream.c
+++ b/src/etnaviv/drm/etnaviv_cmd_stream.c
@@ -105,7 +105,7 @@ struct etna_cmd_stream *etna_cmd_stream_new(struct etna_pipe *pipe,
 	}
 
 	/* allocate even number of 32-bit words */
-	size = ALIGN(size, 2);
+	size = align(size, 2);
 
 	stream->base.buffer = malloc(size * sizeof(uint32_t));
 	if (!stream->base.buffer) {
diff --git a/src/freedreno/computerator/a6xx.cc b/src/freedreno/computerator/a6xx.cc
index 000c0b7179f..ef7f26b9762 100644
--- a/src/freedreno/computerator/a6xx.cc
+++ b/src/freedreno/computerator/a6xx.cc
@@ -257,7 +257,7 @@ cs_program_emit_regs(fd_cs &cs, struct kernel *kernel)
    if (v->pvtmem_size > 0) {
       uint32_t per_fiber_size = v->pvtmem_size;
       uint32_t per_sp_size =
-         ALIGN(per_fiber_size * a6xx_backend->info->fibers_per_sp, 1 << 12);
+         align(per_fiber_size * a6xx_backend->info->fibers_per_sp, 1 << 12);
       uint32_t total_size = per_sp_size * a6xx_backend->info->num_sp_cores;
 
       struct fd_bo *pvtmem = fd_bo_new(a6xx_backend->dev, total_size, 0, "pvtmem");
diff --git a/src/freedreno/decode/pgmdump.c b/src/freedreno/decode/pgmdump.c
index f557cef96f0..a8086149b6a 100644
--- a/src/freedreno/decode/pgmdump.c
+++ b/src/freedreno/decode/pgmdump.c
@@ -268,7 +268,7 @@ next_sect(struct state *state, int *sect_size)
    *sect_size = end - state->buf;
 
    /* copy the section to keep things nicely 32b aligned: */
-   sect = malloc(ALIGN(*sect_size, 4));
+   sect = malloc(align(*sect_size, 4));
    memcpy(sect, state->buf, *sect_size);
 
    state->sz -= *sect_size + 4;
@@ -584,8 +584,8 @@ dump_shaders_a3xx(struct state *state)
 
       if (!compact) {
          if (state->hdr->revision >= 7) {
-            instrs += ALIGN(instrs_size, 8) - instrs_size;
-            instrs_size = ALIGN(instrs_size, 8);
+            instrs += align(instrs_size, 8) - instrs_size;
+            instrs_size = align(instrs_size, 8);
          }
          instrs += 32;
          instrs_size -= 32;
diff --git a/src/freedreno/decode/pgmdump2.c b/src/freedreno/decode/pgmdump2.c
index 541bf36c787..44602f79c9d 100644
--- a/src/freedreno/decode/pgmdump2.c
+++ b/src/freedreno/decode/pgmdump2.c
@@ -347,9 +347,9 @@ decode_shader_descriptor_block(struct state *state,
             unsigned dwords = 2 * stats.instlen;
 
             if (gpu_id >= 400) {
-               dwords = ALIGN(dwords, 16 * 2);
+               dwords = align(dwords, 16 * 2);
             } else {
-               dwords = ALIGN(dwords, 4 * 2);
+               dwords = align(dwords, 4 * 2);
             }
 
             unsigned half_regs = state->half_regs;
diff --git a/src/freedreno/drm/freedreno_bo_heap.c b/src/freedreno/drm/freedreno_bo_heap.c
index 5ec17bc8e4c..7439e49ea4c 100644
--- a/src/freedreno/drm/freedreno_bo_heap.c
+++ b/src/freedreno/drm/freedreno_bo_heap.c
@@ -205,7 +205,7 @@ fd_bo_heap_alloc(struct fd_bo_heap *heap, uint32_t size, uint32_t flags)
     */
    size = MAX2(size, SUBALLOC_ALIGNMENT);
 
-   size = ALIGN(size, SUBALLOC_ALIGNMENT);
+   size = align(size, SUBALLOC_ALIGNMENT);
 
    simple_mtx_lock(&heap->lock);
    /* Allocate larger buffers from the bottom, and smaller buffers from top
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index e274c83f57a..3437bab6a79 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -6026,7 +6026,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
       ir3_instr_move_before(unlock, end);
    }
 
-   so->pvtmem_size = ALIGN(so->pvtmem_size, compiler->pvtmem_per_fiber_align);
+   so->pvtmem_size = align(so->pvtmem_size, compiler->pvtmem_per_fiber_align);
 
    /* Note that max_bary counts inputs that are not bary.f'd for FS: */
    if (so->type == MESA_SHADER_FRAGMENT)
diff --git a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
index 6e3a77909c8..8fecd03213b 100644
--- a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
+++ b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
@@ -36,7 +36,7 @@ get_ubo_load_range(nir_shader *nir, nir_intrinsic_instr *instr,
       return false;
 
    r->start = ROUND_DOWN_TO(offset, alignment * 16);
-   r->end = ALIGN(offset + size, alignment * 16);
+   r->end = align(offset + size, alignment * 16);
 
    return true;
 }
diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c
index 60d3eadb837..7c402a6e78f 100644
--- a/src/freedreno/ir3/ir3_ra.c
+++ b/src/freedreno/ir3/ir3_ra.c
@@ -1237,7 +1237,7 @@ compress_regs_left(struct ra_ctx *ctx, struct ra_file *file,
       }
 
       if (!(cur_reg->flags & IR3_REG_HALF))
-         physreg = ALIGN(physreg, 2);
+         physreg = align(physreg, 2);
 
       d("pushing reg %u physreg %u\n", cur_reg->name, physreg);
 
@@ -1330,7 +1330,7 @@ find_best_gap(struct ra_ctx *ctx, struct ra_file *file,
    BITSET_WORD *available =
       is_early_clobber(dst) ? file->available_to_evict : file->available;
 
-   unsigned start = ALIGN(file->start, alignment);
+   unsigned start = align(file->start, alignment);
    if (start + size > file_size)
       start = 0;
    unsigned candidate = start;
diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c
index f2a8a66da58..9ac3b70b57e 100644
--- a/src/freedreno/ir3/ir3_shader.c
+++ b/src/freedreno/ir3/ir3_shader.c
@@ -37,7 +37,7 @@ ir3_const_ensure_imm_size(struct ir3_shader_variant *v, unsigned size)
    /* Immediates are uploaded in units of vec4 so make sure our buffer is large
     * enough.
     */
-   size = ALIGN(size, 4);
+   size = align(size, 4);
 
    /* Pre-a7xx, the immediates that get lowered to const registers are
     * emitted as part of the const state so the total size of immediates
diff --git a/src/freedreno/ir3/ir3_shared_ra.c b/src/freedreno/ir3/ir3_shared_ra.c
index 7ab2312b21f..17d92bbbb32 100644
--- a/src/freedreno/ir3/ir3_shared_ra.c
+++ b/src/freedreno/ir3/ir3_shared_ra.c
@@ -362,7 +362,7 @@ find_best_gap(struct ra_ctx *ctx, struct ir3_register *dst, unsigned size,
    if (size > file_size)
       return (physreg_t) ~0;
 
-   unsigned start = ALIGN(ctx->start, alignment);
+   unsigned start = align(ctx->start, alignment);
    if (start + size > file_size)
       start = 0;
    unsigned candidate = start;
@@ -395,7 +395,7 @@ find_best_spill_reg(struct ra_ctx *ctx, struct ir3_register *reg,
    unsigned file_size = reg_file_size(reg);
    unsigned min_cost = UINT_MAX;
 
-   unsigned start = ALIGN(ctx->start, alignment);
+   unsigned start = align(ctx->start, alignment);
    if (start + size > file_size)
       start = 0;
    physreg_t candidate = start;
diff --git a/src/freedreno/vulkan/tu_descriptor_set.cc b/src/freedreno/vulkan/tu_descriptor_set.cc
index 534f76dd618..81c7d793677 100644
--- a/src/freedreno/vulkan/tu_descriptor_set.cc
+++ b/src/freedreno/vulkan/tu_descriptor_set.cc
@@ -602,7 +602,7 @@ tu_descriptor_set_create(struct tu_device *device,
          &layout->binding[layout->binding_count - 1];
       if (binding->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
          layout_size = binding->offset +
-            ALIGN(variable_count, 4 * A6XX_TEX_CONST_DWORDS);
+            align(variable_count, 4 * A6XX_TEX_CONST_DWORDS);
       } else {
          uint32_t stride = binding->size;
          layout_size = binding->offset + variable_count * stride;
diff --git a/src/freedreno/vulkan/tu_pipeline.cc b/src/freedreno/vulkan/tu_pipeline.cc
index 00eaf574d8c..7f8542267a4 100644
--- a/src/freedreno/vulkan/tu_pipeline.cc
+++ b/src/freedreno/vulkan/tu_pipeline.cc
@@ -683,7 +683,7 @@ tu6_emit_link_map(struct tu_cs *cs,
                   enum a6xx_state_block sb)
 {
    const struct ir3_const_state *const_state = ir3_const_state(consumer);
-   uint32_t size = ALIGN(consumer->input_size, 4);
+   uint32_t size = align(consumer->input_size, 4);
 
    if (size == 0)
       return;
diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc
index 76f8ed30fad..a88fa3d2059 100644
--- a/src/freedreno/vulkan/tu_shader.cc
+++ b/src/freedreno/vulkan/tu_shader.cc
@@ -2289,9 +2289,9 @@ tu_setup_pvtmem(struct tu_device *dev,
          tu_bo_finish(dev, pvtmem_bo->bo);
 
       pvtmem_bo->per_fiber_size =
-         util_next_power_of_two(ALIGN(pvtmem_bytes, 512));
+         util_next_power_of_two(align(pvtmem_bytes, 512));
       pvtmem_bo->per_sp_size =
-         ALIGN(pvtmem_bo->per_fiber_size *
+         align(pvtmem_bo->per_fiber_size *
                   dev->physical_device->info->fibers_per_sp,
                1 << 12);
       uint32_t total_size =
@@ -2729,7 +2729,7 @@ tu_shader_create(struct tu_device *dev,
           * is allocated at the API level, and it's up to the user to ensure
           * that accesses are limited to those bounds.
           */
-         const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
+         const unsigned shared_size = align(nir->info.shared_size, chunk_size);
          NIR_PASS(_, nir, nir_zero_initialize_shared_memory, shared_size,
                   chunk_size);
       }
diff --git a/src/freedreno/vulkan/tu_suballoc.cc b/src/freedreno/vulkan/tu_suballoc.cc
index 0f7bcf35e8d..019e290c8fd 100644
--- a/src/freedreno/vulkan/tu_suballoc.cc
+++ b/src/freedreno/vulkan/tu_suballoc.cc
@@ -55,7 +55,7 @@ tu_suballoc_bo_alloc(struct tu_suballoc_bo *suballoc_bo,
 {
    struct tu_bo *bo = suballoc->bo;
    if (bo) {
-      uint32_t offset = ALIGN(suballoc->next_offset, alignment);
+      uint32_t offset = align(suballoc->next_offset, alignment);
       if (offset + size <= bo->size) {
          suballoc_bo->bo = tu_bo_get_ref(bo);
          suballoc_bo->iova = bo->iova + offset;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c
index 749269a478d..6e6aebb9db8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c
@@ -6073,7 +6073,7 @@ void lp_build_nir_soa_func(struct gallivm_state *gallivm,
 
    bld.shader = shader;
 
-   bld.scratch_size = ALIGN(shader->scratch_size, 8);
+   bld.scratch_size = align(shader->scratch_size, 8);
    if (params->scratch_ptr)
       bld.scratch_ptr = params->scratch_ptr;
    else if (shader->scratch_size) {
diff --git a/src/gallium/drivers/crocus/crocus_batch.c b/src/gallium/drivers/crocus/crocus_batch.c
index 9a8505e7c4b..868a72b4d2e 100644
--- a/src/gallium/drivers/crocus/crocus_batch.c
+++ b/src/gallium/drivers/crocus/crocus_batch.c
@@ -872,7 +872,7 @@ submit_batch(struct crocus_batch *batch)
       .buffer_count = batch->exec_count,
       .batch_start_offset = 0,
       /* This must be QWord aligned. */
-      .batch_len = ALIGN(batch->primary_batch_size, 8),
+      .batch_len = align(batch->primary_batch_size, 8),
       .flags = I915_EXEC_RENDER |
                I915_EXEC_NO_RELOC |
                I915_EXEC_BATCH_FIRST |
diff --git a/src/gallium/drivers/crocus/crocus_blorp.c b/src/gallium/drivers/crocus/crocus_blorp.c
index 313aa6c178d..46ccdd9bd9e 100644
--- a/src/gallium/drivers/crocus/crocus_blorp.c
+++ b/src/gallium/drivers/crocus/crocus_blorp.c
@@ -56,11 +56,11 @@ stream_state(struct crocus_batch *batch,
              uint32_t *out_offset,
              struct crocus_bo **out_bo)
 {
-   uint32_t offset = ALIGN(batch->state.used, alignment);
+   uint32_t offset = align(batch->state.used, alignment);
 
    if (offset + size >= STATE_SZ && !batch->no_wrap) {
       crocus_batch_flush(batch);
-      offset = ALIGN(batch->state.used, alignment);
+      offset = align(batch->state.used, alignment);
    } else if (offset + size >= batch->state.bo->size) {
       const unsigned new_size =
          MIN2(batch->state.bo->size + batch->state.bo->size / 2,
diff --git a/src/gallium/drivers/crocus/crocus_context.c b/src/gallium/drivers/crocus/crocus_context.c
index c766f6da5d9..46893b3ab8e 100644
--- a/src/gallium/drivers/crocus/crocus_context.c
+++ b/src/gallium/drivers/crocus/crocus_context.c
@@ -62,7 +62,7 @@ crocus_init_identifier_bo(struct crocus_context *ice)
       return false;
 
    ice->workaround_bo->kflags |= EXEC_OBJECT_CAPTURE;
-   ice->workaround_offset = ALIGN(
+   ice->workaround_offset = align(
       intel_debug_write_identifiers(bo_map, 4096, "Crocus"), 32);
 
    crocus_bo_unmap(ice->workaround_bo);
diff --git a/src/gallium/drivers/crocus/crocus_program_cache.c b/src/gallium/drivers/crocus/crocus_program_cache.c
index f03da30f0cb..88ccee9a83d 100644
--- a/src/gallium/drivers/crocus/crocus_program_cache.c
+++ b/src/gallium/drivers/crocus/crocus_program_cache.c
@@ -179,7 +179,7 @@ crocus_alloc_item_data(struct crocus_context *ice, uint32_t size)
    uint32_t offset = ice->shaders.cache_next_offset;
 
    /* Programs are always 64-byte aligned, so set up the next one now */
-   ice->shaders.cache_next_offset = ALIGN(offset + size, 64);
+   ice->shaders.cache_next_offset = align(offset + size, 64);
    return offset;
 }
 
diff --git a/src/gallium/drivers/crocus/crocus_resource.c b/src/gallium/drivers/crocus/crocus_resource.c
index 944792f4d19..e054fca9ea4 100644
--- a/src/gallium/drivers/crocus/crocus_resource.c
+++ b/src/gallium/drivers/crocus/crocus_resource.c
@@ -229,7 +229,7 @@ crocus_resource_configure_main(const struct crocus_screen *screen,
        devinfo->ver < 6) {
       /* align row pitch to 4 so we can keep using BLT engine */
       row_pitch_B = util_format_get_stride(templ->format, templ->width0);
-      row_pitch_B = ALIGN(row_pitch_B, 4);
+      row_pitch_B = align(row_pitch_B, 4);
    }
 
    const struct isl_surf_init_info init_info = {
@@ -1437,7 +1437,7 @@ crocus_map_tiled_memcpy(struct crocus_transfer *map)
    struct crocus_resource *res = (struct crocus_resource *) xfer->resource;
    struct isl_surf *surf = &res->surf;
 
-   xfer->stride = ALIGN(surf->row_pitch_B, 16);
+   xfer->stride = align(surf->row_pitch_B, 16);
    xfer->layer_stride = xfer->stride * box->height;
 
    unsigned x1, x2, y1, y2;
diff --git a/src/gallium/drivers/crocus/crocus_state.c b/src/gallium/drivers/crocus/crocus_state.c
index 888f22b734a..15cbf380476 100644
--- a/src/gallium/drivers/crocus/crocus_state.c
+++ b/src/gallium/drivers/crocus/crocus_state.c
@@ -329,11 +329,11 @@ stream_state(struct crocus_batch *batch,
              unsigned alignment,
              uint32_t *out_offset)
 {
-   uint32_t offset = ALIGN(batch->state.used, alignment);
+   uint32_t offset = align(batch->state.used, alignment);
 
    if (offset + size >= STATE_SZ && !batch->no_wrap) {
       crocus_batch_flush(batch);
-      offset = ALIGN(batch->state.used, alignment);
+      offset = align(batch->state.used, alignment);
    } else if (offset + size >= batch->state.bo->size) {
       const unsigned new_size =
          MIN2(batch->state.bo->size + batch->state.bo->size / 2,
@@ -6591,7 +6591,7 @@ crocus_upload_dirty_render_state(struct crocus_context *ice,
             struct crocus_resource *res = (void *) tgt->base.buffer;
             uint32_t start = tgt->base.buffer_offset;
 #if GFX_VER < 8
-            uint32_t end = ALIGN(start + tgt->base.buffer_size, 4);
+            uint32_t end = align(start + tgt->base.buffer_size, 4);
 #endif
             crocus_emit_cmd(batch, GENX(3DSTATE_SO_BUFFER), sob) {
                sob.SOBufferIndex = i;
@@ -8143,7 +8143,7 @@ crocus_upload_compute_state(struct crocus_context *ice,
          vfe.URBEntryAllocationSize = GFX_VER == 8 ? 2 : 0;
 
          vfe.CURBEAllocationSize =
-            ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads +
+            align(cs_prog_data->push.per_thread.regs * dispatch.threads +
                   cs_prog_data->push.cross_thread.regs, 2);
       }
    }
@@ -8159,15 +8159,15 @@ crocus_upload_compute_state(struct crocus_context *ice,
          elk_cs_push_const_total_size(cs_prog_data, dispatch.threads);
       uint32_t *curbe_data_map =
          stream_state(batch,
-                      ALIGN(push_const_size, 64), 64,
+                      align(push_const_size, 64), 64,
                       &curbe_data_offset);
       assert(curbe_data_map);
-      memset(curbe_data_map, 0x5a, ALIGN(push_const_size, 64));
+      memset(curbe_data_map, 0x5a, align(push_const_size, 64));
       crocus_fill_cs_push_const_buffer(cs_prog_data, dispatch.threads,
                                        curbe_data_map);
 
       crocus_emit_cmd(batch, GENX(MEDIA_CURBE_LOAD), curbe) {
-         curbe.CURBETotalDataLength = ALIGN(push_const_size, 64);
+         curbe.CURBETotalDataLength = align(push_const_size, 64);
          curbe.CURBEDataStartAddress = curbe_data_offset;
       }
    }
diff --git a/src/gallium/drivers/d3d12/d3d12_resource.cpp b/src/gallium/drivers/d3d12/d3d12_resource.cpp
index a1a0e81df26..2b5e995901d 100644
--- a/src/gallium/drivers/d3d12/d3d12_resource.cpp
+++ b/src/gallium/drivers/d3d12/d3d12_resource.cpp
@@ -1199,11 +1199,11 @@ fill_buffer_location(struct d3d12_context *ctx,
       buf_loc.PlacedFootprint.Footprint.Height = res->base.b.height0;
       buf_loc.PlacedFootprint.Footprint.Depth = res->base.b.depth0;
    } else {
-      buf_loc.PlacedFootprint.Footprint.Width = ALIGN(trans->base.b.box.width,
+      buf_loc.PlacedFootprint.Footprint.Width = align(trans->base.b.box.width,
                                                       util_format_get_blockwidth(res->base.b.format));
-      buf_loc.PlacedFootprint.Footprint.Height = ALIGN(trans->base.b.box.height,
+      buf_loc.PlacedFootprint.Footprint.Height = align(trans->base.b.box.height,
                                                        util_format_get_blockheight(res->base.b.format));
-      buf_loc.PlacedFootprint.Footprint.Depth = ALIGN(depth,
+      buf_loc.PlacedFootprint.Footprint.Depth = align(depth,
                                                       util_format_get_blockdepth(res->base.b.format));
    }
 
diff --git a/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream_builder_hevc.cpp b/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream_builder_hevc.cpp
index 640ddf9ce02..5c73710f03f 100644
--- a/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream_builder_hevc.cpp
+++ b/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream_builder_hevc.cpp
@@ -479,8 +479,8 @@ d3d12_video_bitstream_builder_hevc::build_sps(const HevcVideoParameterSet& paren
    viewport.Width = crop_window_upper_layer.front /* passes width */ - ((crop_window_upper_layer.left + crop_window_upper_layer.right) * SubWidthC);
    viewport.Height = crop_window_upper_layer.back /* passes height */- ((crop_window_upper_layer.top + crop_window_upper_layer.bottom) * SubHeightC);
 
-   m_latest_sps.pic_width_in_luma_samples = ALIGN(encodeResolution.Width, picDimensionMultipleRequirement);
-   m_latest_sps.pic_height_in_luma_samples = ALIGN(encodeResolution.Height, picDimensionMultipleRequirement);
+   m_latest_sps.pic_width_in_luma_samples = align(encodeResolution.Width, picDimensionMultipleRequirement);
+   m_latest_sps.pic_height_in_luma_samples = align(encodeResolution.Height, picDimensionMultipleRequirement);
    m_latest_sps.conf_win_right_offset = (m_latest_sps.pic_width_in_luma_samples - viewport.Width) / SubWidthC;
    m_latest_sps.conf_win_bottom_offset = (m_latest_sps.pic_height_in_luma_samples - viewport.Height) / SubHeightC;
 
diff --git a/src/gallium/drivers/ethosu/ethosu_cmd.c b/src/gallium/drivers/ethosu/ethosu_cmd.c
index e1e8fdfec78..eda454cdadc 100644
--- a/src/gallium/drivers/ethosu/ethosu_cmd.c
+++ b/src/gallium/drivers/ethosu/ethosu_cmd.c
@@ -90,7 +90,7 @@ emit_strides(
    if (tensor->layout == ETHOSU_LAYOUT_NHCWB16) {
       tensor_x = 16 * elem_size;
       tensor_c = tensor_x * tensor->shape.width;
-      tensor_y = elem_size * tensor->shape.width * ALIGN(tensor->shape.depth, 16);
+      tensor_y = elem_size * tensor->shape.width * align(tensor->shape.depth, 16);
    } else {
       tensor_c = elem_size;
       tensor_x = tensor->shape.depth * tensor_c;
diff --git a/src/gallium/drivers/ethosu/ethosu_coefs.c b/src/gallium/drivers/ethosu/ethosu_coefs.c
index a46cc3370cd..fe793fcc321 100644
--- a/src/gallium/drivers/ethosu/ethosu_coefs.c
+++ b/src/gallium/drivers/ethosu/ethosu_coefs.c
@@ -16,7 +16,7 @@ fill_scale_and_biases(struct ethosu_subgraph *subgraph, struct ethosu_operation
                                      PIPE_MAP_READ, &transfer_in);
    unsigned idx = 0;
 
-   *scales_size = ALIGN(operation->ofm.shape.depth * 10, 16);
+   *scales_size = align(operation->ofm.shape.depth * 10, 16);
    *scales = malloc(*scales_size);
    memset(*scales, 0, *scales_size);
 
diff --git a/src/gallium/drivers/ethosu/ethosu_lower.c b/src/gallium/drivers/ethosu/ethosu_lower.c
index 97fc8cc9f7a..a6286ba0d05 100644
--- a/src/gallium/drivers/ethosu/ethosu_lower.c
+++ b/src/gallium/drivers/ethosu/ethosu_lower.c
@@ -228,7 +228,7 @@ ethosu_lower_concatenation(struct ethosu_subgraph *subgraph,
       if (tensor->layout == ETHOSU_LAYOUT_NHWC)
          operation->ofm.tiles.addresses[0] += poperation->input_tensors[i]->dims[3];
       else if (tensor->layout == ETHOSU_LAYOUT_NHCWB16)
-         operation->ofm.tiles.addresses[0] += poperation->input_tensors[i]->dims[2] * ALIGN(poperation->input_tensors[i]->dims[3], 16);
+         operation->ofm.tiles.addresses[0] += poperation->input_tensors[i]->dims[2] * align(poperation->input_tensors[i]->dims[3], 16);
       else
          assert(0 && "Unsupported layout");
    }
diff --git a/src/gallium/drivers/ethosu/ethosu_ml.c b/src/gallium/drivers/ethosu/ethosu_ml.c
index 82eb0716d72..8230bdc7661 100644
--- a/src/gallium/drivers/ethosu/ethosu_ml.c
+++ b/src/gallium/drivers/ethosu/ethosu_ml.c
@@ -69,7 +69,7 @@ ethosu_allocate_feature_map(struct ethosu_subgraph *subgraph, struct ethosu_feat
    if (tensor->layout == ETHOSU_LAYOUT_NHWC) {
       size = tensor->shape.width * tensor->shape.height * tensor->shape.depth;
    } else if (tensor->layout == ETHOSU_LAYOUT_NHCWB16) {
-      size = tensor->shape.width * tensor->shape.height * ALIGN(tensor->shape.depth, 16);
+      size = tensor->shape.width * tensor->shape.height * align(tensor->shape.depth, 16);
    } else {
       assert(0 && "Unsupported layout");
       size = 0; // This should never happen
diff --git a/src/gallium/drivers/ethosu/ethosu_sched.c b/src/gallium/drivers/ethosu/ethosu_sched.c
index 45021362402..0e5cd122c25 100644
--- a/src/gallium/drivers/ethosu/ethosu_sched.c
+++ b/src/gallium/drivers/ethosu/ethosu_sched.c
@@ -18,11 +18,11 @@ _get_ifm_blocksize(struct ethosu_operation *operation, struct ethosu_block ofm_b
 
    // IFM block height
    int h = required_input_size(ofm_block.height, operation->kernel.stride_y, MIN2(operation->kernel.height, SUB_KERNEL_MAX.height));
-   h = ALIGN(h, OFM_UBLOCK.height);
+   h = align(h, OFM_UBLOCK.height);
 
    // IFM block width
    int w = required_input_size(ofm_block.width, operation->kernel.stride_x, MIN2(operation->kernel.width, SUB_KERNEL_MAX.width));
-   w = ALIGN(w, OFM_UBLOCK.width);
+   w = align(w, OFM_UBLOCK.width);
 
    ifm_block.height = h;
    ifm_block.width = w;
@@ -34,8 +34,8 @@ _get_ifm_blocksize(struct ethosu_operation *operation, struct ethosu_block ofm_b
 static bool
 try_block_config(struct ethosu_operation *operation, struct ethosu_block ofm_block, struct ethosu_block ifm_block, struct ethosu_shram_layout *layout)
 {
-   int ifm_bytes = ifm_block.width * ifm_block.height * ALIGN(ifm_block.depth, 8);
-   int ifm_banks = ALIGN(DIV_ROUND_UP(ifm_bytes, BANK_SIZE_BYTES) * 2, IFM_GRANULE);
+   int ifm_bytes = ifm_block.width * ifm_block.height * align(ifm_block.depth, 8);
+   int ifm_banks = align(DIV_ROUND_UP(ifm_bytes, BANK_SIZE_BYTES) * 2, IFM_GRANULE);
    int lut_bytes = operation->type == ETHOSU_OPERATION_TYPE_ELTWISE ? operation->eltwise.lut_bytes : 0;
    int lut_banks = MAX2(DIV_ROUND_UP(lut_bytes, 1024), SHRAM_RESERVED_END_BANKS);
    int lut_start = SHRAM_TOTAL_BANKS - lut_banks;
@@ -44,8 +44,8 @@ try_block_config(struct ethosu_operation *operation, struct ethosu_block ofm_blo
    int acc_start = lut_start;
 
    if (operation->type != ETHOSU_OPERATION_TYPE_ELTWISE) {
-      int acc_bytes = (ofm_block.width * ofm_block.height * ALIGN(ofm_block.depth, 8) * 32) / 8;
-      int acc_banks = ALIGN(DIV_ROUND_UP(acc_bytes, BANK_SIZE_BYTES) * 2, ACC_GRANULE);
+      int acc_bytes = (ofm_block.width * ofm_block.height * align(ofm_block.depth, 8) * 32) / 8;
+      int acc_banks = align(DIV_ROUND_UP(acc_bytes, BANK_SIZE_BYTES) * 2, ACC_GRANULE);
       acc_start -= acc_banks;
    } else {
       int ifm2_banks = ifm_banks; /* TODO: Fix for scalar eltwise */
@@ -89,12 +89,12 @@ find_block_config(struct ethosu_operation *operation)
    unsigned depth = MAX2(OFM_UBLOCK.depth, MIN2(search_space.depth, ARCH_SPLIT_DEPTH));
 
    if (depth < operation->ofm.shape.depth) {
-      depth = ALIGN(depth, ARCH_SPLIT_DEPTH);
+      depth = align(depth, ARCH_SPLIT_DEPTH);
    }
 
-   search_space.width = ALIGN(search_space.width, OFM_UBLOCK.width);
-   search_space.height = ALIGN(search_space.height, OFM_UBLOCK.height);
-   search_space.depth = ALIGN(search_space.depth, OFM_UBLOCK.depth);
+   search_space.width = align(search_space.width, OFM_UBLOCK.width);
+   search_space.height = align(search_space.height, OFM_UBLOCK.height);
+   search_space.depth = align(search_space.depth, OFM_UBLOCK.depth);
 
    while (depth <= search_space.depth) {
       bool wont_fit[search_space.height + 1][search_space.width + 1];
@@ -110,7 +110,7 @@ find_block_config(struct ethosu_operation *operation)
             struct ethosu_block ifm_block = _get_ifm_blocksize(operation, ofm_block);
 
             if (!is_equal_depth)
-               ifm_block.depth = ALIGN(MIN2(operation->ifm.shape.depth, operation->conv.part_kernel_first ? 16 : 32), IFM_UBLOCK.depth);
+               ifm_block.depth = align(MIN2(operation->ifm.shape.depth, operation->conv.part_kernel_first ? 16 : 32), IFM_UBLOCK.depth);
 
             // Try to fit the blocks in SHRAM
             struct ethosu_shram_layout layout = {0};
@@ -179,7 +179,7 @@ find_block_config(struct ethosu_operation *operation)
 
       depth += OFM_UBLOCK.depth;
       if (depth < operation->ofm.shape.depth) {
-         depth = ALIGN(depth, ARCH_SPLIT_DEPTH);
+         depth = align(depth, ARCH_SPLIT_DEPTH);
       }
    }
 
diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c b/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c
index 32a282e0292..cba022648fb 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c
@@ -1167,9 +1167,9 @@ create_nn_config(struct etna_ml_subgraph *subgraph, const struct etna_operation
       unsigned in_image_tile_x_size = map->out_image_tile_x_size + weight_width - 1;
       unsigned in_image_tile_y_size = map->out_image_tile_y_size + weight_width - 1;
       image_cache_size = in_image_tile_x_size * in_image_tile_y_size;
-      image_cache_size = ALIGN(image_cache_size, 16);
+      image_cache_size = align(image_cache_size, 16);
       image_cache_size *= input_channels;
-      image_cache_size = ALIGN(image_cache_size, 128);
+      image_cache_size = align(image_cache_size, 128);
    }
 
    ML_DBG("coefficients_size 0x%x (%d) image_size 0x%x (%d)\n", coef_cache_size, coef_cache_size, image_cache_size, image_cache_size);
@@ -1182,7 +1182,7 @@ create_nn_config(struct etna_ml_subgraph *subgraph, const struct etna_operation
       map->kernel_pattern_msb = 0x0;
       map->kernel_pattern_low = 0x0;
       map->kernel_pattern_high = 0x0;
-      map->kernel_cache_end_address = MAX2(MIN2(ALIGN(map->kernel_cache_start_address + coef_cache_size, 128), oc_sram_size), 0xa00);
+      map->kernel_cache_end_address = MAX2(MIN2(align(map->kernel_cache_start_address + coef_cache_size, 128), oc_sram_size), 0xa00);
    } else {
       /* Doesn't fit in the 512KB we have of on-chip SRAM */
       map->kernel_caching_mode = SRAM_CACHE_MODE_PARTIAL_CACHE;
diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml_nn_v7.c b/src/gallium/drivers/etnaviv/etnaviv_ml_nn_v7.c
index 22e18309cee..86dce8f6d5b 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_ml_nn_v7.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_ml_nn_v7.c
@@ -441,7 +441,7 @@ calculate_weight_bo_size(struct etna_ml_subgraph *subgraph, const struct etna_op
    struct pipe_context *context = subgraph->base.context;
    struct etna_context *ctx = etna_context(context);
    unsigned nn_core_count = etna_ml_get_core_info(ctx)->nn_core_count;
-   unsigned header_size = ALIGN(nn_core_count * 4, 64);
+   unsigned header_size = align(nn_core_count * 4, 64);
    unsigned input_channels = operation->addition ? 1 : operation->input_channels;
    unsigned output_channels = operation->addition ? 1 : operation->output_channels;
    unsigned cores_used = MIN2(output_channels, nn_core_count);
@@ -453,7 +453,7 @@ calculate_weight_bo_size(struct etna_ml_subgraph *subgraph, const struct etna_op
 
    weights_size = operation->weight_width * operation->weight_height * input_channels;
    core_size = 1 + 2 + (weights_size + 4 + 4) * kernels_per_core;
-   core_size_aligned = ALIGN(core_size, 64);
+   core_size_aligned = align(core_size, 64);
    compressed_size_aligned = header_size + core_size_aligned * cores_used;
 
    return compressed_size_aligned;
@@ -466,7 +466,7 @@ calculate_zrl_bits(struct etna_ml_subgraph *subgraph, const struct etna_operatio
    struct etna_context *ctx = etna_context(context);
    unsigned nn_core_count = etna_ml_get_core_info(ctx)->nn_core_count;
    unsigned max_zrl_bits = etna_ml_get_core_info(ctx)->nn_zrl_bits;
-   unsigned header_size = ALIGN(nn_core_count * 4, 64);
+   unsigned header_size = align(nn_core_count * 4, 64);
    unsigned input_channels = operation->addition ? 1 : operation->input_channels;
    unsigned output_channels = operation->addition ? 1 : operation->output_channels;
    unsigned cores_used = MIN2(output_channels, nn_core_count);
@@ -518,7 +518,7 @@ etna_ml_create_coeffs_v7(struct etna_ml_subgraph *subgraph, const struct etna_op
    struct pipe_context *context = subgraph->base.context;
    struct etna_context *ctx = etna_context(context);
    unsigned nn_core_count = etna_ml_get_core_info(ctx)->nn_core_count;
-   unsigned header_size = ALIGN(nn_core_count * 4, 64);
+   unsigned header_size = align(nn_core_count * 4, 64);
    unsigned input_channels = operation->addition ? 1 : operation->input_channels;
    unsigned output_channels = operation->addition ? 1 : operation->output_channels;
    unsigned cores_used = MIN2(output_channels, nn_core_count);
@@ -548,7 +548,7 @@ etna_ml_create_coeffs_v7(struct etna_ml_subgraph *subgraph, const struct etna_op
       else
          actual_size = write_core_sequential(subgraph, map, core, operation, zrl_bits);
 
-      actual_size = ALIGN(actual_size, 64);
+      actual_size = align(actual_size, 64);
       max_core_size = MAX2(actual_size, max_core_size);
 
       header[core] = actual_size;
diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml_nn_v8.c b/src/gallium/drivers/etnaviv/etnaviv_ml_nn_v8.c
index d9b23e74fce..0224b5702b9 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_ml_nn_v8.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_ml_nn_v8.c
@@ -685,7 +685,7 @@ create_bo(struct etna_ml_subgraph *subgraph, const struct etna_operation *operat
       input_channels = 2 * output_channels;
 
    unsigned header_size = 64;
-   unsigned body_size = ALIGN(DIV_ROUND_UP(output_channels, cores_used) * (input_channels * operation->weight_height * operation->weight_width + 4 + 4), 64) * 2;
+   unsigned body_size = align(DIV_ROUND_UP(output_channels, cores_used) * (input_channels * operation->weight_height * operation->weight_width + 4 + 4), 64) * 2;
    unsigned tail_size = 64;
    max_size = header_size + cores_used * body_size + tail_size;
 
@@ -803,7 +803,7 @@ etna_ml_create_coeffs_v8(struct etna_ml_subgraph *subgraph, const struct etna_op
    header->symbol_map = pack_symbol_map(symbol_map);
    header->version = 1;
 
-   map += ALIGN(sizeof(*header), 64) / 4;
+   map += align(sizeof(*header), 64) / 4;
 
    encoder_init(&encoder, symbol_map, map);
 
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_const.cc b/src/gallium/drivers/freedreno/a6xx/fd6_const.cc
index 4ae4efa3287..38011ca126b 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_const.cc
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_const.cc
@@ -553,7 +553,7 @@ fd6_emit_link_map(struct fd_context *ctx, fd_cs &cs,
    if (CHIP == A7XX && producer->compiler->load_shader_consts_via_preamble) {
       const struct ir3_const_state *const_state = ir3_const_state(consumer);
       int base = const_state->primitive_map_ubo.idx;
-      uint32_t size = ALIGN(consumer->input_size, 4);
+      uint32_t size = align(consumer->input_size, 4);
 
       fd6_upload_emit_driver_ubo(ctx, cs, consumer, base, size, producer->output_loc);
    } else {
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.cc b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.cc
index 1eecbe5bfb5..ed48933d12e 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.cc
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.cc
@@ -1366,8 +1366,8 @@ set_blit_scissor(struct fd_batch *batch, fd_cs &cs)
 
    blit_scissor.minx = 0;
    blit_scissor.miny = 0;
-   blit_scissor.maxx = ALIGN(pfb->width, 16);
-   blit_scissor.maxy = ALIGN(pfb->height, 4);
+   blit_scissor.maxx = align(pfb->width, 16);
+   blit_scissor.maxy = align(pfb->height, 4);
 
    fd_pkt4(cs, 2)
       .add(A6XX_RB_RESOLVE_CNTL_1(.x = blit_scissor.minx, .y = blit_scissor.miny))
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
index 2d5edb3cd1d..1ae0c0d89c5 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
@@ -647,7 +647,7 @@ ir3_get_private_mem(struct fd_context *ctx, const struct ir3_shader_variant *so)
       if (ctx->pvtmem[so->pvtmem_per_wave].bo)
          fd_bo_del(ctx->pvtmem[so->pvtmem_per_wave].bo);
 
-      uint32_t per_sp_size = ALIGN(per_fiber_size * fibers_per_sp, 1 << 12);
+      uint32_t per_sp_size = align(per_fiber_size * fibers_per_sp, 1 << 12);
       uint32_t total_size = per_sp_size * num_sp_cores;
 
       ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size = per_fiber_size;
diff --git a/src/gallium/drivers/iris/i915/iris_kmd_backend.c b/src/gallium/drivers/iris/i915/iris_kmd_backend.c
index 745bee79ed3..65beb19118c 100644
--- a/src/gallium/drivers/iris/i915/iris_kmd_backend.c
+++ b/src/gallium/drivers/iris/i915/iris_kmd_backend.c
@@ -372,7 +372,7 @@ i915_batch_submit(struct iris_batch *batch)
       .buffer_count = validation_count,
       .batch_start_offset = 0,
       /* This must be QWord aligned. */
-      .batch_len = ALIGN(batch->primary_batch_size, 8),
+      .batch_len = align(batch->primary_batch_size, 8),
       .flags = batch->i915.exec_flags |
                I915_EXEC_NO_RELOC |
                I915_EXEC_BATCH_FIRST |
diff --git a/src/gallium/drivers/iris/iris_bufmgr.c b/src/gallium/drivers/iris/iris_bufmgr.c
index 1fd412fd252..fafe2eed0de 100644
--- a/src/gallium/drivers/iris/iris_bufmgr.c
+++ b/src/gallium/drivers/iris/iris_bufmgr.c
@@ -2305,7 +2305,7 @@ intel_aux_map_buffer_alloc(void *driver_ctx, uint32_t size)
    struct iris_bufmgr *bufmgr = (struct iris_bufmgr *)driver_ctx;
 
    unsigned int page_size = getpagesize();
-   size = MAX2(ALIGN(size, page_size), page_size);
+   size = MAX2(align(size, page_size), page_size);
 
    struct iris_bo *bo = alloc_fresh_bo(bufmgr, size, BO_ALLOC_CAPTURE);
    if (!bo) {
diff --git a/src/gallium/drivers/iris/iris_indirect_gen.c b/src/gallium/drivers/iris/iris_indirect_gen.c
index bcf1c1be615..7b2a19b3103 100644
--- a/src/gallium/drivers/iris/iris_indirect_gen.c
+++ b/src/gallium/drivers/iris/iris_indirect_gen.c
@@ -398,7 +398,7 @@ emit_indirect_generate_draw(struct iris_batch *batch,
    float *vertices =
       upload_state(batch, ice->state.dynamic_uploader,
                    &ice->draw.generation.vertices,
-                   ALIGN(9 * sizeof(float), 8), 8);
+                   align(9 * sizeof(float), 8), 8);
 
    vertices[0] = x1; vertices[1] = y1; vertices[2] = z; /* v0 */
    vertices[3] = x0; vertices[4] = y1; vertices[5] = z; /* v1 */
diff --git a/src/gallium/drivers/iris/iris_resource.c b/src/gallium/drivers/iris/iris_resource.c
index 5ee54596347..aea55f64f57 100644
--- a/src/gallium/drivers/iris/iris_resource.c
+++ b/src/gallium/drivers/iris/iris_resource.c
@@ -2254,7 +2254,7 @@ iris_map_tiled_memcpy(struct iris_transfer *map)
    struct iris_resource *res = (struct iris_resource *) xfer->resource;
    struct isl_surf *surf = &res->surf;
 
-   xfer->stride = ALIGN(surf->row_pitch_B, 16);
+   xfer->stride = align(surf->row_pitch_B, 16);
    xfer->layer_stride = xfer->stride * box->height;
 
    unsigned x1, x2, y1, y2;
diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c
index 4ec026dea6f..9f984f39fee 100644
--- a/src/gallium/drivers/iris/iris_screen.c
+++ b/src/gallium/drivers/iris/iris_screen.c
@@ -617,7 +617,7 @@ iris_init_identifier_bo(struct iris_screen *screen)
 
    screen->workaround_address = (struct iris_address) {
       .bo = screen->workaround_bo,
-      .offset = ALIGN(
+      .offset = align(
          intel_debug_write_identifiers(bo_map, 4096, "Iris"), 32),
    };
 
diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index eef7df7a609..d8593f8947d 100644
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -9385,7 +9385,7 @@ iris_upload_gpgpu_walker(struct iris_context *ice,
          vfe.URBEntryAllocationSize = 2;
 
          vfe.CURBEAllocationSize =
-            ALIGN(cs_data->push.per_thread.regs * dispatch.threads +
+            align(cs_data->push.per_thread.regs * dispatch.threads +
                   cs_data->push.cross_thread.regs, 2);
       }
    }
@@ -9403,15 +9403,15 @@ iris_upload_gpgpu_walker(struct iris_context *ice,
       uint32_t *curbe_data_map =
          stream_state(batch, ice->state.dynamic_uploader,
                       &ice->state.last_res.cs_thread_ids,
-                      ALIGN(push_const_size, 64), 64,
+                      align(push_const_size, 64), 64,
                       &curbe_data_offset);
       assert(curbe_data_map);
-      memset(curbe_data_map, 0x5a, ALIGN(push_const_size, 64));
+      memset(curbe_data_map, 0x5a, align(push_const_size, 64));
       iris_fill_cs_push_const_buffer(screen, shader, dispatch.threads,
                                      curbe_data_map);
 
       iris_emit_cmd(batch, GENX(MEDIA_CURBE_LOAD), curbe) {
-         curbe.CURBETotalDataLength = ALIGN(push_const_size, 64);
+         curbe.CURBETotalDataLength = align(push_const_size, 64);
          curbe.CURBEDataStartAddress = curbe_data_offset;
       }
    }
diff --git a/src/gallium/drivers/lima/drm-shim/lima_noop.c b/src/gallium/drivers/lima/drm-shim/lima_noop.c
index 2732029075e..d90af2512b9 100644
--- a/src/gallium/drivers/lima/drm-shim/lima_noop.c
+++ b/src/gallium/drivers/lima/drm-shim/lima_noop.c
@@ -63,7 +63,7 @@ lima_ioctl_gem_create(int fd, unsigned long request, void *arg)
 
    struct shim_fd *shim_fd = drm_shim_fd_lookup(fd);
    struct shim_bo *bo = calloc(1, sizeof(*bo));
-   size_t size = ALIGN(create->size, 4096);
+   size_t size = align(create->size, 4096);
 
    drm_shim_bo_init(bo, size);
 
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index 2701cd9ca6d..766ce046f75 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -1343,7 +1343,7 @@ out:
    info_out->bin.maxGPR = prog->maxGPR;
    info_out->bin.code = prog->code;
    info_out->bin.codeSize = prog->binSize;
-   info_out->bin.tlsSpace = ALIGN(prog->tlsSize, 0x10);
+   info_out->bin.tlsSpace = align(prog->tlsSize, 0x10);
 
    delete prog;
    nv50_ir::Target::destroy(targ);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.c b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
index eb65544be6f..f45c9984126 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_compute.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
@@ -389,7 +389,7 @@ nv50_compute_validate_surfaces(struct nv50_context *nv50)
          PUSH_DATAh(push, res->address + buffer->buffer_offset);
          PUSH_DATA (push, res->address + buffer->buffer_offset);
          PUSH_DATA (push, 0); /* pitch? */
-         PUSH_DATA (push, ALIGN(buffer->buffer_size, 256) - 1);
+         PUSH_DATA (push, align(buffer->buffer_size, 256) - 1);
          PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
          BCTX_REFN(nv50->bufctx_cp, CP_BUF, res, RDWR);
          util_range_add(&res->base, &res->valid_buffer_range,
@@ -420,7 +420,7 @@ nv50_compute_validate_surfaces(struct nv50_context *nv50)
             PUSH_DATAh(push, address);
             PUSH_DATA (push, address);
             PUSH_DATA (push, 0); /* pitch? */
-            PUSH_DATA (push, ALIGN(view->u.buf.size, 0x100) - 1);
+            PUSH_DATA (push, align(view->u.buf.size, 0x100) - 1);
             PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
          } else {
             struct nv50_miptree *mt = nv50_miptree(view->resource);
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 60e79da6a36..77f4a498f45 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -391,7 +391,7 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir,
                shader->info.nr_param_exports || shader->info.nr_prim_param_exports,
                &out_needs_scratch_ring,
                shader->wave_size,
-               ALIGN(max_workgroup_size, shader->wave_size),
+               align(max_workgroup_size, shader->wave_size),
                false,
                false);
       shader->info.uses_mesh_scratch_ring = out_needs_scratch_ring;
@@ -923,7 +923,7 @@ static void run_late_optimization_and_lowering_passes(struct si_nir_shader_ctx *
 
    if (si_should_clear_lds(sel->screen, nir)) {
       const unsigned chunk_size = 16; /* max single store size */
-      const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
+      const unsigned shared_size = align(nir->info.shared_size, chunk_size);
       NIR_PASS(_, nir, nir_clear_shared_memory, shared_size, chunk_size);
    }
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_binary.c b/src/gallium/drivers/radeonsi/si_shader_binary.c
index 73a044ee820..963b90cb6ba 100644
--- a/src/gallium/drivers/radeonsi/si_shader_binary.c
+++ b/src/gallium/drivers/radeonsi/si_shader_binary.c
@@ -472,7 +472,7 @@ void si_shader_dump_stats_for_shader_db(struct si_screen *screen, struct si_shad
                       "HSPatchOuts: %u ESOutputs: %u GSOutputs: %u VSOutputs: %u PSOutputs: %u "
                       "InlineUniforms: %u DivergentLoop: %u (%s, W%u)",
                       conf->num_sgprs, conf->num_vgprs, si_get_shader_binary_size(screen, shader),
-                      ALIGN(conf->lds_size, ac_shader_get_lds_alloc_granularity(screen->info.gfx_level)),
+                      align(conf->lds_size, ac_shader_get_lds_alloc_granularity(screen->info.gfx_level)),
                       conf->scratch_bytes_per_wave, shader->info.max_simd_waves,
                       conf->spilled_sgprs, conf->spilled_vgprs, shader->info.private_mem_vgprs,
                       num_ls_outputs, num_hs_outputs,
diff --git a/src/gallium/drivers/radeonsi/si_sqtt.c b/src/gallium/drivers/radeonsi/si_sqtt.c
index 13cc8aa441f..a38d1b2630f 100644
--- a/src/gallium/drivers/radeonsi/si_sqtt.c
+++ b/src/gallium/drivers/radeonsi/si_sqtt.c
@@ -769,7 +769,7 @@ si_sqtt_add_code_object(struct si_context *sctx,
       record->shader_data[i].hw_stage = hw_stage;
       record->shader_data[i].is_combined = false;
       record->shader_data[i].scratch_memory_size = shader->config.scratch_bytes_per_wave;
-      record->shader_data[i].lds_size = ALIGN(shader->config.lds_size, ac_shader_get_lds_alloc_granularity(sctx->gfx_level));
+      record->shader_data[i].lds_size = align(shader->config.lds_size, ac_shader_get_lds_alloc_granularity(sctx->gfx_level));
       record->shader_data[i].wavefront_size = shader->wave_size;
 
       record->shader_stages_mask |= 1 << i;
diff --git a/src/gallium/drivers/rocket/rkt_coefs.c b/src/gallium/drivers/rocket/rkt_coefs.c
index 82258e70aaa..07399288118 100644
--- a/src/gallium/drivers/rocket/rkt_coefs.c
+++ b/src/gallium/drivers/rocket/rkt_coefs.c
@@ -31,12 +31,12 @@ rkt_fill_weights(struct rkt_ml_subgraph *subgraph,
 
    input_channels = MAX2(input_channels, FEATURE_ATOMIC_SIZE);
 
-   output_channels = ALIGN(output_channels, 2);
+   output_channels = align(output_channels, 2);
    if (rkt_is_depthwise(poperation))
       output_channels = 1;
 
    weights_size = weights_width * weights_height * output_channels *
-                  ALIGN(input_channels, WEIGHT_ATOMIC_SIZE) * 2;
+                  align(input_channels, WEIGHT_ATOMIC_SIZE) * 2;
 
    rsc =
       pipe_buffer_create(pcontext->screen, 0, PIPE_USAGE_DEFAULT, weights_size);
@@ -62,7 +62,7 @@ rkt_fill_weights(struct rkt_ml_subgraph *subgraph,
                      unsigned oc = oc1 * WEIGHT_ATOMIC_SIZE + oc2;
                      unsigned ic = ic1 * input_channel_groups + ic2;
                      if (output_channels_real > 2 &&
-                         oc >= ALIGN(output_channels_real, 2))
+                         oc >= align(output_channels_real, 2))
                         continue;
 
                      if (oc >= output_channels_real)
diff --git a/src/gallium/drivers/rocket/rkt_ml.c b/src/gallium/drivers/rocket/rkt_ml.c
index ce59c4ed1d4..8da2a589488 100644
--- a/src/gallium/drivers/rocket/rkt_ml.c
+++ b/src/gallium/drivers/rocket/rkt_ml.c
@@ -105,7 +105,7 @@ compile_operation(struct rkt_ml_subgraph *subgraph,
 
       unsigned size =
          util_dynarray_num_elements(&regcfgs[i], uint64_t) * sizeof(uint64_t);
-      regcfg_total_size += ALIGN(size, 64);
+      regcfg_total_size += align(size, 64);
    }
 
    operation->regcmd = pipe_buffer_create(pcontext->screen, 0,
@@ -129,13 +129,13 @@ compile_operation(struct rkt_ml_subgraph *subgraph,
             util_dynarray_element(&regcfgs[i], uint64_t, reg_count - 3);
 
          uint64_t addr = rkt_resource(operation->regcmd)->phys_addr +
-                         regcmd_offset + ALIGN(size * sizeof(uint64_t), 64);
+                         regcmd_offset + align(size * sizeof(uint64_t), 64);
          *next_address_reg |= addr << 16;
 
          unsigned regs_to_fetch =
             util_dynarray_num_elements(&regcfgs[i + 1], uint64_t);
          regs_to_fetch -= 4;
-         regs_to_fetch = ALIGN(regs_to_fetch / 2, 2);
+         regs_to_fetch = align(regs_to_fetch / 2, 2);
          *reg_count_reg |= regs_to_fetch << 16;
       }
 
@@ -151,7 +151,7 @@ compile_operation(struct rkt_ml_subgraph *subgraph,
          rkt_dump_buffer(regcmd, "regcmd", 0, i, regcmd_offset,
                          (size + 4) * sizeof(uint64_t));
 
-      regcmd_offset += ALIGN(size * sizeof(uint64_t), 64);
+      regcmd_offset += align(size * sizeof(uint64_t), 64);
    }
 
    pipe_buffer_unmap(pcontext, transfer);
diff --git a/src/gallium/drivers/rocket/rkt_task.c b/src/gallium/drivers/rocket/rkt_task.c
index ac9afa4e24a..0fa50eb7a4b 100644
--- a/src/gallium/drivers/rocket/rkt_task.c
+++ b/src/gallium/drivers/rocket/rkt_task.c
@@ -101,7 +101,7 @@ fill_task(struct rkt_ml_subgraph *subgraph,
 
    task->input_height = operation->input_height;
    task->input_channels =
-      ALIGN(MAX2(operation->input_channels, FEATURE_ATOMIC_SIZE),
+      align(MAX2(operation->input_channels, FEATURE_ATOMIC_SIZE),
             FEATURE_ATOMIC_SIZE);
    task->input_channels_real = operation->input_channels;
    task->input_zero_point = operation->input_zero_point;
@@ -111,11 +111,11 @@ fill_task(struct rkt_ml_subgraph *subgraph,
    task->output_height = operation->output_height;
 
    task->output_channels_real = operation->output_channels;
-   task->output_channels = ALIGN(MAX2(operation->output_channels, 32), 32);
+   task->output_channels = align(MAX2(operation->output_channels, 32), 32);
    if (operation->depthwise) {
       if (task->output_channels_real <= 32)
          task->output_channels *= 2;
-      task->output_channels = ALIGN(task->output_channels, 64);
+      task->output_channels = align(task->output_channels, 64);
    }
 
    task->output_zero_point = operation->output_zero_point;
@@ -170,7 +170,7 @@ fill_task(struct rkt_ml_subgraph *subgraph,
    if (operation->depthwise)
       task->weights_kernels = 1;
    else
-      task->weights_kernels = ALIGN(operation->output_channels, 2);
+      task->weights_kernels = align(operation->output_channels, 2);
 
    task->surfaces_per_row = task->output_width * task->output_height * 2;
    if (operation->depthwise)
diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
index b7a7f8d3b86..f5bc94d6c42 100644
--- a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
+++ b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
@@ -201,8 +201,8 @@ virgl_drm_winsys_resource_create_blob(struct virgl_winsys *qws,
       return NULL;
 
    /* Make sure blob is page aligned. */
-   width = ALIGN(width, getpagesize());
-   size = ALIGN(size, getpagesize());
+   width = align(width, getpagesize());
+   size = align(size, getpagesize());
 
    blob_id = p_atomic_inc_return(&qdws->blob_id);
    cmd[0] = VIRGL_CMD0(VIRGL_CCMD_PIPE_RESOURCE_CREATE, 0, VIRGL_PIPE_RES_CREATE_SIZE);
diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c
index 5c4bd39eb63..bea4d19e2fd 100644
--- a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c
+++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c
@@ -311,8 +311,8 @@ virgl_vtest_winsys_resource_create(struct virgl_winsys *vws,
 
    if ((flags & (VIRGL_RESOURCE_FLAG_MAP_PERSISTENT |
                  VIRGL_RESOURCE_FLAG_MAP_COHERENT))) {
-      width = ALIGN(width, getpagesize());
-      size = ALIGN(size, getpagesize());
+      width = align(width, getpagesize());
+      size = align(size, getpagesize());
       new_handle = virgl_vtest_winsys_resource_create_blob(vws, target, format, bind,
                                                            width, height, depth,
                                                            array_size, last_level, nr_samples,
diff --git a/src/imagination/vulkan/pvr_image.c b/src/imagination/vulkan/pvr_image.c
index 4065d97e018..5d88a09c60d 100644
--- a/src/imagination/vulkan/pvr_image.c
+++ b/src/imagination/vulkan/pvr_image.c
@@ -89,7 +89,7 @@ static void pvr_image_init_physical_extent(struct pvr_image *image)
       if (image->vk.usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
                              VK_IMAGE_USAGE_TRANSFER_DST_BIT)) {
          image->physical_extent.width =
-            ALIGN(image->physical_extent.width,
+            align(image->physical_extent.width,
                   ROGUE_PBESTATE_REG_WORD0_LINESTRIDE_UNIT_SIZE);
       }
    }
@@ -110,11 +110,11 @@ static void pvr_image_setup_mip_levels(struct pvr_image *image)
    for (uint32_t i = 0; i < image->vk.mip_levels; i++) {
       struct pvr_mip_level *mip_level = &image->mip_levels[i];
 
-      mip_level->pitch = cpp * ALIGN(extent.width, extent_alignment);
-      mip_level->height_pitch = ALIGN(extent.height, extent_alignment);
+      mip_level->pitch = cpp * align(extent.width, extent_alignment);
+      mip_level->height_pitch = align(extent.height, extent_alignment);
       mip_level->size = image->vk.samples * mip_level->pitch *
                         mip_level->height_pitch *
-                        ALIGN(extent.depth, extent_alignment);
+                        align(extent.depth, extent_alignment);
       mip_level->offset = image->layer_size;
 
       image->layer_size += mip_level->size;
@@ -129,11 +129,11 @@ static void pvr_image_setup_mip_levels(struct pvr_image *image)
        * were present so we need to account for that in the `layer_size`.
        */
       while (extent.height != 1 || extent.width != 1 || extent.depth != 1) {
-         const uint32_t height_pitch = ALIGN(extent.height, extent_alignment);
-         const uint32_t pitch = cpp * ALIGN(extent.width, extent_alignment);
+         const uint32_t height_pitch = align(extent.height, extent_alignment);
+         const uint32_t pitch = cpp * align(extent.width, extent_alignment);
 
          image->layer_size += image->vk.samples * pitch * height_pitch *
-                              ALIGN(extent.depth, extent_alignment);
+                              align(extent.depth, extent_alignment);
 
          extent.height = u_minify(extent.height, 1);
          extent.width = u_minify(extent.width, 1);
diff --git a/src/imagination/vulkan/pvr_robustness.c b/src/imagination/vulkan/pvr_robustness.c
index 88b0481725d..edfd66217ce 100644
--- a/src/imagination/vulkan/pvr_robustness.c
+++ b/src/imagination/vulkan/pvr_robustness.c
@@ -61,7 +61,7 @@ VkResult pvr_init_robustness_buffer(struct pvr_device *device)
    VkResult result;
 
 #define ROBUSTNESS_BUFFER_OFFSET_ALIGN16(cur_offset, add) \
-   ((uint16_t)ALIGN((cur_offset + (uint16_t)(add)), 16))
+   ((uint16_t)align((cur_offset + (uint16_t)(add)), 16))
 
    robustness_buffer_offsets[PVR_ROBUSTNESS_BUFFER_FORMAT_UINT64] = offset;
    offset = ROBUSTNESS_BUFFER_OFFSET_ALIGN16(offset, sizeof(uint64_t) * 4);
diff --git a/src/intel/blorp/blorp.c b/src/intel/blorp/blorp.c
index b0913a1a3ad..4c43d7cde35 100644
--- a/src/intel/blorp/blorp.c
+++ b/src/intel/blorp/blorp.c
@@ -314,8 +314,8 @@ blorp_hiz_op(struct blorp_batch *batch, struct blorp_surf *surf,
                            params.depth.view.base_level);
       params.y1 = u_minify(params.depth.surf.logical_level0_px.height,
                            params.depth.view.base_level);
-      params.x1 = ALIGN(params.x1, 8);
-      params.y1 = ALIGN(params.y1, 4);
+      params.x1 = align(params.x1, 8);
+      params.y1 = align(params.y1, 4);
 
       if (params.depth.view.base_level == 0) {
          /* TODO: What about MSAA? */
diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
index a3daf620d02..df9a317a447 100644
--- a/src/intel/blorp/blorp_blit.c
+++ b/src/intel/blorp/blorp_blit.c
@@ -1731,9 +1731,9 @@ blorp_surf_retile_w_to_y(const struct isl_device *isl_dev,
    const unsigned x_align = 8, y_align = info->surf.samples != 0 ? 8 : 4;
    info->surf.tiling = ISL_TILING_Y0;
    info->surf.logical_level0_px.width =
-      ALIGN(info->surf.logical_level0_px.width, x_align) * 2;
+      align(info->surf.logical_level0_px.width, x_align) * 2;
    info->surf.logical_level0_px.height =
-      ALIGN(info->surf.logical_level0_px.height, y_align) / 2;
+      align(info->surf.logical_level0_px.height, y_align) / 2;
    info->tile_x_sa *= 2;
    info->tile_y_sa /= 2;
 }
@@ -1984,8 +1984,8 @@ try_blorp_blit(struct blorp_batch *batch,
          isl_get_interleaved_msaa_px_size_sa(params->dst.surf.samples);
       params->x0 = ROUND_DOWN_TO(params->x0, 2) * px_size_sa.width;
       params->y0 = ROUND_DOWN_TO(params->y0, 2) * px_size_sa.height;
-      params->x1 = ALIGN(params->x1, 2) * px_size_sa.width;
-      params->y1 = ALIGN(params->y1, 2) * px_size_sa.height;
+      params->x1 = align(params->x1, 2) * px_size_sa.width;
+      params->y1 = align(params->y1, 2) * px_size_sa.height;
 
       blorp_surf_fake_interleaved_msaa(batch->blorp->isl_dev, &params->dst);
 
@@ -2045,8 +2045,8 @@ try_blorp_blit(struct blorp_batch *batch,
       const unsigned y_align = params->dst.surf.samples != 0 ? 8 : 4;
       params->x0 = ROUND_DOWN_TO(params->x0, x_align) * 2;
       params->y0 = ROUND_DOWN_TO(params->y0, y_align) / 2;
-      params->x1 = ALIGN(params->x1, x_align) * 2;
-      params->y1 = ALIGN(params->y1, y_align) / 2;
+      params->x1 = align(params->x1, x_align) * 2;
+      params->y1 = align(params->y1, y_align) / 2;
 
       /* Retile the surface to Y-tiled */
       blorp_surf_retile_w_to_y(batch->blorp->isl_dev, &params->dst);
diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c
index 845ebfb42e1..dd4d6e7af21 100644
--- a/src/intel/blorp/blorp_clear.c
+++ b/src/intel/blorp/blorp_clear.c
@@ -387,8 +387,8 @@ get_fast_clear_rect(const struct isl_device *dev,
 
    *x0 = ROUND_DOWN_TO(*x0,  x_align) / x_scaledown;
    *y0 = ROUND_DOWN_TO(*y0, y_align) / y_scaledown;
-   *x1 = ALIGN(*x1, x_align) / x_scaledown;
-   *y1 = ALIGN(*y1, y_align) / y_scaledown;
+   *x1 = align(*x1, x_align) / x_scaledown;
+   *y1 = align(*y1, y_align) / y_scaledown;
 }
 
 static void
@@ -541,7 +541,7 @@ blorp_fast_clear(struct blorp_batch *batch,
          assert(surf->surf->levels > 1 ||
                 surf->surf->logical_level0_px.d > 1 ||
                 surf->surf->logical_level0_px.a > 1);
-         const int phys_height0 = ALIGN(surf->surf->logical_level0_px.h,
+         const int phys_height0 = align(surf->surf->logical_level0_px.h,
                                         surf->surf->image_alignment_el.h);
          unaligned_height = phys_height0 % 32;
          size_B = (int64_t)surf->surf->row_pitch_B * (phys_height0 - unaligned_height);
@@ -583,7 +583,7 @@ blorp_fast_clear(struct blorp_batch *batch,
                                  mem_surf.addr.offset, size_B, ISL_TILING_4);
                assert(isl_surf.logical_level0_px.h == 32);
                assert(isl_surf.logical_level0_px.a == 1);
-               isl_surf.row_pitch_B = ALIGN(isl_surf.row_pitch_B, 16 * 128);
+               isl_surf.row_pitch_B = align(isl_surf.row_pitch_B, 16 * 128);
             } else {
                isl_surf_from_mem(batch->blorp->isl_dev, &isl_surf,
                                  mem_surf.addr.offset, size_B, ISL_TILING_64);
@@ -1341,8 +1341,8 @@ blorp_ccs_resolve(struct blorp_batch *batch,
          x_scaledown = aux_fmtl->bw / 2;
          y_scaledown = aux_fmtl->bh / 2;
       }
-      params.x1 = ALIGN(params.x1, x_scaledown) / x_scaledown;
-      params.y1 = ALIGN(params.y1, y_scaledown) / y_scaledown;
+      params.x1 = align(params.x1, x_scaledown) / x_scaledown;
+      params.y1 = align(params.y1, y_scaledown) / y_scaledown;
    }
 
    if (batch->blorp->isl_dev->info->ver >= 10) {
diff --git a/src/intel/blorp/blorp_genX_exec_brw.h b/src/intel/blorp/blorp_genX_exec_brw.h
index aa959f9cca7..70b1fc4c6e5 100644
--- a/src/intel/blorp/blorp_genX_exec_brw.h
+++ b/src/intel/blorp/blorp_genX_exec_brw.h
@@ -1656,7 +1656,7 @@ blorp_get_compute_push_const(struct blorp_batch *batch,
 {
    const struct brw_cs_prog_data *cs_prog_data = params->cs_prog_data;
    const unsigned push_const_size =
-      ALIGN(brw_cs_push_const_total_size(cs_prog_data, threads), 64);
+      align(brw_cs_push_const_total_size(cs_prog_data, threads), 64);
    assert(cs_prog_data->push.cross_thread.size +
           cs_prog_data->push.per_thread.size == sizeof(params->wm_inputs));
 
@@ -1860,7 +1860,7 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
       vfe.URBEntryAllocationSize = 2;
 
       const uint32_t vfe_curbe_allocation =
-         ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads +
+         align(cs_prog_data->push.per_thread.regs * dispatch.threads +
                cs_prog_data->push.cross_thread.regs, 2);
       vfe.CURBEAllocationSize = vfe_curbe_allocation;
    }
diff --git a/src/intel/blorp/blorp_genX_exec_elk.h b/src/intel/blorp/blorp_genX_exec_elk.h
index 7780671f920..3cfd9a9ebe2 100644
--- a/src/intel/blorp/blorp_genX_exec_elk.h
+++ b/src/intel/blorp/blorp_genX_exec_elk.h
@@ -1882,7 +1882,7 @@ blorp_get_compute_push_const(struct blorp_batch *batch,
 {
    const struct elk_cs_prog_data *cs_prog_data = params->cs_prog_data;
    const unsigned push_const_size =
-      ALIGN(elk_cs_push_const_total_size(cs_prog_data, threads), 64);
+      align(elk_cs_push_const_total_size(cs_prog_data, threads), 64);
    assert(cs_prog_data->push.cross_thread.size +
           cs_prog_data->push.per_thread.size == sizeof(params->wm_inputs));
 
@@ -1981,7 +1981,7 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
       vfe.URBEntryAllocationSize = GFX_VER >= 8 ? 2 : 0;
 
       const uint32_t vfe_curbe_allocation =
-         ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads +
+         align(cs_prog_data->push.per_thread.regs * dispatch.threads +
                cs_prog_data->push.cross_thread.regs, 2);
       vfe.CURBEAllocationSize = vfe_curbe_allocation;
    }
diff --git a/src/intel/common/intel_urb_config.c b/src/intel/common/intel_urb_config.c
index 5b40d12516d..82ce5aa47de 100644
--- a/src/intel/common/intel_urb_config.c
+++ b/src/intel/common/intel_urb_config.c
@@ -135,7 +135,7 @@ intel_get_urb_config(const struct intel_device_info *devinfo,
     * Round them all up.
     */
    for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
-      min_entries[i] = ALIGN(min_entries[i], granularity[i]);
+      min_entries[i] = align(min_entries[i], granularity[i]);
    }
 
    unsigned entry_size_bytes[4];
@@ -323,7 +323,7 @@ intel_get_mesh_urb_config(const struct intel_device_info *devinfo,
     *       in slices beyond Slice0) of the MESH URB allocation, specified in
     *       multiples of 8 KB.
     */
-   push_constant_kb = ALIGN(push_constant_kb, 8);
+   push_constant_kb = align(push_constant_kb, 8);
    total_urb_kb -= push_constant_kb;
    const unsigned total_urb_avail_mesh_task_kb = total_urb_kb;
 
@@ -360,9 +360,9 @@ intel_get_mesh_urb_config(const struct intel_device_info *devinfo,
     */
    const unsigned min_mesh_entries = urb_cfg->size[MESA_SHADER_MESH] < 9 ? 8 : 1;
    const unsigned min_task_entries = urb_cfg->size[MESA_SHADER_TASK] < 9 ? 8 : 1;
-   const unsigned min_mesh_urb_kb = ALIGN(urb_cfg->size[MESA_SHADER_MESH] *
+   const unsigned min_mesh_urb_kb = align(urb_cfg->size[MESA_SHADER_MESH] *
                                           min_mesh_entries * 64, 1024) / 1024;
-   const unsigned min_task_urb_kb = ALIGN(urb_cfg->size[MESA_SHADER_TASK] *
+   const unsigned min_task_urb_kb = align(urb_cfg->size[MESA_SHADER_TASK] *
                                           min_task_entries * 64, 1024) / 1024;
 
    total_urb_kb -= (min_mesh_urb_kb + min_task_urb_kb);
@@ -380,8 +380,8 @@ intel_get_mesh_urb_config(const struct intel_device_info *devinfo,
     *       in slices beyond Slice0) of the TASK URB allocation, specified in
     *       multiples of 8 KB.
     */
-   if ((total_urb_avail_mesh_task_kb - ALIGN(mesh_urb_kb, 8)) >= min_task_entries) {
-      mesh_urb_kb = ALIGN(mesh_urb_kb, 8);
+   if ((total_urb_avail_mesh_task_kb - align(mesh_urb_kb, 8)) >= min_task_entries) {
+      mesh_urb_kb = align(mesh_urb_kb, 8);
    } else {
       mesh_urb_kb = ROUND_DOWN_TO(mesh_urb_kb, 8);
    }
diff --git a/src/intel/compiler/brw/brw_compile_fs.cpp b/src/intel/compiler/brw/brw_compile_fs.cpp
index bcb7cfc4aa6..205a8af2024 100644
--- a/src/intel/compiler/brw/brw_compile_fs.cpp
+++ b/src/intel/compiler/brw/brw_compile_fs.cpp
@@ -1219,7 +1219,7 @@ remap_attr_reg(brw_shader &s,
    const bool per_prim = src.nr < prog_data->num_per_primitive_inputs;
    const unsigned base = urb_start +
       (per_prim ? 0 :
-       ALIGN(prog_data->num_per_primitive_inputs / 2,
+       align(prog_data->num_per_primitive_inputs / 2,
              reg_unit(s.devinfo)) * s.max_polygons);
    const unsigned idx = per_prim ? src.nr :
       src.nr - prog_data->num_per_primitive_inputs;
diff --git a/src/intel/compiler/brw/brw_compile_gs.cpp b/src/intel/compiler/brw/brw_compile_gs.cpp
index a24a9767a49..e9da41e651a 100644
--- a/src/intel/compiler/brw/brw_compile_gs.cpp
+++ b/src/intel/compiler/brw/brw_compile_gs.cpp
@@ -223,7 +223,7 @@ brw_compile_gs(const struct brw_compiler *compiler,
 
    /* 1 HWORD = 32 bytes = 256 bits */
    prog_data->control_data_header_size_hwords =
-      ALIGN(control_data_header_size_bits, 256) / 256;
+      align(control_data_header_size_bits, 256) / 256;
 
    /* Compute the output vertex size.
     *
@@ -276,7 +276,7 @@ brw_compile_gs(const struct brw_compiler *compiler,
    unsigned output_vertex_size_bytes = prog_data->base.vue_map.num_slots * 16;
    assert(output_vertex_size_bytes <= GFX7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES);
    prog_data->output_vertex_size_hwords =
-      ALIGN(output_vertex_size_bytes, 32) / 32;
+      align(output_vertex_size_bytes, 32) / 32;
 
    /* Compute URB entry size.  The maximum allowed URB entry size is 32k.
     * That divides up as follows:
@@ -330,7 +330,7 @@ brw_compile_gs(const struct brw_compiler *compiler,
 
 
    /* URB entry sizes are stored as a multiple of 64 bytes in gfx7+. */
-   prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
+   prog_data->base.urb_entry_size = align(output_size_bytes, 64) / 64;
 
    assert(nir->info.gs.output_primitive < ARRAY_SIZE(gl_prim_to_hw_prim));
    prog_data->output_topology =
diff --git a/src/intel/compiler/brw/brw_compile_mesh.cpp b/src/intel/compiler/brw/brw_compile_mesh.cpp
index 3d07da82ad3..ead45643807 100644
--- a/src/intel/compiler/brw/brw_compile_mesh.cpp
+++ b/src/intel/compiler/brw/brw_compile_mesh.cpp
@@ -124,7 +124,7 @@ brw_nir_lower_tue_outputs(nir_shader *nir, brw_tue_map *map)
    NIR_PASS(_, nir, nir_lower_explicit_io,
             nir_var_mem_task_payload, nir_address_format_32bit_offset);
 
-   map->size_dw = ALIGN(DIV_ROUND_UP(nir->info.task_payload_size, 4), 8);
+   map->size_dw = align(DIV_ROUND_UP(nir->info.task_payload_size, 4), 8);
 }
 
 static void
@@ -203,7 +203,7 @@ brw_nir_align_launch_mesh_workgroups_instr(nir_builder *b,
    /* This will avoid special case in nir_lower_task_shader dealing with
     * not vec4-aligned payload when payload_in_shared workaround is enabled.
     */
-   nir_intrinsic_set_range(intrin, ALIGN(range, 16));
+   nir_intrinsic_set_range(intrin, align(range, 16));
 
    return true;
 }
@@ -505,7 +505,7 @@ brw_nir_lower_tue_inputs(nir_shader *nir, const brw_tue_map *map)
       /* The types for Task Output and Mesh Input should match, so their sizes
        * should also match.
        */
-      assert(!map || map->size_dw == ALIGN(DIV_ROUND_UP(nir->info.task_payload_size, 4), 8));
+      assert(!map || map->size_dw == align(DIV_ROUND_UP(nir->info.task_payload_size, 4), 8));
    } else {
       /* Mesh doesn't read any input, to make it clearer set the
        * task_payload_size to zero instead of keeping an incomplete size that
diff --git a/src/intel/compiler/brw/brw_compile_tcs.cpp b/src/intel/compiler/brw/brw_compile_tcs.cpp
index 64a6e29fab2..0c6099cd07b 100644
--- a/src/intel/compiler/brw/brw_compile_tcs.cpp
+++ b/src/intel/compiler/brw/brw_compile_tcs.cpp
@@ -263,7 +263,7 @@ brw_compile_tcs(const struct brw_compiler *compiler,
       return NULL;
 
    /* URB entry sizes are stored as a multiple of 64 bytes. */
-   vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
+   vue_prog_data->urb_entry_size = align(output_size_bytes, 64) / 64;
 
    /* HS does not use the usual payload pushing from URB to GRFs,
     * because we don't have enough registers for a full-size payload, and
diff --git a/src/intel/compiler/brw/brw_compile_tes.cpp b/src/intel/compiler/brw/brw_compile_tes.cpp
index 2f8b9d93be3..40e0f7d4da5 100644
--- a/src/intel/compiler/brw/brw_compile_tes.cpp
+++ b/src/intel/compiler/brw/brw_compile_tes.cpp
@@ -133,7 +133,7 @@ brw_compile_tes(const struct brw_compiler *compiler,
       BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
 
    /* URB entry sizes are stored as a multiple of 64 bytes. */
-   prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
+   prog_data->base.urb_entry_size = align(output_size_bytes, 64) / 64;
 
    prog_data->base.urb_read_length = 0;
 
diff --git a/src/intel/compiler/brw/brw_eu_emit.c b/src/intel/compiler/brw/brw_eu_emit.c
index 66762b61e35..c730f61471c 100644
--- a/src/intel/compiler/brw/brw_eu_emit.c
+++ b/src/intel/compiler/brw/brw_eu_emit.c
@@ -407,7 +407,7 @@ brw_append_insns(struct brw_codegen *p, unsigned nr_insn, unsigned alignment)
    assert(util_is_power_of_two_or_zero(sizeof(brw_eu_inst)));
    assert(util_is_power_of_two_or_zero(alignment));
    const unsigned align_insn = MAX2(alignment / sizeof(brw_eu_inst), 1);
-   const unsigned start_insn = ALIGN(p->nr_insn, align_insn);
+   const unsigned start_insn = align(p->nr_insn, align_insn);
    const unsigned new_nr_insn = start_insn + nr_insn;
 
    if (p->store_size < new_nr_insn) {
diff --git a/src/intel/compiler/brw/brw_from_nir.cpp b/src/intel/compiler/brw/brw_from_nir.cpp
index 615f150705e..aa66a1dfe13 100644
--- a/src/intel/compiler/brw/brw_from_nir.cpp
+++ b/src/intel/compiler/brw/brw_from_nir.cpp
@@ -7295,7 +7295,7 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
             components = last_component - first_read_component + 1;
          }
 
-         total = ALIGN(components, REG_SIZE * reg_unit(devinfo) / 4);
+         total = align(components, REG_SIZE * reg_unit(devinfo) / 4);
          dest = ubld.vgrf(BRW_TYPE_UD, total);
       } else {
          total = components * bld.dispatch_width();
@@ -7902,7 +7902,7 @@ brw_from_nir(brw_shader *s)
     */
    brw_from_nir_setup_outputs(ntb);
    brw_from_nir_emit_system_values(ntb);
-   ntb.s.last_scratch = ALIGN(ntb.nir->scratch_size, 4) * ntb.s.dispatch_width;
+   ntb.s.last_scratch = align(ntb.nir->scratch_size, 4) * ntb.s.dispatch_width;
 
    brw_from_nir_emit_impl(ntb, nir_shader_get_entrypoint((nir_shader *)ntb.nir));
 
diff --git a/src/intel/compiler/brw/brw_inst.h b/src/intel/compiler/brw/brw_inst.h
index f439a2100f9..f867120073d 100644
--- a/src/intel/compiler/brw/brw_inst.h
+++ b/src/intel/compiler/brw/brw_inst.h
@@ -527,7 +527,7 @@ brw_flag_mask(const brw_inst *inst, unsigned width)
    assert(util_is_power_of_two_nonzero(width));
    const unsigned start = (inst->flag_subreg * 16 + inst->group) &
                           ~(width - 1);
-   const unsigned end = start + ALIGN(inst->exec_size, width);
+   const unsigned end = start + align(inst->exec_size, width);
    return ((1 << DIV_ROUND_UP(end, 8)) - 1) & ~((1 << (start / 8)) - 1);
 }
 
diff --git a/src/intel/compiler/brw/brw_lower.cpp b/src/intel/compiler/brw/brw_lower.cpp
index 7bc7b7e58cc..7214b9658e1 100644
--- a/src/intel/compiler/brw/brw_lower.cpp
+++ b/src/intel/compiler/brw/brw_lower.cpp
@@ -467,7 +467,7 @@ brw_lower_find_live_channel(brw_shader &s)
           * specified quarter control as result.
           */
          if (inst->group > 0)
-            ubld.SHR(mask, mask, brw_imm_ud(ALIGN(inst->group, 8)));
+            ubld.SHR(mask, mask, brw_imm_ud(align(inst->group, 8)));
 
          ubld.AND(mask, exec_mask, mask);
          exec_mask = mask;
diff --git a/src/intel/compiler/brw/brw_nir_analyze_ubo_ranges.c b/src/intel/compiler/brw/brw_nir_analyze_ubo_ranges.c
index f666fd0c399..7adb7f1f03a 100644
--- a/src/intel/compiler/brw/brw_nir_analyze_ubo_ranges.c
+++ b/src/intel/compiler/brw/brw_nir_analyze_ubo_ranges.c
@@ -150,7 +150,7 @@ analyze_ubos_block(struct ubo_analysis_state *state, nir_block *block)
             nir_def_last_component_read(&intrin->def) + 1;
          const int bytes = num_components * (intrin->def.bit_size / 8);
          const int start = ROUND_DOWN_TO(byte_offset, sizeof_GRF);
-         const int end = ALIGN(byte_offset + bytes, sizeof_GRF);
+         const int end = align(byte_offset + bytes, sizeof_GRF);
          const int chunks = (end - start) / sizeof_GRF;
 
          /* TODO: should we count uses in loops as higher benefit? */
diff --git a/src/intel/compiler/brw/brw_opt_combine_constants.cpp b/src/intel/compiler/brw/brw_opt_combine_constants.cpp
index 29a4e9a9105..0bc5f38c8a0 100644
--- a/src/intel/compiler/brw/brw_opt_combine_constants.cpp
+++ b/src/intel/compiler/brw/brw_opt_combine_constants.cpp
@@ -1574,7 +1574,7 @@ brw_opt_combine_constants(brw_shader &s)
        * seem to have additional alignment requirements, so account for that
        * too.
        */
-      assert(reg.offset == ALIGN(reg.offset, get_alignment_for_imm(imm)));
+      assert(reg.offset == align(reg.offset, get_alignment_for_imm(imm)));
 
       struct brw_reg imm_reg = build_imm_reg_for_copy(imm);
 
diff --git a/src/intel/compiler/brw/brw_reg_allocate.cpp b/src/intel/compiler/brw/brw_reg_allocate.cpp
index 6d3526692e8..1929fd10b73 100644
--- a/src/intel/compiler/brw/brw_reg_allocate.cpp
+++ b/src/intel/compiler/brw/brw_reg_allocate.cpp
@@ -52,7 +52,7 @@ brw_assign_regs_trivial(brw_shader &s)
    int reg_width = s.dispatch_width / 8;
 
    /* Note that compressed instructions require alignment to 2 registers. */
-   hw_reg_mapping[0] = ALIGN(s.first_non_payload_grf, reg_width);
+   hw_reg_mapping[0] = align(s.first_non_payload_grf, reg_width);
    for (i = 1; i <= s.alloc.count; i++) {
       hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
                            DIV_ROUND_UP(s.alloc.sizes[i - 1],
@@ -264,7 +264,7 @@ public:
        * for reg_width == 2.
        */
       int reg_width = fs->dispatch_width / 8;
-      payload_node_count = ALIGN(fs->first_non_payload_grf, reg_width);
+      payload_node_count = align(fs->first_non_payload_grf, reg_width);
 
       /* Get payload IP information */
       payload_last_use_ip = ralloc_array(mem_ctx, int, payload_node_count);
@@ -1180,7 +1180,7 @@ brw_reg_alloc::choose_spill_reg()
 brw_reg
 brw_reg_alloc::alloc_spill_reg(unsigned size, int ip)
 {
-   int vgrf = brw_allocate_vgrf_units(*fs, ALIGN(size, reg_unit(devinfo))).nr;
+   int vgrf = brw_allocate_vgrf_units(*fs, align(size, reg_unit(devinfo))).nr;
    int class_idx = DIV_ROUND_UP(size, reg_unit(devinfo)) - 1;
    int n = ra_add_node(g, compiler->reg_set.classes[class_idx]);
    assert(n == first_vgrf_node + vgrf);
@@ -1216,7 +1216,7 @@ brw_reg_alloc::spill_reg(unsigned spill_reg)
 {
    int size = fs->alloc.sizes[spill_reg];
    unsigned int spill_offset = fs->last_scratch;
-   assert(ALIGN(spill_offset, 16) == spill_offset); /* oword read/write req. */
+   assert(align(spill_offset, 16) == spill_offset); /* oword read/write req. */
 
    fs->spilled_any_registers = true;
 
diff --git a/src/intel/compiler/brw/brw_rt.h b/src/intel/compiler/brw/brw_rt.h
index f71273efb53..ca09b6f55db 100644
--- a/src/intel/compiler/brw/brw_rt.h
+++ b/src/intel/compiler/brw/brw_rt.h
@@ -233,7 +233,7 @@ brw_rt_compute_scratch_layout(struct brw_rt_scratch_layout *layout,
     */
    assert(size % 64 == 0);
    layout->sw_stack_start = size;
-   layout->sw_stack_size = ALIGN(sw_stack_size, 64);
+   layout->sw_stack_size = align(sw_stack_size, 64);
 
    /* Currently it's always the case that sw_stack_size is a power of
     * two, but power-of-two SW stack sizes are prone to causing
diff --git a/src/intel/compiler/elk/elk_eu_emit.c b/src/intel/compiler/elk/elk_eu_emit.c
index e851aa07777..ba61ba0171f 100644
--- a/src/intel/compiler/elk/elk_eu_emit.c
+++ b/src/intel/compiler/elk/elk_eu_emit.c
@@ -570,7 +570,7 @@ elk_append_insns(struct elk_codegen *p, unsigned nr_insn, unsigned alignment)
    assert(util_is_power_of_two_or_zero(sizeof(elk_inst)));
    assert(util_is_power_of_two_or_zero(alignment));
    const unsigned align_insn = MAX2(alignment / sizeof(elk_inst), 1);
-   const unsigned start_insn = ALIGN(p->nr_insn, align_insn);
+   const unsigned start_insn = align(p->nr_insn, align_insn);
    const unsigned new_nr_insn = start_insn + nr_insn;
 
    if (p->store_size < new_nr_insn) {
diff --git a/src/intel/compiler/elk/elk_fs.cpp b/src/intel/compiler/elk/elk_fs.cpp
index 948ca304550..9ad48310716 100644
--- a/src/intel/compiler/elk/elk_fs.cpp
+++ b/src/intel/compiler/elk/elk_fs.cpp
@@ -945,7 +945,7 @@ namespace {
       assert(util_is_power_of_two_nonzero(width));
       const unsigned start = (inst->flag_subreg * 16 + inst->group) &
                              ~(width - 1);
-      const unsigned end = start + ALIGN(inst->exec_size, width);
+      const unsigned end = start + align(inst->exec_size, width);
       return ((1 << DIV_ROUND_UP(end, 8)) - 1) & ~((1 << (start / 8)) - 1);
    }
 
@@ -1525,7 +1525,7 @@ elk_fs_visitor::assign_urb_setup()
             const bool per_prim = inst->src[i].nr < prog_data->num_per_primitive_inputs;
             const unsigned base = urb_start +
                (per_prim ? 0 :
-                ALIGN(prog_data->num_per_primitive_inputs / 2,
+                align(prog_data->num_per_primitive_inputs / 2,
                       reg_unit(devinfo)));
             const unsigned idx = per_prim ? inst->src[i].nr :
                inst->src[i].nr - prog_data->num_per_primitive_inputs;
@@ -5296,7 +5296,7 @@ elk_fs_visitor::lower_find_live_channel()
           * specified quarter control as result.
           */
          if (inst->group > 0)
-            ubld.SHR(mask, mask, elk_imm_ud(ALIGN(inst->group, 8)));
+            ubld.SHR(mask, mask, elk_imm_ud(align(inst->group, 8)));
 
          ubld.AND(mask, exec_mask, mask);
          exec_mask = mask;
@@ -6003,7 +6003,7 @@ elk_fs_visitor::allocate_registers(bool allow_spilling)
              * field documentation, platforms prior to Haswell measure scratch
              * size linearly with a range of [1kB, 12kB] and 1kB granularity.
              */
-            prog_data->total_scratch = ALIGN(last_scratch, 1024);
+            prog_data->total_scratch = align(last_scratch, 1024);
             max_scratch_size = 12 * 1024;
          }
       }
@@ -6666,7 +6666,7 @@ elk_nir_populate_wm_prog_data(nir_shader *shader,
 static inline int
 elk_register_blocks(int reg_count)
 {
-   return ALIGN(reg_count, 16) / 16 - 1;
+   return align(reg_count, 16) / 16 - 1;
 }
 
 const unsigned *
diff --git a/src/intel/compiler/elk/elk_fs_combine_constants.cpp b/src/intel/compiler/elk/elk_fs_combine_constants.cpp
index 76873fa35dd..c5a34f522a3 100644
--- a/src/intel/compiler/elk/elk_fs_combine_constants.cpp
+++ b/src/intel/compiler/elk/elk_fs_combine_constants.cpp
@@ -1461,7 +1461,7 @@ elk_fs_visitor::opt_combine_constants()
           * instructions seem to have additional alignment requirements, so
           * account for that too.
           */
-         reg.offset = ALIGN(reg.offset, get_alignment_for_imm(imm));
+         reg.offset = align(reg.offset, get_alignment_for_imm(imm));
 
          /* Ensure we have enough space in the register to copy the immediate */
          if (reg.offset + imm->size > REG_SIZE) {
@@ -1555,7 +1555,7 @@ elk_fs_visitor::opt_combine_constants()
        * seem to have additional alignment requirements, so account for that
        * too.
        */
-      assert(reg.offset == ALIGN(reg.offset, get_alignment_for_imm(imm)));
+      assert(reg.offset == align(reg.offset, get_alignment_for_imm(imm)));
 
       struct elk_reg imm_reg = build_imm_reg_for_copy(imm);
 
diff --git a/src/intel/compiler/elk/elk_fs_nir.cpp b/src/intel/compiler/elk/elk_fs_nir.cpp
index bdbee3d362d..8fb9121db1c 100644
--- a/src/intel/compiler/elk/elk_fs_nir.cpp
+++ b/src/intel/compiler/elk/elk_fs_nir.cpp
@@ -5036,7 +5036,7 @@ fs_nir_emit_intrinsic(nir_to_elk_state &ntb,
             }
 
             const unsigned total_dwords =
-               ALIGN(instr->num_components, REG_SIZE * reg_unit(devinfo) / 4);
+               align(instr->num_components, REG_SIZE * reg_unit(devinfo) / 4);
             unsigned loaded_dwords = 0;
 
             const elk_fs_reg packed_consts =
@@ -5225,7 +5225,7 @@ fs_nir_emit_intrinsic(nir_to_elk_state &ntb,
       break;
 
    case nir_intrinsic_load_global_constant_uniform_block_intel: {
-      const unsigned total_dwords = ALIGN(instr->num_components,
+      const unsigned total_dwords = align(instr->num_components,
                                           REG_SIZE * reg_unit(devinfo) / 4);
       unsigned loaded_dwords = 0;
 
@@ -5360,7 +5360,7 @@ fs_nir_emit_intrinsic(nir_to_elk_state &ntb,
          srcs[SURFACE_LOGICAL_SRC_SURFACE] = elk_fs_reg(elk_imm_ud(GFX7_BTI_SLM));
       }
 
-      const unsigned total_dwords = ALIGN(instr->num_components,
+      const unsigned total_dwords = align(instr->num_components,
                                           REG_SIZE * reg_unit(devinfo) / 4);
       unsigned loaded_dwords = 0;
 
@@ -7002,7 +7002,7 @@ nir_to_elk(elk_fs_visitor *s)
    fs_nir_setup_outputs(ntb);
    fs_nir_setup_uniforms(ntb.s);
    fs_nir_emit_system_values(ntb);
-   ntb.s.last_scratch = ALIGN(ntb.nir->scratch_size, 4) * ntb.s.dispatch_width;
+   ntb.s.last_scratch = align(ntb.nir->scratch_size, 4) * ntb.s.dispatch_width;
 
    fs_nir_emit_impl(ntb, nir_shader_get_entrypoint((nir_shader *)ntb.nir));
 
diff --git a/src/intel/compiler/elk/elk_fs_reg_allocate.cpp b/src/intel/compiler/elk/elk_fs_reg_allocate.cpp
index f2beac2195e..73422e0e849 100644
--- a/src/intel/compiler/elk/elk_fs_reg_allocate.cpp
+++ b/src/intel/compiler/elk/elk_fs_reg_allocate.cpp
@@ -52,7 +52,7 @@ elk_fs_visitor::assign_regs_trivial()
    int reg_width = dispatch_width / 8;
 
    /* Note that compressed instructions require alignment to 2 registers. */
-   hw_reg_mapping[0] = ALIGN(this->first_non_payload_grf, reg_width);
+   hw_reg_mapping[0] = align(this->first_non_payload_grf, reg_width);
    for (i = 1; i <= this->alloc.count; i++) {
       hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
                            DIV_ROUND_UP(this->alloc.sizes[i - 1],
@@ -325,7 +325,7 @@ public:
        */
       int reg_width = fs->dispatch_width / 8;
       rsi = util_logbase2(reg_width);
-      payload_node_count = ALIGN(fs->first_non_payload_grf, reg_width);
+      payload_node_count = align(fs->first_non_payload_grf, reg_width);
 
       /* Get payload IP information */
       payload_last_use_ip = ralloc_array(mem_ctx, int, payload_node_count);
@@ -940,7 +940,7 @@ elk_fs_reg_alloc::choose_spill_reg()
 elk_fs_reg
 elk_fs_reg_alloc::alloc_spill_reg(unsigned size, int ip)
 {
-   int vgrf = fs->alloc.allocate(ALIGN(size, reg_unit(devinfo)));
+   int vgrf = fs->alloc.allocate(align(size, reg_unit(devinfo)));
    int class_idx = DIV_ROUND_UP(size, reg_unit(devinfo)) - 1;
    int n = ra_add_node(g, compiler->fs_reg_sets[rsi].classes[class_idx]);
    assert(n == first_vgrf_node + vgrf);
@@ -975,7 +975,7 @@ elk_fs_reg_alloc::spill_reg(unsigned spill_reg)
 {
    int size = fs->alloc.sizes[spill_reg];
    unsigned int spill_offset = fs->last_scratch;
-   assert(ALIGN(spill_offset, 16) == spill_offset); /* oword read/write req. */
+   assert(align(spill_offset, 16) == spill_offset); /* oword read/write req. */
 
    /* Spills may use MRFs 13-15 in the SIMD16 case.  Our texturing is done
     * using up to 11 MRFs starting from either m1 or m2, and fb writes can use
diff --git a/src/intel/compiler/elk/elk_nir_analyze_ubo_ranges.c b/src/intel/compiler/elk/elk_nir_analyze_ubo_ranges.c
index eb213e710ff..cdb3f977332 100644
--- a/src/intel/compiler/elk/elk_nir_analyze_ubo_ranges.c
+++ b/src/intel/compiler/elk/elk_nir_analyze_ubo_ranges.c
@@ -160,7 +160,7 @@ analyze_ubos_block(struct ubo_analysis_state *state, nir_block *block)
          const int bytes = nir_intrinsic_dest_components(intrin) *
                            (intrin->def.bit_size / 8);
          const int start = ROUND_DOWN_TO(byte_offset, 32);
-         const int end = ALIGN(byte_offset + bytes, 32);
+         const int end = align(byte_offset + bytes, 32);
          const int chunks = (end - start) / 32;
 
          /* TODO: should we count uses in loops as higher benefit? */
diff --git a/src/intel/compiler/elk/elk_shader.cpp b/src/intel/compiler/elk/elk_shader.cpp
index 2bda4cadc47..167bcf9e214 100644
--- a/src/intel/compiler/elk/elk_shader.cpp
+++ b/src/intel/compiler/elk/elk_shader.cpp
@@ -1301,7 +1301,7 @@ elk_compile_tes(const struct elk_compiler *compiler,
       BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
 
    /* URB entry sizes are stored as a multiple of 64 bytes. */
-   prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
+   prog_data->base.urb_entry_size = align(output_size_bytes, 64) / 64;
 
    prog_data->base.urb_read_length = 0;
 
diff --git a/src/intel/compiler/elk/elk_vec4_gs_visitor.cpp b/src/intel/compiler/elk/elk_vec4_gs_visitor.cpp
index 13c5b2abd0b..f87de294aee 100644
--- a/src/intel/compiler/elk/elk_vec4_gs_visitor.cpp
+++ b/src/intel/compiler/elk/elk_vec4_gs_visitor.cpp
@@ -120,7 +120,7 @@ vec4_gs_visitor::setup_varying_inputs(int payload_reg,
       }
    }
 
-   int regs_used = ALIGN(input_array_stride * num_input_vertices,
+   int regs_used = align(input_array_stride * num_input_vertices,
                          attributes_per_reg) / attributes_per_reg;
    return payload_reg + regs_used;
 }
@@ -672,7 +672,7 @@ elk_compile_gs(const struct elk_compiler *compiler,
 
    /* 1 HWORD = 32 bytes = 256 bits */
    prog_data->control_data_header_size_hwords =
-      ALIGN(c.control_data_header_size_bits, 256) / 256;
+      align(c.control_data_header_size_bits, 256) / 256;
 
    /* Compute the output vertex size.
     *
@@ -726,7 +726,7 @@ elk_compile_gs(const struct elk_compiler *compiler,
    assert(compiler->devinfo->ver == 6 ||
           output_vertex_size_bytes <= GFX7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES);
    prog_data->output_vertex_size_hwords =
-      ALIGN(output_vertex_size_bytes, 32) / 32;
+      align(output_vertex_size_bytes, 32) / 32;
 
    /* Compute URB entry size.  The maximum allowed URB entry size is 32k.
     * That divides up as follows:
@@ -793,9 +793,9 @@ elk_compile_gs(const struct elk_compiler *compiler,
     * a multiple of 128 bytes in gfx6.
     */
    if (compiler->devinfo->ver >= 7) {
-      prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
+      prog_data->base.urb_entry_size = align(output_size_bytes, 64) / 64;
    } else {
-      prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128;
+      prog_data->base.urb_entry_size = align(output_size_bytes, 128) / 128;
    }
 
    assert(nir->info.gs.output_primitive < ARRAY_SIZE(elk::gl_prim_to_hw_prim));
diff --git a/src/intel/compiler/elk/elk_vec4_tcs.cpp b/src/intel/compiler/elk/elk_vec4_tcs.cpp
index 1c88920719f..897b7b3fb9f 100644
--- a/src/intel/compiler/elk/elk_vec4_tcs.cpp
+++ b/src/intel/compiler/elk/elk_vec4_tcs.cpp
@@ -423,7 +423,7 @@ elk_compile_tcs(const struct elk_compiler *compiler,
       return NULL;
 
    /* URB entry sizes are stored as a multiple of 64 bytes. */
-   vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
+   vue_prog_data->urb_entry_size = align(output_size_bytes, 64) / 64;
 
    /* HS does not use the usual payload pushing from URB to GRFs,
     * because we don't have enough registers for a full-size payload, and
diff --git a/src/intel/isl/isl_tiled_memcpy.c b/src/intel/isl/isl_tiled_memcpy.c
index d93bf1ed76d..685263863ee 100644
--- a/src/intel/isl/isl_tiled_memcpy.c
+++ b/src/intel/isl/isl_tiled_memcpy.c
@@ -45,7 +45,7 @@
 #define FILE_DEBUG_FLAG DEBUG_TEXTURE
 
 #define ALIGN_DOWN(a, b) ROUND_DOWN_TO(a, b)
-#define ALIGN_UP(a, b) ALIGN(a, b)
+#define ALIGN_UP(a, b) align(a, b)
 
 /* Tile dimensions.  Width and span are in bytes, height is in pixels (i.e.
  * unitless).  A "span" is the most number of bytes we can copy from linear
diff --git a/src/intel/tools/aub_write.c b/src/intel/tools/aub_write.c
index 2dcd33ac838..53aafde6cc4 100644
--- a/src/intel/tools/aub_write.c
+++ b/src/intel/tools/aub_write.c
@@ -94,7 +94,7 @@ write_execlists_header(struct aub_file *aub, const char *name)
    app_name_len =
       snprintf(app_name, sizeof(app_name), "PCI-ID=0x%X %s",
                aub->pci_id, name);
-   app_name_len = ALIGN(app_name_len, sizeof(uint32_t));
+   app_name_len = align(app_name_len, sizeof(uint32_t));
 
    dwords = 5 + app_name_len / sizeof(uint32_t);
    dword_out(aub, CMD_MEM_TRACE_VERSION | (dwords - 1));
@@ -189,7 +189,7 @@ mem_trace_memory_write_header_out(struct aub_file *aub, uint64_t addr,
                                   uint32_t len, uint32_t addr_space,
                                   const char *desc)
 {
-   uint32_t dwords = ALIGN(len, sizeof(uint32_t)) / sizeof(uint32_t);
+   uint32_t dwords = align(len, sizeof(uint32_t)) / sizeof(uint32_t);
 
    if (aub->verbose_log_file) {
       fprintf(aub->verbose_log_file,
diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c
index 6b3ece78bc1..38eda494354 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -1070,7 +1070,7 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
 
    const unsigned push_constant_alignment = 64;
    const unsigned aligned_total_push_constants_size =
-      ALIGN(total_push_constants_size, push_constant_alignment);
+      align(total_push_constants_size, push_constant_alignment);
    struct anv_state state;
    if (devinfo->verx10 >= 125) {
       state = anv_cmd_buffer_alloc_general_state(cmd_buffer,
diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c
index 4672b530a36..facefe3918b 100644
--- a/src/intel/vulkan/anv_descriptor_set.c
+++ b/src/intel/vulkan/anv_descriptor_set.c
@@ -299,7 +299,7 @@ anv_descriptor_data_size(enum anv_descriptor_data data,
          surface_size += ANV_SAMPLER_STATE_SIZE;
 
       if (data & ANV_DESCRIPTOR_SURFACE_SAMPLER) {
-         surface_size += ALIGN(ANV_SURFACE_STATE_SIZE + ANV_SAMPLER_STATE_SIZE,
+         surface_size += align(ANV_SURFACE_STATE_SIZE + ANV_SAMPLER_STATE_SIZE,
                                ANV_SURFACE_STATE_SIZE);
       }
    }
@@ -1037,7 +1037,7 @@ anv_descriptor_set_layout_descriptor_buffer_size(const struct anv_descriptor_set
    const struct anv_descriptor_set_binding_layout *dynamic_binding =
       set_layout_dynamic_binding(set_layout);
    if (dynamic_binding == NULL) {
-      *out_surface_size = ALIGN(set_layout->descriptor_buffer_surface_size,
+      *out_surface_size = align(set_layout->descriptor_buffer_surface_size,
                                 ANV_UBO_ALIGNMENT);
       *out_sampler_size = set_layout->descriptor_buffer_sampler_size;
       return;
@@ -1062,7 +1062,7 @@ anv_descriptor_set_layout_descriptor_buffer_size(const struct anv_descriptor_set
           var_desc_count * dynamic_binding->descriptor_sampler_stride;
    }
 
-   *out_surface_size = ALIGN(set_surface_size, ANV_UBO_ALIGNMENT);
+   *out_surface_size = align(set_surface_size, ANV_UBO_ALIGNMENT);
    *out_sampler_size = set_sampler_size;
 }
 
diff --git a/src/intel/vulkan/anv_nir_compute_push_layout.c b/src/intel/vulkan/anv_nir_compute_push_layout.c
index eb8eeae53b1..da6667a0181 100644
--- a/src/intel/vulkan/anv_nir_compute_push_layout.c
+++ b/src/intel/vulkan/anv_nir_compute_push_layout.c
@@ -189,14 +189,14 @@ anv_nir_compute_push_layout(nir_shader *nir,
 
    /* For scalar, push data size needs to be aligned to a DWORD. */
    const unsigned alignment = 4;
-   nir->num_uniforms = ALIGN(push_end - push_start, alignment);
+   nir->num_uniforms = align(push_end - push_start, alignment);
    prog_data->nr_params = nir->num_uniforms / 4;
    prog_data->param = rzalloc_array(mem_ctx, uint32_t, prog_data->nr_params);
 
    struct anv_push_range push_constant_range = {
       .set = ANV_DESCRIPTOR_SET_PUSH_CONSTANTS,
       .start = push_start / 32,
-      .length = ALIGN(push_end - push_start, devinfo->grf_size) / 32,
+      .length = align(push_end - push_start, devinfo->grf_size) / 32,
    };
 
    if (has_push_intrinsic) {
@@ -419,7 +419,7 @@ anv_nir_validate_push_layout(const struct anv_physical_device *pdevice,
                              struct anv_pipeline_bind_map *map)
 {
 #ifndef NDEBUG
-   unsigned prog_data_push_size = ALIGN(prog_data->nr_params, pdevice->info.grf_size / 4) / 8;
+   unsigned prog_data_push_size = align(prog_data->nr_params, pdevice->info.grf_size / 4) / 8;
 
    for (unsigned i = 0; i < 4; i++)
       prog_data_push_size += prog_data->ubo_ranges[i].length;
diff --git a/src/intel/vulkan/anv_physical_device.c b/src/intel/vulkan/anv_physical_device.c
index 0a841b3312d..9aa4e6f6e03 100644
--- a/src/intel/vulkan/anv_physical_device.c
+++ b/src/intel/vulkan/anv_physical_device.c
@@ -1888,7 +1888,7 @@ get_properties(const struct anv_physical_device *pdevice,
 
       /* NumPrim + Primitive Data List */
       const uint32_t max_indices_memory =
-         ALIGN(sizeof(uint32_t) +
+         align(sizeof(uint32_t) +
                sizeof(uint32_t) * props->maxMeshOutputVertices, 32);
 
       props->maxMeshOutputMemorySize = MIN2(max_urb_size - max_indices_memory, 32768);
diff --git a/src/intel/vulkan/anv_shader_compile.c b/src/intel/vulkan/anv_shader_compile.c
index db1f5f15bdb..5219d83d149 100644
--- a/src/intel/vulkan/anv_shader_compile.c
+++ b/src/intel/vulkan/anv_shader_compile.c
@@ -1500,7 +1500,7 @@ anv_shader_lower_nir(struct anv_device *device,
           * used by the shader to chunk_size -- which does simplify the logic.
           */
          const unsigned chunk_size = 16;
-         const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
+         const unsigned shared_size = align(nir->info.shared_size, chunk_size);
          assert(shared_size <=
                 intel_compute_slm_calculate_size(compiler->devinfo->ver,
                                                  nir->info.shared_size));
diff --git a/src/intel/vulkan/anv_video.c b/src/intel/vulkan/anv_video.c
index 2a265f3252b..c22a166947a 100644
--- a/src/intel/vulkan/anv_video.c
+++ b/src/intel/vulkan/anv_video.c
@@ -1396,7 +1396,7 @@ anv_video_get_image_mv_size(struct anv_device *device,
                  profile_list->pProfiles[i].videoCodecOperation == VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR) {
          unsigned w_mb = DIV_ROUND_UP(image->vk.extent.width, 32);
          unsigned h_mb = DIV_ROUND_UP(image->vk.extent.height, 32);
-         size = ALIGN(w_mb * h_mb, 2) << 6;
+         size = align(w_mb * h_mb, 2) << 6;
       } else if (profile_list->pProfiles[i].videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR) {
          unsigned w_ctb = DIV_ROUND_UP(image->vk.extent.width, ANV_MAX_VP9_CTB_SIZE);
          unsigned h_ctb = DIV_ROUND_UP(image->vk.extent.height, ANV_MAX_VP9_CTB_SIZE);
diff --git a/src/intel/vulkan/genX_cmd_video.c b/src/intel/vulkan/genX_cmd_video.c
index bcdd3576942..6a0f4108f93 100644
--- a/src/intel/vulkan/genX_cmd_video.c
+++ b/src/intel/vulkan/genX_cmd_video.c
@@ -2630,7 +2630,7 @@ anv_av1_calculate_xstep_qn(struct anv_cmd_buffer *cmd_buffer,
    int32_t mib_size_log2 = seq_hdr->flags.use_128x128_superblock ?
       av1_max_mib_size_log2 : av1_min_mib_size_log2;
 
-   int32_t mi_cols = ALIGN(frameExtent.width, 8) >> mib_size_log2;
+   int32_t mi_cols = align(frameExtent.width, 8) >> mib_size_log2;
 
    int denom = std_pic_info->coded_denom + 9;
    unsigned downscaled_width = (frameExtent.width * 8 + denom / 2) / denom;
@@ -2638,8 +2638,8 @@ anv_av1_calculate_xstep_qn(struct anv_cmd_buffer *cmd_buffer,
    for (uint8_t i = 0; i < 2; i++) { /* i == 0 : luma, i == 1 : chroma */
       int subsampling_x = seq_hdr->pColorConfig->subsampling_x;
       int ssx = i & subsampling_x;
-      int downscaled = ALIGN(downscaled_width, 2) >> ssx;
-      int upscaled = ALIGN(frameExtent.width, 2) >> ssx;
+      int downscaled = align(downscaled_width, 2) >> ssx;
+      int upscaled = align(frameExtent.width, 2) >> ssx;
 
       int xstep_qn = ((downscaled << av1_rs_scale_subpel_bits) + upscaled / 2) / upscaled;
 
@@ -3275,8 +3275,8 @@ anv_vp9_decode_video(struct anv_cmd_buffer *cmd_buffer,
    anv_batch_emit(&cmd_buffer->batch, GENX(HCP_VP9_PIC_STATE), pic) {
       if (std_pic->flags.segmentation_enabled)
          assert(segmentation != NULL);
-      pic.FrameWidth = ALIGN(frame_width, 8) - 1;
-      pic.FrameHeight = ALIGN(frame_height, 8) - 1;
+      pic.FrameWidth = align(frame_width, 8) - 1;
+      pic.FrameHeight = align(frame_height, 8) - 1;
       /* STD_VIDEO_VP9_FRAME_TYPE_KEY == VP9_Key_frmae
        * STD_VIDEO_VP9_FRAME_TYPE_NON_KEY == VP9_InterFrame
        */
diff --git a/src/intel/vulkan/genX_shader.c b/src/intel/vulkan/genX_shader.c
index 77591fe5bbd..fbe5727256c 100644
--- a/src/intel/vulkan/genX_shader.c
+++ b/src/intel/vulkan/genX_shader.c
@@ -1205,7 +1205,7 @@ emit_cs_shader(struct anv_batch *batch,
                                   &walker);
 #else
    const uint32_t vfe_curbe_allocation =
-      ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads +
+      align(cs_prog_data->push.per_thread.regs * dispatch.threads +
             cs_prog_data->push.cross_thread.regs, 2);
 
    anv_shader_emit(batch, shader, cs.gfx9.vfe, GENX(MEDIA_VFE_STATE), vfe) {
diff --git a/src/intel/vulkan/genX_simple_shader.c b/src/intel/vulkan/genX_simple_shader.c
index 5ac0160121f..507fb48b3ca 100644
--- a/src/intel/vulkan/genX_simple_shader.c
+++ b/src/intel/vulkan/genX_simple_shader.c
@@ -682,7 +682,7 @@ genX(emit_simple_shader_dispatch)(struct anv_simple_shader *state,
 
 #else /* GFX_VERx10 < 125 */
       const uint32_t vfe_curbe_allocation =
-         ALIGN(prog_data->push.per_thread.regs * dispatch.threads +
+         align(prog_data->push.per_thread.regs * dispatch.threads +
                prog_data->push.cross_thread.regs, 2);
 
       /* From the Sky Lake PRM Vol 2a, MEDIA_VFE_STATE:
diff --git a/src/intel/vulkan_hasvk/anv_cmd_buffer.c b/src/intel/vulkan_hasvk/anv_cmd_buffer.c
index b5af8364248..259087c63cf 100644
--- a/src/intel/vulkan_hasvk/anv_cmd_buffer.c
+++ b/src/intel/vulkan_hasvk/anv_cmd_buffer.c
@@ -589,7 +589,7 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
    const unsigned push_constant_alignment =
       cmd_buffer->device->info->ver < 8 ? 32 : 64;
    const unsigned aligned_total_push_constants_size =
-      ALIGN(total_push_constants_size, push_constant_alignment);
+      align(total_push_constants_size, push_constant_alignment);
    struct anv_state state =
       anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                          aligned_total_push_constants_size,
diff --git a/src/intel/vulkan_hasvk/anv_descriptor_set.c b/src/intel/vulkan_hasvk/anv_descriptor_set.c
index 77ce1bc1905..1930d5f323b 100644
--- a/src/intel/vulkan_hasvk/anv_descriptor_set.c
+++ b/src/intel/vulkan_hasvk/anv_descriptor_set.c
@@ -665,7 +665,7 @@ anv_descriptor_set_layout_descriptor_buffer_size(const struct anv_descriptor_set
    const struct anv_descriptor_set_binding_layout *dynamic_binding =
       set_layout_dynamic_binding(set_layout);
    if (dynamic_binding == NULL)
-      return ALIGN(set_layout->descriptor_buffer_size, ANV_UBO_ALIGNMENT);
+      return align(set_layout->descriptor_buffer_size, ANV_UBO_ALIGNMENT);
 
    assert(var_desc_count <= dynamic_binding->array_size);
    uint32_t shrink = dynamic_binding->array_size - var_desc_count;
@@ -681,7 +681,7 @@ anv_descriptor_set_layout_descriptor_buffer_size(const struct anv_descriptor_set
                  shrink * dynamic_binding->descriptor_stride;
    }
 
-   return ALIGN(set_size, ANV_UBO_ALIGNMENT);
+   return align(set_size, ANV_UBO_ALIGNMENT);
 }
 
 void anv_DestroyDescriptorSetLayout(
@@ -904,7 +904,7 @@ VkResult anv_CreateDescriptorPool(
       descriptor_bo_size +=
          ANV_UBO_ALIGNMENT * inline_info->maxInlineUniformBlockBindings;
    }
-   descriptor_bo_size = ALIGN(descriptor_bo_size, 4096);
+   descriptor_bo_size = align(descriptor_bo_size, 4096);
 
    const size_t pool_size =
       pCreateInfo->maxSets * sizeof(struct anv_descriptor_set) +
diff --git a/src/intel/vulkan_hasvk/anv_nir_compute_push_layout.c b/src/intel/vulkan_hasvk/anv_nir_compute_push_layout.c
index a83b86534f7..254ff12b2cc 100644
--- a/src/intel/vulkan_hasvk/anv_nir_compute_push_layout.c
+++ b/src/intel/vulkan_hasvk/anv_nir_compute_push_layout.c
@@ -112,7 +112,7 @@ anv_nir_compute_push_layout(nir_shader *nir,
     * scalar, it needs to be aligned to a DWORD.
     */
    const unsigned alignment = compiler->scalar_stage[nir->info.stage] ? 4 : 16;
-   nir->num_uniforms = ALIGN(push_end - push_start, alignment);
+   nir->num_uniforms = align(push_end - push_start, alignment);
    prog_data->nr_params = nir->num_uniforms / 4;
    prog_data->param = rzalloc_array(mem_ctx, uint32_t, prog_data->nr_params);
 
diff --git a/src/intel/vulkan_hasvk/anv_pipeline.c b/src/intel/vulkan_hasvk/anv_pipeline.c
index 22939c4e250..82828301d96 100644
--- a/src/intel/vulkan_hasvk/anv_pipeline.c
+++ b/src/intel/vulkan_hasvk/anv_pipeline.c
@@ -565,7 +565,7 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
           * used by the shader to chunk_size -- which does simplify the logic.
           */
          const unsigned chunk_size = 16;
-         const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
+         const unsigned shared_size = align(nir->info.shared_size, chunk_size);
          assert(shared_size <=
                 intel_compute_slm_calculate_size(compiler->devinfo->ver, nir->info.shared_size));
 
diff --git a/src/intel/vulkan_hasvk/genX_cmd_buffer.c b/src/intel/vulkan_hasvk/genX_cmd_buffer.c
index da52f2517da..b0471b46591 100644
--- a/src/intel/vulkan_hasvk/genX_cmd_buffer.c
+++ b/src/intel/vulkan_hasvk/genX_cmd_buffer.c
@@ -2666,7 +2666,7 @@ get_push_range_bound_size(struct anv_cmd_buffer *cmd_buffer,
       return (range->start + range->length) * 32;
 
    case ANV_DESCRIPTOR_SET_SHADER_CONSTANTS:
-      return ALIGN(shader->prog_data->const_data_size, ANV_UBO_ALIGNMENT);
+      return align(shader->prog_data->const_data_size, ANV_UBO_ALIGNMENT);
 
    default: {
       assert(range->set < MAX_SETS);
diff --git a/src/intel/vulkan_hasvk/genX_pipeline.c b/src/intel/vulkan_hasvk/genX_pipeline.c
index fa8dc44023d..20621f7af81 100644
--- a/src/intel/vulkan_hasvk/genX_pipeline.c
+++ b/src/intel/vulkan_hasvk/genX_pipeline.c
@@ -1885,7 +1885,7 @@ genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline)
    const struct intel_cs_dispatch_info dispatch =
       elk_cs_get_dispatch_info(devinfo, cs_prog_data, NULL);
    const uint32_t vfe_curbe_allocation =
-      ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads +
+      align(cs_prog_data->push.per_thread.regs * dispatch.threads +
             cs_prog_data->push.cross_thread.regs, 2);
 
    const struct anv_shader_bin *cs_bin = pipeline->cs;
diff --git a/src/kosmickrisp/vulkan/kk_descriptor_set_layout.c b/src/kosmickrisp/vulkan/kk_descriptor_set_layout.c
index 11b4c98b9f6..21004dc818d 100644
--- a/src/kosmickrisp/vulkan/kk_descriptor_set_layout.c
+++ b/src/kosmickrisp/vulkan/kk_descriptor_set_layout.c
@@ -78,7 +78,7 @@ kk_descriptor_stride_align_for_type(
          *stride = MAX2(*stride, desc_stride);
          *alignment = MAX2(*alignment, desc_align);
       }
-      *stride = ALIGN(*stride, *alignment);
+      *stride = align(*stride, *alignment);
       break;
 
    default:
diff --git a/src/microsoft/vulkan/dzn_cmd_buffer.c b/src/microsoft/vulkan/dzn_cmd_buffer.c
index cbb0e896100..a3fff64cc0f 100644
--- a/src/microsoft/vulkan/dzn_cmd_buffer.c
+++ b/src/microsoft/vulkan/dzn_cmd_buffer.c
@@ -3516,7 +3516,7 @@ dzn_cmd_buffer_update_push_constants(struct dzn_cmd_buffer *cmdbuf, uint32_t bin
       &cmdbuf->state.push_constant.gfx : &cmdbuf->state.push_constant.compute;
 
    uint32_t offset = state->offset / 4;
-   uint32_t end = ALIGN(state->end, 4) / 4;
+   uint32_t end = align(state->end, 4) / 4;
    uint32_t count = end - offset;
 
    if (!count)
diff --git a/src/microsoft/vulkan/dzn_descriptor_set.c b/src/microsoft/vulkan/dzn_descriptor_set.c
index 368a3d66703..ed4e8eb3dd6 100644
--- a/src/microsoft/vulkan/dzn_descriptor_set.c
+++ b/src/microsoft/vulkan/dzn_descriptor_set.c
@@ -854,7 +854,7 @@ dzn_pipeline_layout_create(struct dzn_device *device,
 
       root_param->ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
       root_param->Constants.ShaderRegister = 0;
-      root_param->Constants.Num32BitValues = ALIGN(push_constant_size, 4) / 4;
+      root_param->Constants.Num32BitValues = align(push_constant_size, 4) / 4;
       root_param->Constants.RegisterSpace = DZN_REGISTER_SPACE_PUSH_CONSTANT;
       root_param->ShaderVisibility = translate_desc_visibility(push_constant_flags);
       root_dwords += root_param->Constants.Num32BitValues;
@@ -1680,7 +1680,7 @@ dzn_descriptor_set_init(struct dzn_descriptor_set *set,
       dzn_foreach_pool_type(type) {
          set->heap_offsets[type] = pool->free_offset[type];
          if (device->bindless)
-            set->heap_offsets[type] = ALIGN(set->heap_offsets[type], 2);
+            set->heap_offsets[type] = align(set->heap_offsets[type], 2);
          set->heap_sizes[type] = layout->range_desc_count[type] + variable_descriptor_count[type];
          set->pool->free_offset[type] = set->heap_offsets[type] + set->heap_sizes[type];
       }
diff --git a/src/nouveau/vulkan/nvk_descriptor_set_layout.c b/src/nouveau/vulkan/nvk_descriptor_set_layout.c
index c99562b170e..10188fb9515 100644
--- a/src/nouveau/vulkan/nvk_descriptor_set_layout.c
+++ b/src/nouveau/vulkan/nvk_descriptor_set_layout.c
@@ -118,7 +118,7 @@ nvk_descriptor_stride_align_for_type(const struct nvk_physical_device *pdev,
          *stride = MAX2(*stride, desc_stride);
          *alignment = MAX2(*alignment, desc_align);
       }
-      *stride = ALIGN(*stride, *alignment);
+      *stride = align(*stride, *alignment);
       break;
 
    default: