treewide: Replace calls to the function ALIGN with align

This is done by grepping for ALIGN( and replacing it with align(.

docs, *.xml, and blake3 are excluded.

Signed-off-by: Yonggang Luo <luoyonggang@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38365>
Yonggang Luo 2025-11-11 15:33:54 +08:00 committed by Marge Bot
parent 03a32b3fe2
commit ecb0ccf603
119 changed files with 246 additions and 246 deletions
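
For context, both helpers round a value up to the next multiple of a power-of-two alignment; the commit only standardizes on the lowercase inline function. The sketch below is an illustrative assumption of that round-up semantic (the name align_sketch is hypothetical); the real ALIGN/align definitions and exact signatures live in Mesa's src/util/u_math.h.

#include <assert.h>
#include <stdint.h>

/* Illustrative sketch, not Mesa's actual definition: round `value` up to the
 * next multiple of `alignment`, assuming `alignment` is a non-zero power of
 * two. Example: align_sketch(13, 8) == 16. */
static inline uint32_t
align_sketch(uint32_t value, uint32_t alignment)
{
   assert(alignment != 0 && (alignment & (alignment - 1)) == 0);
   return (value + alignment - 1) & ~(alignment - 1);
}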


@ -2317,8 +2317,8 @@ void ac_get_task_info(const struct radeon_info *info,
/* Ensure that the addresses of each ring are 256 byte aligned. */
task_info->payload_entry_size = payload_entry_size;
task_info->num_entries = num_entries;
task_info->draw_ring_offset = ALIGN(AC_TASK_CTRLBUF_BYTES, 256);
task_info->payload_ring_offset = ALIGN(task_info->draw_ring_offset + draw_ring_bytes, 256);
task_info->draw_ring_offset = align(AC_TASK_CTRLBUF_BYTES, 256);
task_info->payload_ring_offset = align(task_info->draw_ring_offset + draw_ring_bytes, 256);
task_info->bo_size_bytes = task_info->payload_ring_offset + payload_ring_bytes;
}


@ -1046,7 +1046,7 @@ ac_sqtt_dump_data(const struct radeon_info *rad_info, struct ac_sqtt_trace *sqtt
sizeof(struct sqtt_code_object_database_record),
record, &elf_size_calc, flags);
/* Align to 4 bytes per the RGP file spec. */
code_object_record.size = ALIGN(elf_size_calc, 4);
code_object_record.size = align(elf_size_calc, 4);
fseek(output, file_offset, SEEK_SET);
fwrite(&code_object_record, sizeof(struct sqtt_code_object_database_record),
1, output);


@ -245,7 +245,7 @@ ac_rgp_write_msgpack(FILE *output,
}
}
ac_msgpack_resize_if_required(&msgpack, 4 - (msgpack.offset % 4));
msgpack.offset = ALIGN(msgpack.offset, 4);
msgpack.offset = align(msgpack.offset, 4);
fwrite(msgpack.mem, 1, msgpack.offset, output);
*written_size = msgpack.offset;
ac_msgpack_destroy(&msgpack);
@ -321,7 +321,7 @@ ac_rgp_file_write_elf_text(FILE *output, uint32_t *elf_size_calc,
}
symbol_offset += rgp_shader_data->code_size;
uint32_t aligned = ALIGN(symbol_offset, 256) - symbol_offset;
uint32_t aligned = align(symbol_offset, 256) - symbol_offset;
fseek(output, aligned, SEEK_CUR);
*elf_size_calc += aligned;
*text_size = symbol_offset + aligned;


@ -1393,7 +1393,7 @@ radv_fill_code_object_record(struct radv_device *device, struct rgp_shader_data
shader_data->vgpr_count = shader->config.num_vgprs;
shader_data->sgpr_count = shader->config.num_sgprs;
shader_data->scratch_memory_size = shader->config.scratch_bytes_per_wave;
shader_data->lds_size = ALIGN(shader->config.lds_size, lds_increment);
shader_data->lds_size = align(shader->config.lds_size, lds_increment);
shader_data->wavefront_size = shader->info.wave_size;
shader_data->base_address = va & 0xffffffffffff;
shader_data->elf_symbol_offset = 0;


@ -162,7 +162,7 @@ radv_get_acceleration_structure_layout(struct radv_device *device,
offset += bvh_size / 64 * 4;
/* The BVH and hence bvh_offset needs 64 byte alignment for RT nodes. */
offset = ALIGN(offset, 64);
offset = align(offset, 64);
accel_struct->bvh_offset = offset;
/* root node */


@ -543,7 +543,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
if (radv_shader_should_clear_lds(device, stage->nir)) {
const unsigned chunk_size = 16; /* max single store size */
const unsigned shared_size = ALIGN(stage->nir->info.shared_size, chunk_size);
const unsigned shared_size = align(stage->nir->info.shared_size, chunk_size);
NIR_PASS(_, stage->nir, nir_clear_shared_memory, shared_size, chunk_size);
}


@ -146,9 +146,9 @@ radv_sparse_image_bind_memory(struct radv_device *device, const VkSparseImageMem
(uint64_t)bind_offset.x * surface->prt_tile_height * surface->prt_tile_depth) *
bs;
uint32_t aligned_extent_width = ALIGN(bind_extent.width, surface->prt_tile_width);
uint32_t aligned_extent_height = ALIGN(bind_extent.height, surface->prt_tile_height);
uint32_t aligned_extent_depth = ALIGN(bind_extent.depth, surface->prt_tile_depth);
uint32_t aligned_extent_width = align(bind_extent.width, surface->prt_tile_width);
uint32_t aligned_extent_height = align(bind_extent.height, surface->prt_tile_height);
uint32_t aligned_extent_depth = align(bind_extent.depth, surface->prt_tile_depth);
bool whole_subres = (bind_extent.height <= surface->prt_tile_height || aligned_extent_width == pitch) &&
(bind_extent.depth <= surface->prt_tile_depth ||


@ -134,7 +134,7 @@ radv_sdma_get_chunked_copy_info(const struct radv_device *const device, const st
{
const unsigned extent_horizontal_blocks = DIV_ROUND_UP(extent.width * img->texel_scale, img->blk_w);
const unsigned extent_vertical_blocks = DIV_ROUND_UP(extent.height, img->blk_h);
const unsigned aligned_row_pitch = ALIGN(extent_horizontal_blocks, 4);
const unsigned aligned_row_pitch = align(extent_horizontal_blocks, 4);
const unsigned aligned_row_bytes = aligned_row_pitch * img->bpp;
/* Assume that we can always copy at least one full row at a time. */


@ -760,7 +760,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
if (nir->info.zero_initialize_shared_memory && nir->info.shared_size > 0) {
const unsigned chunk_size = 16; /* max single store size */
const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
const unsigned shared_size = align(nir->info.shared_size, chunk_size);
NIR_PASS(_, nir, nir_zero_initialize_shared_memory, shared_size, chunk_size);
}
}
@ -945,7 +945,7 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
&ngg_stage->info.ngg_lds_scratch_size);
} else if (nir->info.stage == MESA_SHADER_MESH) {
/* ACO aligns the workgroup size to the wave size. */
unsigned hw_workgroup_size = ALIGN(info->workgroup_size, info->wave_size);
unsigned hw_workgroup_size = align(info->workgroup_size, info->wave_size);
bool scratch_ring = false;
NIR_PASS(_, nir, ac_nir_lower_ngg_mesh, &pdev->info, options.export_clipdist_mask, options.vs_output_param_offset,


@ -620,7 +620,7 @@ radv_get_shader_binary_size(const struct radv_shader_binary *binary)
size_t size = sizeof(uint32_t); /* has_binary */
if (binary)
size += SHA1_DIGEST_LENGTH + 4 + ALIGN(binary->total_size, 4);
size += SHA1_DIGEST_LENGTH + 4 + align(binary->total_size, 4);
return size;
}


@ -3450,7 +3450,7 @@ agx_compile_function_nir(nir_shader *nir, nir_function_impl *impl,
*/
if (ctx->any_scratch) {
assert(!ctx->is_preamble && "preambles don't use scratch");
ctx->scratch_size_B = ALIGN(nir->scratch_size, 16);
ctx->scratch_size_B = align(nir->scratch_size, 16);
}
/* Stop the main shader or preamble shader after the exit block. For real
@ -3509,7 +3509,7 @@ agx_compile_function_nir(nir_shader *nir, nir_function_impl *impl,
if (ctx->scratch_size_B > 0) {
/* Apple always allocate 40 more bytes in the entrypoint and align to 4. */
uint64_t stack_size = ALIGN(DIV_ROUND_UP(ctx->scratch_size_B, 4) + 10, 4);
uint64_t stack_size = align(DIV_ROUND_UP(ctx->scratch_size_B, 4) + 10, 4);
assert(stack_size < INT16_MAX);


@ -339,10 +339,10 @@ static inline bool
ail_is_level_allocated_compressed(const struct ail_layout *layout,
unsigned level)
{
unsigned width_sa = ALIGN(
unsigned width_sa = align(
ail_effective_width_sa(layout->width_px, layout->sample_count_sa), 16);
unsigned height_sa = ALIGN(
unsigned height_sa = align(
ail_effective_height_sa(layout->height_px, layout->sample_count_sa), 16);
return layout->compressed &&


@ -80,7 +80,7 @@ hk_descriptor_stride_align_for_type(
*stride = MAX2(*stride, desc_stride);
*alignment = MAX2(*alignment, desc_align);
}
*stride = ALIGN(*stride, *alignment);
*stride = align(*stride, *alignment);
break;
default:


@ -2093,7 +2093,7 @@ xfb_decl_store(struct xfb_decl *xfb_decl, const struct gl_constants *consts,
if (max_member_alignment && has_xfb_qualifiers) {
max_member_alignment[buffer] = MAX2(max_member_alignment[buffer],
_mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1);
info->Buffers[buffer].Stride = ALIGN(xfb_offset,
info->Buffers[buffer].Stride = align(xfb_offset,
max_member_alignment[buffer]);
} else {
info->Buffers[buffer].Stride = xfb_offset;
@ -2926,7 +2926,7 @@ varying_matches_assign_locations(struct varying_matches *vm,
(previous_packing_class != vm->matches[i].packing_class) ||
(vm->matches[i].packing_order == PACKING_ORDER_VEC3 &&
dont_pack_vec3)) {
*location = ALIGN(*location, 4);
*location = align(*location, 4);
}
previous_var_xfb = var->data.is_xfb;
@ -2967,7 +2967,7 @@ varying_matches_assign_locations(struct varying_matches *vm,
break;
}
*location = ALIGN(*location + 1, 4);
*location = align(*location + 1, 4);
slot_end = *location + num_components - 1;
}
@ -3229,9 +3229,9 @@ tfeedback_candidate_generator(struct tfeedback_candidate_generator_state *state,
* (c) each double-precision variable captured must be aligned to a
* multiple of eight bytes relative to the beginning of a vertex.
*/
state->xfb_offset_floats = ALIGN(state->xfb_offset_floats, 2);
state->xfb_offset_floats = align(state->xfb_offset_floats, 2);
/* 64-bit members of structs are also aligned. */
state->varying_floats = ALIGN(state->varying_floats, 2);
state->varying_floats = align(state->varying_floats, 2);
}
candidate->xfb_offset_floats = state->xfb_offset_floats;


@ -586,7 +586,7 @@ spill_ssa_defs_and_lower_shader_calls(nir_shader *shader, uint32_t num_calls,
def = nir_b2b32(&before, def);
const unsigned comp_size = def->bit_size / 8;
offset = ALIGN(offset, comp_size);
offset = align(offset, comp_size);
new_def = spill_fill(&before, &after, def,
index, call_idx,
@ -617,7 +617,7 @@ spill_ssa_defs_and_lower_shader_calls(nir_shader *shader, uint32_t num_calls,
nir_builder *b = &before;
offset = ALIGN(offset, options->stack_alignment);
offset = align(offset, options->stack_alignment);
max_scratch_size = MAX2(max_scratch_size, offset);
/* First thing on the called shader's stack is the resume address
@ -1642,11 +1642,11 @@ nir_opt_sort_and_pack_stack(nir_shader *shader,
unsigned scratch_size = start_call_scratch;
util_dynarray_foreach(&ops, struct scratch_item, item) {
item->new_offset = ALIGN(scratch_size, item->bit_size / 8);
item->new_offset = align(scratch_size, item->bit_size / 8);
scratch_size = item->new_offset + (item->bit_size * item->num_components) / 8;
_mesa_hash_table_u64_insert(value_id_to_item, item->value, item);
}
shader->scratch_size = ALIGN(scratch_size, stack_alignment);
shader->scratch_size = align(scratch_size, stack_alignment);
/* Update offsets in the instructions */
nir_foreach_block_safe(block, impl) {


@ -132,7 +132,7 @@ static bool
nir_lower_nv_task_count(nir_shader *shader)
{
lower_task_nv_state state = {
.task_count_shared_addr = ALIGN(shader->info.shared_size, 4),
.task_count_shared_addr = align(shader->info.shared_size, 4),
};
shader->info.shared_size += 4;
@ -277,7 +277,7 @@ emit_shared_to_payload_copy(nir_builder *b,
off += remaining_dwords * 4;
}
assert(s->payload_offset_in_bytes + ALIGN(payload_size, 4) == off);
assert(s->payload_offset_in_bytes + align(payload_size, 4) == off);
}
static bool
@ -453,7 +453,7 @@ nir_lower_task_shader(nir_shader *shader,
requires_payload_in_shared(shader, atomics, small_types);
lower_task_state state = {
.payload_shared_addr = ALIGN(shader->info.shared_size, 16),
.payload_shared_addr = align(shader->info.shared_size, 16),
.payload_in_shared = payload_in_shared,
.payload_offset_in_bytes = options.payload_offset_in_bytes,
};


@ -151,7 +151,7 @@ struct etna_bo *etna_bo_cache_alloc(struct etna_bo_cache *cache, uint32_t *size,
struct etna_bo *bo;
struct etna_bo_bucket *bucket;
*size = ALIGN(*size, 4096);
*size = align(*size, 4096);
bucket = get_bucket(cache, *size);
/* see if we can be green and recycle: */


@ -105,7 +105,7 @@ struct etna_cmd_stream *etna_cmd_stream_new(struct etna_pipe *pipe,
}
/* allocate even number of 32-bit words */
size = ALIGN(size, 2);
size = align(size, 2);
stream->base.buffer = malloc(size * sizeof(uint32_t));
if (!stream->base.buffer) {


@ -257,7 +257,7 @@ cs_program_emit_regs(fd_cs &cs, struct kernel *kernel)
if (v->pvtmem_size > 0) {
uint32_t per_fiber_size = v->pvtmem_size;
uint32_t per_sp_size =
ALIGN(per_fiber_size * a6xx_backend->info->fibers_per_sp, 1 << 12);
align(per_fiber_size * a6xx_backend->info->fibers_per_sp, 1 << 12);
uint32_t total_size = per_sp_size * a6xx_backend->info->num_sp_cores;
struct fd_bo *pvtmem = fd_bo_new(a6xx_backend->dev, total_size, 0, "pvtmem");


@ -268,7 +268,7 @@ next_sect(struct state *state, int *sect_size)
*sect_size = end - state->buf;
/* copy the section to keep things nicely 32b aligned: */
sect = malloc(ALIGN(*sect_size, 4));
sect = malloc(align(*sect_size, 4));
memcpy(sect, state->buf, *sect_size);
state->sz -= *sect_size + 4;
@ -584,8 +584,8 @@ dump_shaders_a3xx(struct state *state)
if (!compact) {
if (state->hdr->revision >= 7) {
instrs += ALIGN(instrs_size, 8) - instrs_size;
instrs_size = ALIGN(instrs_size, 8);
instrs += align(instrs_size, 8) - instrs_size;
instrs_size = align(instrs_size, 8);
}
instrs += 32;
instrs_size -= 32;


@ -347,9 +347,9 @@ decode_shader_descriptor_block(struct state *state,
unsigned dwords = 2 * stats.instlen;
if (gpu_id >= 400) {
dwords = ALIGN(dwords, 16 * 2);
dwords = align(dwords, 16 * 2);
} else {
dwords = ALIGN(dwords, 4 * 2);
dwords = align(dwords, 4 * 2);
}
unsigned half_regs = state->half_regs;


@ -205,7 +205,7 @@ fd_bo_heap_alloc(struct fd_bo_heap *heap, uint32_t size, uint32_t flags)
*/
size = MAX2(size, SUBALLOC_ALIGNMENT);
size = ALIGN(size, SUBALLOC_ALIGNMENT);
size = align(size, SUBALLOC_ALIGNMENT);
simple_mtx_lock(&heap->lock);
/* Allocate larger buffers from the bottom, and smaller buffers from top


@ -6026,7 +6026,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
ir3_instr_move_before(unlock, end);
}
so->pvtmem_size = ALIGN(so->pvtmem_size, compiler->pvtmem_per_fiber_align);
so->pvtmem_size = align(so->pvtmem_size, compiler->pvtmem_per_fiber_align);
/* Note that max_bary counts inputs that are not bary.f'd for FS: */
if (so->type == MESA_SHADER_FRAGMENT)


@ -36,7 +36,7 @@ get_ubo_load_range(nir_shader *nir, nir_intrinsic_instr *instr,
return false;
r->start = ROUND_DOWN_TO(offset, alignment * 16);
r->end = ALIGN(offset + size, alignment * 16);
r->end = align(offset + size, alignment * 16);
return true;
}


@ -1237,7 +1237,7 @@ compress_regs_left(struct ra_ctx *ctx, struct ra_file *file,
}
if (!(cur_reg->flags & IR3_REG_HALF))
physreg = ALIGN(physreg, 2);
physreg = align(physreg, 2);
d("pushing reg %u physreg %u\n", cur_reg->name, physreg);
@ -1330,7 +1330,7 @@ find_best_gap(struct ra_ctx *ctx, struct ra_file *file,
BITSET_WORD *available =
is_early_clobber(dst) ? file->available_to_evict : file->available;
unsigned start = ALIGN(file->start, alignment);
unsigned start = align(file->start, alignment);
if (start + size > file_size)
start = 0;
unsigned candidate = start;


@ -37,7 +37,7 @@ ir3_const_ensure_imm_size(struct ir3_shader_variant *v, unsigned size)
/* Immediates are uploaded in units of vec4 so make sure our buffer is large
* enough.
*/
size = ALIGN(size, 4);
size = align(size, 4);
/* Pre-a7xx, the immediates that get lowered to const registers are
* emitted as part of the const state so the total size of immediates


@ -362,7 +362,7 @@ find_best_gap(struct ra_ctx *ctx, struct ir3_register *dst, unsigned size,
if (size > file_size)
return (physreg_t) ~0;
unsigned start = ALIGN(ctx->start, alignment);
unsigned start = align(ctx->start, alignment);
if (start + size > file_size)
start = 0;
unsigned candidate = start;
@ -395,7 +395,7 @@ find_best_spill_reg(struct ra_ctx *ctx, struct ir3_register *reg,
unsigned file_size = reg_file_size(reg);
unsigned min_cost = UINT_MAX;
unsigned start = ALIGN(ctx->start, alignment);
unsigned start = align(ctx->start, alignment);
if (start + size > file_size)
start = 0;
physreg_t candidate = start;


@ -602,7 +602,7 @@ tu_descriptor_set_create(struct tu_device *device,
&layout->binding[layout->binding_count - 1];
if (binding->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
layout_size = binding->offset +
ALIGN(variable_count, 4 * A6XX_TEX_CONST_DWORDS);
align(variable_count, 4 * A6XX_TEX_CONST_DWORDS);
} else {
uint32_t stride = binding->size;
layout_size = binding->offset + variable_count * stride;


@ -683,7 +683,7 @@ tu6_emit_link_map(struct tu_cs *cs,
enum a6xx_state_block sb)
{
const struct ir3_const_state *const_state = ir3_const_state(consumer);
uint32_t size = ALIGN(consumer->input_size, 4);
uint32_t size = align(consumer->input_size, 4);
if (size == 0)
return;


@ -2289,9 +2289,9 @@ tu_setup_pvtmem(struct tu_device *dev,
tu_bo_finish(dev, pvtmem_bo->bo);
pvtmem_bo->per_fiber_size =
util_next_power_of_two(ALIGN(pvtmem_bytes, 512));
util_next_power_of_two(align(pvtmem_bytes, 512));
pvtmem_bo->per_sp_size =
ALIGN(pvtmem_bo->per_fiber_size *
align(pvtmem_bo->per_fiber_size *
dev->physical_device->info->fibers_per_sp,
1 << 12);
uint32_t total_size =
@ -2729,7 +2729,7 @@ tu_shader_create(struct tu_device *dev,
* is allocated at the API level, and it's up to the user to ensure
* that accesses are limited to those bounds.
*/
const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
const unsigned shared_size = align(nir->info.shared_size, chunk_size);
NIR_PASS(_, nir, nir_zero_initialize_shared_memory, shared_size,
chunk_size);
}


@ -55,7 +55,7 @@ tu_suballoc_bo_alloc(struct tu_suballoc_bo *suballoc_bo,
{
struct tu_bo *bo = suballoc->bo;
if (bo) {
uint32_t offset = ALIGN(suballoc->next_offset, alignment);
uint32_t offset = align(suballoc->next_offset, alignment);
if (offset + size <= bo->size) {
suballoc_bo->bo = tu_bo_get_ref(bo);
suballoc_bo->iova = bo->iova + offset;


@ -6073,7 +6073,7 @@ void lp_build_nir_soa_func(struct gallivm_state *gallivm,
bld.shader = shader;
bld.scratch_size = ALIGN(shader->scratch_size, 8);
bld.scratch_size = align(shader->scratch_size, 8);
if (params->scratch_ptr)
bld.scratch_ptr = params->scratch_ptr;
else if (shader->scratch_size) {


@ -872,7 +872,7 @@ submit_batch(struct crocus_batch *batch)
.buffer_count = batch->exec_count,
.batch_start_offset = 0,
/* This must be QWord aligned. */
.batch_len = ALIGN(batch->primary_batch_size, 8),
.batch_len = align(batch->primary_batch_size, 8),
.flags = I915_EXEC_RENDER |
I915_EXEC_NO_RELOC |
I915_EXEC_BATCH_FIRST |


@ -56,11 +56,11 @@ stream_state(struct crocus_batch *batch,
uint32_t *out_offset,
struct crocus_bo **out_bo)
{
uint32_t offset = ALIGN(batch->state.used, alignment);
uint32_t offset = align(batch->state.used, alignment);
if (offset + size >= STATE_SZ && !batch->no_wrap) {
crocus_batch_flush(batch);
offset = ALIGN(batch->state.used, alignment);
offset = align(batch->state.used, alignment);
} else if (offset + size >= batch->state.bo->size) {
const unsigned new_size =
MIN2(batch->state.bo->size + batch->state.bo->size / 2,


@ -62,7 +62,7 @@ crocus_init_identifier_bo(struct crocus_context *ice)
return false;
ice->workaround_bo->kflags |= EXEC_OBJECT_CAPTURE;
ice->workaround_offset = ALIGN(
ice->workaround_offset = align(
intel_debug_write_identifiers(bo_map, 4096, "Crocus"), 32);
crocus_bo_unmap(ice->workaround_bo);


@ -179,7 +179,7 @@ crocus_alloc_item_data(struct crocus_context *ice, uint32_t size)
uint32_t offset = ice->shaders.cache_next_offset;
/* Programs are always 64-byte aligned, so set up the next one now */
ice->shaders.cache_next_offset = ALIGN(offset + size, 64);
ice->shaders.cache_next_offset = align(offset + size, 64);
return offset;
}


@ -229,7 +229,7 @@ crocus_resource_configure_main(const struct crocus_screen *screen,
devinfo->ver < 6) {
/* align row pitch to 4 so we can keep using BLT engine */
row_pitch_B = util_format_get_stride(templ->format, templ->width0);
row_pitch_B = ALIGN(row_pitch_B, 4);
row_pitch_B = align(row_pitch_B, 4);
}
const struct isl_surf_init_info init_info = {
@ -1437,7 +1437,7 @@ crocus_map_tiled_memcpy(struct crocus_transfer *map)
struct crocus_resource *res = (struct crocus_resource *) xfer->resource;
struct isl_surf *surf = &res->surf;
xfer->stride = ALIGN(surf->row_pitch_B, 16);
xfer->stride = align(surf->row_pitch_B, 16);
xfer->layer_stride = xfer->stride * box->height;
unsigned x1, x2, y1, y2;


@ -329,11 +329,11 @@ stream_state(struct crocus_batch *batch,
unsigned alignment,
uint32_t *out_offset)
{
uint32_t offset = ALIGN(batch->state.used, alignment);
uint32_t offset = align(batch->state.used, alignment);
if (offset + size >= STATE_SZ && !batch->no_wrap) {
crocus_batch_flush(batch);
offset = ALIGN(batch->state.used, alignment);
offset = align(batch->state.used, alignment);
} else if (offset + size >= batch->state.bo->size) {
const unsigned new_size =
MIN2(batch->state.bo->size + batch->state.bo->size / 2,
@ -6591,7 +6591,7 @@ crocus_upload_dirty_render_state(struct crocus_context *ice,
struct crocus_resource *res = (void *) tgt->base.buffer;
uint32_t start = tgt->base.buffer_offset;
#if GFX_VER < 8
uint32_t end = ALIGN(start + tgt->base.buffer_size, 4);
uint32_t end = align(start + tgt->base.buffer_size, 4);
#endif
crocus_emit_cmd(batch, GENX(3DSTATE_SO_BUFFER), sob) {
sob.SOBufferIndex = i;
@ -8143,7 +8143,7 @@ crocus_upload_compute_state(struct crocus_context *ice,
vfe.URBEntryAllocationSize = GFX_VER == 8 ? 2 : 0;
vfe.CURBEAllocationSize =
ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads +
align(cs_prog_data->push.per_thread.regs * dispatch.threads +
cs_prog_data->push.cross_thread.regs, 2);
}
}
@ -8159,15 +8159,15 @@ crocus_upload_compute_state(struct crocus_context *ice,
elk_cs_push_const_total_size(cs_prog_data, dispatch.threads);
uint32_t *curbe_data_map =
stream_state(batch,
ALIGN(push_const_size, 64), 64,
align(push_const_size, 64), 64,
&curbe_data_offset);
assert(curbe_data_map);
memset(curbe_data_map, 0x5a, ALIGN(push_const_size, 64));
memset(curbe_data_map, 0x5a, align(push_const_size, 64));
crocus_fill_cs_push_const_buffer(cs_prog_data, dispatch.threads,
curbe_data_map);
crocus_emit_cmd(batch, GENX(MEDIA_CURBE_LOAD), curbe) {
curbe.CURBETotalDataLength = ALIGN(push_const_size, 64);
curbe.CURBETotalDataLength = align(push_const_size, 64);
curbe.CURBEDataStartAddress = curbe_data_offset;
}
}


@ -1199,11 +1199,11 @@ fill_buffer_location(struct d3d12_context *ctx,
buf_loc.PlacedFootprint.Footprint.Height = res->base.b.height0;
buf_loc.PlacedFootprint.Footprint.Depth = res->base.b.depth0;
} else {
buf_loc.PlacedFootprint.Footprint.Width = ALIGN(trans->base.b.box.width,
buf_loc.PlacedFootprint.Footprint.Width = align(trans->base.b.box.width,
util_format_get_blockwidth(res->base.b.format));
buf_loc.PlacedFootprint.Footprint.Height = ALIGN(trans->base.b.box.height,
buf_loc.PlacedFootprint.Footprint.Height = align(trans->base.b.box.height,
util_format_get_blockheight(res->base.b.format));
buf_loc.PlacedFootprint.Footprint.Depth = ALIGN(depth,
buf_loc.PlacedFootprint.Footprint.Depth = align(depth,
util_format_get_blockdepth(res->base.b.format));
}


@ -479,8 +479,8 @@ d3d12_video_bitstream_builder_hevc::build_sps(const HevcVideoParameterSet& paren
viewport.Width = crop_window_upper_layer.front /* passes width */ - ((crop_window_upper_layer.left + crop_window_upper_layer.right) * SubWidthC);
viewport.Height = crop_window_upper_layer.back /* passes height */- ((crop_window_upper_layer.top + crop_window_upper_layer.bottom) * SubHeightC);
m_latest_sps.pic_width_in_luma_samples = ALIGN(encodeResolution.Width, picDimensionMultipleRequirement);
m_latest_sps.pic_height_in_luma_samples = ALIGN(encodeResolution.Height, picDimensionMultipleRequirement);
m_latest_sps.pic_width_in_luma_samples = align(encodeResolution.Width, picDimensionMultipleRequirement);
m_latest_sps.pic_height_in_luma_samples = align(encodeResolution.Height, picDimensionMultipleRequirement);
m_latest_sps.conf_win_right_offset = (m_latest_sps.pic_width_in_luma_samples - viewport.Width) / SubWidthC;
m_latest_sps.conf_win_bottom_offset = (m_latest_sps.pic_height_in_luma_samples - viewport.Height) / SubHeightC;


@ -90,7 +90,7 @@ emit_strides(
if (tensor->layout == ETHOSU_LAYOUT_NHCWB16) {
tensor_x = 16 * elem_size;
tensor_c = tensor_x * tensor->shape.width;
tensor_y = elem_size * tensor->shape.width * ALIGN(tensor->shape.depth, 16);
tensor_y = elem_size * tensor->shape.width * align(tensor->shape.depth, 16);
} else {
tensor_c = elem_size;
tensor_x = tensor->shape.depth * tensor_c;


@ -16,7 +16,7 @@ fill_scale_and_biases(struct ethosu_subgraph *subgraph, struct ethosu_operation
PIPE_MAP_READ, &transfer_in);
unsigned idx = 0;
*scales_size = ALIGN(operation->ofm.shape.depth * 10, 16);
*scales_size = align(operation->ofm.shape.depth * 10, 16);
*scales = malloc(*scales_size);
memset(*scales, 0, *scales_size);


@ -228,7 +228,7 @@ ethosu_lower_concatenation(struct ethosu_subgraph *subgraph,
if (tensor->layout == ETHOSU_LAYOUT_NHWC)
operation->ofm.tiles.addresses[0] += poperation->input_tensors[i]->dims[3];
else if (tensor->layout == ETHOSU_LAYOUT_NHCWB16)
operation->ofm.tiles.addresses[0] += poperation->input_tensors[i]->dims[2] * ALIGN(poperation->input_tensors[i]->dims[3], 16);
operation->ofm.tiles.addresses[0] += poperation->input_tensors[i]->dims[2] * align(poperation->input_tensors[i]->dims[3], 16);
else
assert(0 && "Unsupported layout");
}


@ -69,7 +69,7 @@ ethosu_allocate_feature_map(struct ethosu_subgraph *subgraph, struct ethosu_feat
if (tensor->layout == ETHOSU_LAYOUT_NHWC) {
size = tensor->shape.width * tensor->shape.height * tensor->shape.depth;
} else if (tensor->layout == ETHOSU_LAYOUT_NHCWB16) {
size = tensor->shape.width * tensor->shape.height * ALIGN(tensor->shape.depth, 16);
size = tensor->shape.width * tensor->shape.height * align(tensor->shape.depth, 16);
} else {
assert(0 && "Unsupported layout");
size = 0; // This should never happen


@ -18,11 +18,11 @@ _get_ifm_blocksize(struct ethosu_operation *operation, struct ethosu_block ofm_b
// IFM block height
int h = required_input_size(ofm_block.height, operation->kernel.stride_y, MIN2(operation->kernel.height, SUB_KERNEL_MAX.height));
h = ALIGN(h, OFM_UBLOCK.height);
h = align(h, OFM_UBLOCK.height);
// IFM block width
int w = required_input_size(ofm_block.width, operation->kernel.stride_x, MIN2(operation->kernel.width, SUB_KERNEL_MAX.width));
w = ALIGN(w, OFM_UBLOCK.width);
w = align(w, OFM_UBLOCK.width);
ifm_block.height = h;
ifm_block.width = w;
@ -34,8 +34,8 @@ _get_ifm_blocksize(struct ethosu_operation *operation, struct ethosu_block ofm_b
static bool
try_block_config(struct ethosu_operation *operation, struct ethosu_block ofm_block, struct ethosu_block ifm_block, struct ethosu_shram_layout *layout)
{
int ifm_bytes = ifm_block.width * ifm_block.height * ALIGN(ifm_block.depth, 8);
int ifm_banks = ALIGN(DIV_ROUND_UP(ifm_bytes, BANK_SIZE_BYTES) * 2, IFM_GRANULE);
int ifm_bytes = ifm_block.width * ifm_block.height * align(ifm_block.depth, 8);
int ifm_banks = align(DIV_ROUND_UP(ifm_bytes, BANK_SIZE_BYTES) * 2, IFM_GRANULE);
int lut_bytes = operation->type == ETHOSU_OPERATION_TYPE_ELTWISE ? operation->eltwise.lut_bytes : 0;
int lut_banks = MAX2(DIV_ROUND_UP(lut_bytes, 1024), SHRAM_RESERVED_END_BANKS);
int lut_start = SHRAM_TOTAL_BANKS - lut_banks;
@ -44,8 +44,8 @@ try_block_config(struct ethosu_operation *operation, struct ethosu_block ofm_blo
int acc_start = lut_start;
if (operation->type != ETHOSU_OPERATION_TYPE_ELTWISE) {
int acc_bytes = (ofm_block.width * ofm_block.height * ALIGN(ofm_block.depth, 8) * 32) / 8;
int acc_banks = ALIGN(DIV_ROUND_UP(acc_bytes, BANK_SIZE_BYTES) * 2, ACC_GRANULE);
int acc_bytes = (ofm_block.width * ofm_block.height * align(ofm_block.depth, 8) * 32) / 8;
int acc_banks = align(DIV_ROUND_UP(acc_bytes, BANK_SIZE_BYTES) * 2, ACC_GRANULE);
acc_start -= acc_banks;
} else {
int ifm2_banks = ifm_banks; /* TODO: Fix for scalar eltwise */
@ -89,12 +89,12 @@ find_block_config(struct ethosu_operation *operation)
unsigned depth = MAX2(OFM_UBLOCK.depth, MIN2(search_space.depth, ARCH_SPLIT_DEPTH));
if (depth < operation->ofm.shape.depth) {
depth = ALIGN(depth, ARCH_SPLIT_DEPTH);
depth = align(depth, ARCH_SPLIT_DEPTH);
}
search_space.width = ALIGN(search_space.width, OFM_UBLOCK.width);
search_space.height = ALIGN(search_space.height, OFM_UBLOCK.height);
search_space.depth = ALIGN(search_space.depth, OFM_UBLOCK.depth);
search_space.width = align(search_space.width, OFM_UBLOCK.width);
search_space.height = align(search_space.height, OFM_UBLOCK.height);
search_space.depth = align(search_space.depth, OFM_UBLOCK.depth);
while (depth <= search_space.depth) {
bool wont_fit[search_space.height + 1][search_space.width + 1];
@ -110,7 +110,7 @@ find_block_config(struct ethosu_operation *operation)
struct ethosu_block ifm_block = _get_ifm_blocksize(operation, ofm_block);
if (!is_equal_depth)
ifm_block.depth = ALIGN(MIN2(operation->ifm.shape.depth, operation->conv.part_kernel_first ? 16 : 32), IFM_UBLOCK.depth);
ifm_block.depth = align(MIN2(operation->ifm.shape.depth, operation->conv.part_kernel_first ? 16 : 32), IFM_UBLOCK.depth);
// Try to fit the blocks in SHRAM
struct ethosu_shram_layout layout = {0};
@ -179,7 +179,7 @@ find_block_config(struct ethosu_operation *operation)
depth += OFM_UBLOCK.depth;
if (depth < operation->ofm.shape.depth) {
depth = ALIGN(depth, ARCH_SPLIT_DEPTH);
depth = align(depth, ARCH_SPLIT_DEPTH);
}
}


@ -1167,9 +1167,9 @@ create_nn_config(struct etna_ml_subgraph *subgraph, const struct etna_operation
unsigned in_image_tile_x_size = map->out_image_tile_x_size + weight_width - 1;
unsigned in_image_tile_y_size = map->out_image_tile_y_size + weight_width - 1;
image_cache_size = in_image_tile_x_size * in_image_tile_y_size;
image_cache_size = ALIGN(image_cache_size, 16);
image_cache_size = align(image_cache_size, 16);
image_cache_size *= input_channels;
image_cache_size = ALIGN(image_cache_size, 128);
image_cache_size = align(image_cache_size, 128);
}
ML_DBG("coefficients_size 0x%x (%d) image_size 0x%x (%d)\n", coef_cache_size, coef_cache_size, image_cache_size, image_cache_size);
@ -1182,7 +1182,7 @@ create_nn_config(struct etna_ml_subgraph *subgraph, const struct etna_operation
map->kernel_pattern_msb = 0x0;
map->kernel_pattern_low = 0x0;
map->kernel_pattern_high = 0x0;
map->kernel_cache_end_address = MAX2(MIN2(ALIGN(map->kernel_cache_start_address + coef_cache_size, 128), oc_sram_size), 0xa00);
map->kernel_cache_end_address = MAX2(MIN2(align(map->kernel_cache_start_address + coef_cache_size, 128), oc_sram_size), 0xa00);
} else {
/* Doesn't fit in the 512KB we have of on-chip SRAM */
map->kernel_caching_mode = SRAM_CACHE_MODE_PARTIAL_CACHE;


@ -441,7 +441,7 @@ calculate_weight_bo_size(struct etna_ml_subgraph *subgraph, const struct etna_op
struct pipe_context *context = subgraph->base.context;
struct etna_context *ctx = etna_context(context);
unsigned nn_core_count = etna_ml_get_core_info(ctx)->nn_core_count;
unsigned header_size = ALIGN(nn_core_count * 4, 64);
unsigned header_size = align(nn_core_count * 4, 64);
unsigned input_channels = operation->addition ? 1 : operation->input_channels;
unsigned output_channels = operation->addition ? 1 : operation->output_channels;
unsigned cores_used = MIN2(output_channels, nn_core_count);
@ -453,7 +453,7 @@ calculate_weight_bo_size(struct etna_ml_subgraph *subgraph, const struct etna_op
weights_size = operation->weight_width * operation->weight_height * input_channels;
core_size = 1 + 2 + (weights_size + 4 + 4) * kernels_per_core;
core_size_aligned = ALIGN(core_size, 64);
core_size_aligned = align(core_size, 64);
compressed_size_aligned = header_size + core_size_aligned * cores_used;
return compressed_size_aligned;
@ -466,7 +466,7 @@ calculate_zrl_bits(struct etna_ml_subgraph *subgraph, const struct etna_operatio
struct etna_context *ctx = etna_context(context);
unsigned nn_core_count = etna_ml_get_core_info(ctx)->nn_core_count;
unsigned max_zrl_bits = etna_ml_get_core_info(ctx)->nn_zrl_bits;
unsigned header_size = ALIGN(nn_core_count * 4, 64);
unsigned header_size = align(nn_core_count * 4, 64);
unsigned input_channels = operation->addition ? 1 : operation->input_channels;
unsigned output_channels = operation->addition ? 1 : operation->output_channels;
unsigned cores_used = MIN2(output_channels, nn_core_count);
@ -518,7 +518,7 @@ etna_ml_create_coeffs_v7(struct etna_ml_subgraph *subgraph, const struct etna_op
struct pipe_context *context = subgraph->base.context;
struct etna_context *ctx = etna_context(context);
unsigned nn_core_count = etna_ml_get_core_info(ctx)->nn_core_count;
unsigned header_size = ALIGN(nn_core_count * 4, 64);
unsigned header_size = align(nn_core_count * 4, 64);
unsigned input_channels = operation->addition ? 1 : operation->input_channels;
unsigned output_channels = operation->addition ? 1 : operation->output_channels;
unsigned cores_used = MIN2(output_channels, nn_core_count);
@ -548,7 +548,7 @@ etna_ml_create_coeffs_v7(struct etna_ml_subgraph *subgraph, const struct etna_op
else
actual_size = write_core_sequential(subgraph, map, core, operation, zrl_bits);
actual_size = ALIGN(actual_size, 64);
actual_size = align(actual_size, 64);
max_core_size = MAX2(actual_size, max_core_size);
header[core] = actual_size;


@ -685,7 +685,7 @@ create_bo(struct etna_ml_subgraph *subgraph, const struct etna_operation *operat
input_channels = 2 * output_channels;
unsigned header_size = 64;
unsigned body_size = ALIGN(DIV_ROUND_UP(output_channels, cores_used) * (input_channels * operation->weight_height * operation->weight_width + 4 + 4), 64) * 2;
unsigned body_size = align(DIV_ROUND_UP(output_channels, cores_used) * (input_channels * operation->weight_height * operation->weight_width + 4 + 4), 64) * 2;
unsigned tail_size = 64;
max_size = header_size + cores_used * body_size + tail_size;
@ -803,7 +803,7 @@ etna_ml_create_coeffs_v8(struct etna_ml_subgraph *subgraph, const struct etna_op
header->symbol_map = pack_symbol_map(symbol_map);
header->version = 1;
map += ALIGN(sizeof(*header), 64) / 4;
map += align(sizeof(*header), 64) / 4;
encoder_init(&encoder, symbol_map, map);


@ -553,7 +553,7 @@ fd6_emit_link_map(struct fd_context *ctx, fd_cs &cs,
if (CHIP == A7XX && producer->compiler->load_shader_consts_via_preamble) {
const struct ir3_const_state *const_state = ir3_const_state(consumer);
int base = const_state->primitive_map_ubo.idx;
uint32_t size = ALIGN(consumer->input_size, 4);
uint32_t size = align(consumer->input_size, 4);
fd6_upload_emit_driver_ubo(ctx, cs, consumer, base, size, producer->output_loc);
} else {


@ -1366,8 +1366,8 @@ set_blit_scissor(struct fd_batch *batch, fd_cs &cs)
blit_scissor.minx = 0;
blit_scissor.miny = 0;
blit_scissor.maxx = ALIGN(pfb->width, 16);
blit_scissor.maxy = ALIGN(pfb->height, 4);
blit_scissor.maxx = align(pfb->width, 16);
blit_scissor.maxy = align(pfb->height, 4);
fd_pkt4(cs, 2)
.add(A6XX_RB_RESOLVE_CNTL_1(.x = blit_scissor.minx, .y = blit_scissor.miny))


@ -647,7 +647,7 @@ ir3_get_private_mem(struct fd_context *ctx, const struct ir3_shader_variant *so)
if (ctx->pvtmem[so->pvtmem_per_wave].bo)
fd_bo_del(ctx->pvtmem[so->pvtmem_per_wave].bo);
uint32_t per_sp_size = ALIGN(per_fiber_size * fibers_per_sp, 1 << 12);
uint32_t per_sp_size = align(per_fiber_size * fibers_per_sp, 1 << 12);
uint32_t total_size = per_sp_size * num_sp_cores;
ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size = per_fiber_size;


@ -372,7 +372,7 @@ i915_batch_submit(struct iris_batch *batch)
.buffer_count = validation_count,
.batch_start_offset = 0,
/* This must be QWord aligned. */
.batch_len = ALIGN(batch->primary_batch_size, 8),
.batch_len = align(batch->primary_batch_size, 8),
.flags = batch->i915.exec_flags |
I915_EXEC_NO_RELOC |
I915_EXEC_BATCH_FIRST |


@ -2305,7 +2305,7 @@ intel_aux_map_buffer_alloc(void *driver_ctx, uint32_t size)
struct iris_bufmgr *bufmgr = (struct iris_bufmgr *)driver_ctx;
unsigned int page_size = getpagesize();
size = MAX2(ALIGN(size, page_size), page_size);
size = MAX2(align(size, page_size), page_size);
struct iris_bo *bo = alloc_fresh_bo(bufmgr, size, BO_ALLOC_CAPTURE);
if (!bo) {


@ -398,7 +398,7 @@ emit_indirect_generate_draw(struct iris_batch *batch,
float *vertices =
upload_state(batch, ice->state.dynamic_uploader,
&ice->draw.generation.vertices,
ALIGN(9 * sizeof(float), 8), 8);
align(9 * sizeof(float), 8), 8);
vertices[0] = x1; vertices[1] = y1; vertices[2] = z; /* v0 */
vertices[3] = x0; vertices[4] = y1; vertices[5] = z; /* v1 */


@ -2254,7 +2254,7 @@ iris_map_tiled_memcpy(struct iris_transfer *map)
struct iris_resource *res = (struct iris_resource *) xfer->resource;
struct isl_surf *surf = &res->surf;
xfer->stride = ALIGN(surf->row_pitch_B, 16);
xfer->stride = align(surf->row_pitch_B, 16);
xfer->layer_stride = xfer->stride * box->height;
unsigned x1, x2, y1, y2;


@ -617,7 +617,7 @@ iris_init_identifier_bo(struct iris_screen *screen)
screen->workaround_address = (struct iris_address) {
.bo = screen->workaround_bo,
.offset = ALIGN(
.offset = align(
intel_debug_write_identifiers(bo_map, 4096, "Iris"), 32),
};


@ -9385,7 +9385,7 @@ iris_upload_gpgpu_walker(struct iris_context *ice,
vfe.URBEntryAllocationSize = 2;
vfe.CURBEAllocationSize =
ALIGN(cs_data->push.per_thread.regs * dispatch.threads +
align(cs_data->push.per_thread.regs * dispatch.threads +
cs_data->push.cross_thread.regs, 2);
}
}
@ -9403,15 +9403,15 @@ iris_upload_gpgpu_walker(struct iris_context *ice,
uint32_t *curbe_data_map =
stream_state(batch, ice->state.dynamic_uploader,
&ice->state.last_res.cs_thread_ids,
ALIGN(push_const_size, 64), 64,
align(push_const_size, 64), 64,
&curbe_data_offset);
assert(curbe_data_map);
memset(curbe_data_map, 0x5a, ALIGN(push_const_size, 64));
memset(curbe_data_map, 0x5a, align(push_const_size, 64));
iris_fill_cs_push_const_buffer(screen, shader, dispatch.threads,
curbe_data_map);
iris_emit_cmd(batch, GENX(MEDIA_CURBE_LOAD), curbe) {
curbe.CURBETotalDataLength = ALIGN(push_const_size, 64);
curbe.CURBETotalDataLength = align(push_const_size, 64);
curbe.CURBEDataStartAddress = curbe_data_offset;
}
}


@ -63,7 +63,7 @@ lima_ioctl_gem_create(int fd, unsigned long request, void *arg)
struct shim_fd *shim_fd = drm_shim_fd_lookup(fd);
struct shim_bo *bo = calloc(1, sizeof(*bo));
size_t size = ALIGN(create->size, 4096);
size_t size = align(create->size, 4096);
drm_shim_bo_init(bo, size);


@ -1343,7 +1343,7 @@ out:
info_out->bin.maxGPR = prog->maxGPR;
info_out->bin.code = prog->code;
info_out->bin.codeSize = prog->binSize;
info_out->bin.tlsSpace = ALIGN(prog->tlsSize, 0x10);
info_out->bin.tlsSpace = align(prog->tlsSize, 0x10);
delete prog;
nv50_ir::Target::destroy(targ);


@ -389,7 +389,7 @@ nv50_compute_validate_surfaces(struct nv50_context *nv50)
PUSH_DATAh(push, res->address + buffer->buffer_offset);
PUSH_DATA (push, res->address + buffer->buffer_offset);
PUSH_DATA (push, 0); /* pitch? */
PUSH_DATA (push, ALIGN(buffer->buffer_size, 256) - 1);
PUSH_DATA (push, align(buffer->buffer_size, 256) - 1);
PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
BCTX_REFN(nv50->bufctx_cp, CP_BUF, res, RDWR);
util_range_add(&res->base, &res->valid_buffer_range,
@ -420,7 +420,7 @@ nv50_compute_validate_surfaces(struct nv50_context *nv50)
PUSH_DATAh(push, address);
PUSH_DATA (push, address);
PUSH_DATA (push, 0); /* pitch? */
PUSH_DATA (push, ALIGN(view->u.buf.size, 0x100) - 1);
PUSH_DATA (push, align(view->u.buf.size, 0x100) - 1);
PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
} else {
struct nv50_miptree *mt = nv50_miptree(view->resource);


@ -391,7 +391,7 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir,
shader->info.nr_param_exports || shader->info.nr_prim_param_exports,
&out_needs_scratch_ring,
shader->wave_size,
ALIGN(max_workgroup_size, shader->wave_size),
align(max_workgroup_size, shader->wave_size),
false,
false);
shader->info.uses_mesh_scratch_ring = out_needs_scratch_ring;
@ -923,7 +923,7 @@ static void run_late_optimization_and_lowering_passes(struct si_nir_shader_ctx *
if (si_should_clear_lds(sel->screen, nir)) {
const unsigned chunk_size = 16; /* max single store size */
const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
const unsigned shared_size = align(nir->info.shared_size, chunk_size);
NIR_PASS(_, nir, nir_clear_shared_memory, shared_size, chunk_size);
}


@ -472,7 +472,7 @@ void si_shader_dump_stats_for_shader_db(struct si_screen *screen, struct si_shad
"HSPatchOuts: %u ESOutputs: %u GSOutputs: %u VSOutputs: %u PSOutputs: %u "
"InlineUniforms: %u DivergentLoop: %u (%s, W%u)",
conf->num_sgprs, conf->num_vgprs, si_get_shader_binary_size(screen, shader),
ALIGN(conf->lds_size, ac_shader_get_lds_alloc_granularity(screen->info.gfx_level)),
align(conf->lds_size, ac_shader_get_lds_alloc_granularity(screen->info.gfx_level)),
conf->scratch_bytes_per_wave, shader->info.max_simd_waves,
conf->spilled_sgprs, conf->spilled_vgprs, shader->info.private_mem_vgprs,
num_ls_outputs, num_hs_outputs,


@ -769,7 +769,7 @@ si_sqtt_add_code_object(struct si_context *sctx,
record->shader_data[i].hw_stage = hw_stage;
record->shader_data[i].is_combined = false;
record->shader_data[i].scratch_memory_size = shader->config.scratch_bytes_per_wave;
record->shader_data[i].lds_size = ALIGN(shader->config.lds_size, ac_shader_get_lds_alloc_granularity(sctx->gfx_level));
record->shader_data[i].lds_size = align(shader->config.lds_size, ac_shader_get_lds_alloc_granularity(sctx->gfx_level));
record->shader_data[i].wavefront_size = shader->wave_size;
record->shader_stages_mask |= 1 << i;


@ -31,12 +31,12 @@ rkt_fill_weights(struct rkt_ml_subgraph *subgraph,
input_channels = MAX2(input_channels, FEATURE_ATOMIC_SIZE);
output_channels = ALIGN(output_channels, 2);
output_channels = align(output_channels, 2);
if (rkt_is_depthwise(poperation))
output_channels = 1;
weights_size = weights_width * weights_height * output_channels *
ALIGN(input_channels, WEIGHT_ATOMIC_SIZE) * 2;
align(input_channels, WEIGHT_ATOMIC_SIZE) * 2;
rsc =
pipe_buffer_create(pcontext->screen, 0, PIPE_USAGE_DEFAULT, weights_size);
@ -62,7 +62,7 @@ rkt_fill_weights(struct rkt_ml_subgraph *subgraph,
unsigned oc = oc1 * WEIGHT_ATOMIC_SIZE + oc2;
unsigned ic = ic1 * input_channel_groups + ic2;
if (output_channels_real > 2 &&
oc >= ALIGN(output_channels_real, 2))
oc >= align(output_channels_real, 2))
continue;
if (oc >= output_channels_real)


@ -105,7 +105,7 @@ compile_operation(struct rkt_ml_subgraph *subgraph,
unsigned size =
util_dynarray_num_elements(&regcfgs[i], uint64_t) * sizeof(uint64_t);
regcfg_total_size += ALIGN(size, 64);
regcfg_total_size += align(size, 64);
}
operation->regcmd = pipe_buffer_create(pcontext->screen, 0,
@ -129,13 +129,13 @@ compile_operation(struct rkt_ml_subgraph *subgraph,
util_dynarray_element(&regcfgs[i], uint64_t, reg_count - 3);
uint64_t addr = rkt_resource(operation->regcmd)->phys_addr +
regcmd_offset + ALIGN(size * sizeof(uint64_t), 64);
regcmd_offset + align(size * sizeof(uint64_t), 64);
*next_address_reg |= addr << 16;
unsigned regs_to_fetch =
util_dynarray_num_elements(&regcfgs[i + 1], uint64_t);
regs_to_fetch -= 4;
regs_to_fetch = ALIGN(regs_to_fetch / 2, 2);
regs_to_fetch = align(regs_to_fetch / 2, 2);
*reg_count_reg |= regs_to_fetch << 16;
}
@ -151,7 +151,7 @@ compile_operation(struct rkt_ml_subgraph *subgraph,
rkt_dump_buffer(regcmd, "regcmd", 0, i, regcmd_offset,
(size + 4) * sizeof(uint64_t));
regcmd_offset += ALIGN(size * sizeof(uint64_t), 64);
regcmd_offset += align(size * sizeof(uint64_t), 64);
}
pipe_buffer_unmap(pcontext, transfer);


@ -101,7 +101,7 @@ fill_task(struct rkt_ml_subgraph *subgraph,
task->input_height = operation->input_height;
task->input_channels =
ALIGN(MAX2(operation->input_channels, FEATURE_ATOMIC_SIZE),
align(MAX2(operation->input_channels, FEATURE_ATOMIC_SIZE),
FEATURE_ATOMIC_SIZE);
task->input_channels_real = operation->input_channels;
task->input_zero_point = operation->input_zero_point;
@ -111,11 +111,11 @@ fill_task(struct rkt_ml_subgraph *subgraph,
task->output_height = operation->output_height;
task->output_channels_real = operation->output_channels;
task->output_channels = ALIGN(MAX2(operation->output_channels, 32), 32);
task->output_channels = align(MAX2(operation->output_channels, 32), 32);
if (operation->depthwise) {
if (task->output_channels_real <= 32)
task->output_channels *= 2;
task->output_channels = ALIGN(task->output_channels, 64);
task->output_channels = align(task->output_channels, 64);
}
task->output_zero_point = operation->output_zero_point;
@ -170,7 +170,7 @@ fill_task(struct rkt_ml_subgraph *subgraph,
if (operation->depthwise)
task->weights_kernels = 1;
else
task->weights_kernels = ALIGN(operation->output_channels, 2);
task->weights_kernels = align(operation->output_channels, 2);
task->surfaces_per_row = task->output_width * task->output_height * 2;
if (operation->depthwise)


@ -201,8 +201,8 @@ virgl_drm_winsys_resource_create_blob(struct virgl_winsys *qws,
return NULL;
/* Make sure blob is page aligned. */
width = ALIGN(width, getpagesize());
size = ALIGN(size, getpagesize());
width = align(width, getpagesize());
size = align(size, getpagesize());
blob_id = p_atomic_inc_return(&qdws->blob_id);
cmd[0] = VIRGL_CMD0(VIRGL_CCMD_PIPE_RESOURCE_CREATE, 0, VIRGL_PIPE_RES_CREATE_SIZE);


@ -311,8 +311,8 @@ virgl_vtest_winsys_resource_create(struct virgl_winsys *vws,
if ((flags & (VIRGL_RESOURCE_FLAG_MAP_PERSISTENT |
VIRGL_RESOURCE_FLAG_MAP_COHERENT))) {
width = ALIGN(width, getpagesize());
size = ALIGN(size, getpagesize());
width = align(width, getpagesize());
size = align(size, getpagesize());
new_handle = virgl_vtest_winsys_resource_create_blob(vws, target, format, bind,
width, height, depth,
array_size, last_level, nr_samples,


@ -89,7 +89,7 @@ static void pvr_image_init_physical_extent(struct pvr_image *image)
if (image->vk.usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
VK_IMAGE_USAGE_TRANSFER_DST_BIT)) {
image->physical_extent.width =
ALIGN(image->physical_extent.width,
align(image->physical_extent.width,
ROGUE_PBESTATE_REG_WORD0_LINESTRIDE_UNIT_SIZE);
}
}
@ -110,11 +110,11 @@ static void pvr_image_setup_mip_levels(struct pvr_image *image)
for (uint32_t i = 0; i < image->vk.mip_levels; i++) {
struct pvr_mip_level *mip_level = &image->mip_levels[i];
mip_level->pitch = cpp * ALIGN(extent.width, extent_alignment);
mip_level->height_pitch = ALIGN(extent.height, extent_alignment);
mip_level->pitch = cpp * align(extent.width, extent_alignment);
mip_level->height_pitch = align(extent.height, extent_alignment);
mip_level->size = image->vk.samples * mip_level->pitch *
mip_level->height_pitch *
ALIGN(extent.depth, extent_alignment);
align(extent.depth, extent_alignment);
mip_level->offset = image->layer_size;
image->layer_size += mip_level->size;
@ -129,11 +129,11 @@ static void pvr_image_setup_mip_levels(struct pvr_image *image)
* were present so we need to account for that in the `layer_size`.
*/
while (extent.height != 1 || extent.width != 1 || extent.depth != 1) {
const uint32_t height_pitch = ALIGN(extent.height, extent_alignment);
const uint32_t pitch = cpp * ALIGN(extent.width, extent_alignment);
const uint32_t height_pitch = align(extent.height, extent_alignment);
const uint32_t pitch = cpp * align(extent.width, extent_alignment);
image->layer_size += image->vk.samples * pitch * height_pitch *
ALIGN(extent.depth, extent_alignment);
align(extent.depth, extent_alignment);
extent.height = u_minify(extent.height, 1);
extent.width = u_minify(extent.width, 1);


@ -61,7 +61,7 @@ VkResult pvr_init_robustness_buffer(struct pvr_device *device)
VkResult result;
#define ROBUSTNESS_BUFFER_OFFSET_ALIGN16(cur_offset, add) \
((uint16_t)ALIGN((cur_offset + (uint16_t)(add)), 16))
((uint16_t)align((cur_offset + (uint16_t)(add)), 16))
robustness_buffer_offsets[PVR_ROBUSTNESS_BUFFER_FORMAT_UINT64] = offset;
offset = ROBUSTNESS_BUFFER_OFFSET_ALIGN16(offset, sizeof(uint64_t) * 4);


@ -314,8 +314,8 @@ blorp_hiz_op(struct blorp_batch *batch, struct blorp_surf *surf,
params.depth.view.base_level);
params.y1 = u_minify(params.depth.surf.logical_level0_px.height,
params.depth.view.base_level);
params.x1 = ALIGN(params.x1, 8);
params.y1 = ALIGN(params.y1, 4);
params.x1 = align(params.x1, 8);
params.y1 = align(params.y1, 4);
if (params.depth.view.base_level == 0) {
/* TODO: What about MSAA? */


@ -1731,9 +1731,9 @@ blorp_surf_retile_w_to_y(const struct isl_device *isl_dev,
const unsigned x_align = 8, y_align = info->surf.samples != 0 ? 8 : 4;
info->surf.tiling = ISL_TILING_Y0;
info->surf.logical_level0_px.width =
ALIGN(info->surf.logical_level0_px.width, x_align) * 2;
align(info->surf.logical_level0_px.width, x_align) * 2;
info->surf.logical_level0_px.height =
ALIGN(info->surf.logical_level0_px.height, y_align) / 2;
align(info->surf.logical_level0_px.height, y_align) / 2;
info->tile_x_sa *= 2;
info->tile_y_sa /= 2;
}
@ -1984,8 +1984,8 @@ try_blorp_blit(struct blorp_batch *batch,
isl_get_interleaved_msaa_px_size_sa(params->dst.surf.samples);
params->x0 = ROUND_DOWN_TO(params->x0, 2) * px_size_sa.width;
params->y0 = ROUND_DOWN_TO(params->y0, 2) * px_size_sa.height;
params->x1 = ALIGN(params->x1, 2) * px_size_sa.width;
params->y1 = ALIGN(params->y1, 2) * px_size_sa.height;
params->x1 = align(params->x1, 2) * px_size_sa.width;
params->y1 = align(params->y1, 2) * px_size_sa.height;
blorp_surf_fake_interleaved_msaa(batch->blorp->isl_dev, &params->dst);
@ -2045,8 +2045,8 @@ try_blorp_blit(struct blorp_batch *batch,
const unsigned y_align = params->dst.surf.samples != 0 ? 8 : 4;
params->x0 = ROUND_DOWN_TO(params->x0, x_align) * 2;
params->y0 = ROUND_DOWN_TO(params->y0, y_align) / 2;
params->x1 = ALIGN(params->x1, x_align) * 2;
params->y1 = ALIGN(params->y1, y_align) / 2;
params->x1 = align(params->x1, x_align) * 2;
params->y1 = align(params->y1, y_align) / 2;
/* Retile the surface to Y-tiled */
blorp_surf_retile_w_to_y(batch->blorp->isl_dev, &params->dst);


@ -387,8 +387,8 @@ get_fast_clear_rect(const struct isl_device *dev,
*x0 = ROUND_DOWN_TO(*x0, x_align) / x_scaledown;
*y0 = ROUND_DOWN_TO(*y0, y_align) / y_scaledown;
*x1 = ALIGN(*x1, x_align) / x_scaledown;
*y1 = ALIGN(*y1, y_align) / y_scaledown;
*x1 = align(*x1, x_align) / x_scaledown;
*y1 = align(*y1, y_align) / y_scaledown;
}
static void
@ -541,7 +541,7 @@ blorp_fast_clear(struct blorp_batch *batch,
assert(surf->surf->levels > 1 ||
surf->surf->logical_level0_px.d > 1 ||
surf->surf->logical_level0_px.a > 1);
const int phys_height0 = ALIGN(surf->surf->logical_level0_px.h,
const int phys_height0 = align(surf->surf->logical_level0_px.h,
surf->surf->image_alignment_el.h);
unaligned_height = phys_height0 % 32;
size_B = (int64_t)surf->surf->row_pitch_B * (phys_height0 - unaligned_height);
@ -583,7 +583,7 @@ blorp_fast_clear(struct blorp_batch *batch,
mem_surf.addr.offset, size_B, ISL_TILING_4);
assert(isl_surf.logical_level0_px.h == 32);
assert(isl_surf.logical_level0_px.a == 1);
isl_surf.row_pitch_B = ALIGN(isl_surf.row_pitch_B, 16 * 128);
isl_surf.row_pitch_B = align(isl_surf.row_pitch_B, 16 * 128);
} else {
isl_surf_from_mem(batch->blorp->isl_dev, &isl_surf,
mem_surf.addr.offset, size_B, ISL_TILING_64);
@ -1341,8 +1341,8 @@ blorp_ccs_resolve(struct blorp_batch *batch,
x_scaledown = aux_fmtl->bw / 2;
y_scaledown = aux_fmtl->bh / 2;
}
params.x1 = ALIGN(params.x1, x_scaledown) / x_scaledown;
params.y1 = ALIGN(params.y1, y_scaledown) / y_scaledown;
params.x1 = align(params.x1, x_scaledown) / x_scaledown;
params.y1 = align(params.y1, y_scaledown) / y_scaledown;
}
if (batch->blorp->isl_dev->info->ver >= 10) {


@ -1656,7 +1656,7 @@ blorp_get_compute_push_const(struct blorp_batch *batch,
{
const struct brw_cs_prog_data *cs_prog_data = params->cs_prog_data;
const unsigned push_const_size =
ALIGN(brw_cs_push_const_total_size(cs_prog_data, threads), 64);
align(brw_cs_push_const_total_size(cs_prog_data, threads), 64);
assert(cs_prog_data->push.cross_thread.size +
cs_prog_data->push.per_thread.size == sizeof(params->wm_inputs));
@ -1860,7 +1860,7 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
vfe.URBEntryAllocationSize = 2;
const uint32_t vfe_curbe_allocation =
ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads +
align(cs_prog_data->push.per_thread.regs * dispatch.threads +
cs_prog_data->push.cross_thread.regs, 2);
vfe.CURBEAllocationSize = vfe_curbe_allocation;
}


@ -1882,7 +1882,7 @@ blorp_get_compute_push_const(struct blorp_batch *batch,
{
const struct elk_cs_prog_data *cs_prog_data = params->cs_prog_data;
const unsigned push_const_size =
ALIGN(elk_cs_push_const_total_size(cs_prog_data, threads), 64);
align(elk_cs_push_const_total_size(cs_prog_data, threads), 64);
assert(cs_prog_data->push.cross_thread.size +
cs_prog_data->push.per_thread.size == sizeof(params->wm_inputs));
@ -1981,7 +1981,7 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
vfe.URBEntryAllocationSize = GFX_VER >= 8 ? 2 : 0;
const uint32_t vfe_curbe_allocation =
ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads +
align(cs_prog_data->push.per_thread.regs * dispatch.threads +
cs_prog_data->push.cross_thread.regs, 2);
vfe.CURBEAllocationSize = vfe_curbe_allocation;
}


@ -135,7 +135,7 @@ intel_get_urb_config(const struct intel_device_info *devinfo,
* Round them all up.
*/
for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
min_entries[i] = ALIGN(min_entries[i], granularity[i]);
min_entries[i] = align(min_entries[i], granularity[i]);
}
unsigned entry_size_bytes[4];
@ -323,7 +323,7 @@ intel_get_mesh_urb_config(const struct intel_device_info *devinfo,
* in slices beyond Slice0) of the MESH URB allocation, specified in
* multiples of 8 KB.
*/
push_constant_kb = ALIGN(push_constant_kb, 8);
push_constant_kb = align(push_constant_kb, 8);
total_urb_kb -= push_constant_kb;
const unsigned total_urb_avail_mesh_task_kb = total_urb_kb;
@ -360,9 +360,9 @@ intel_get_mesh_urb_config(const struct intel_device_info *devinfo,
*/
const unsigned min_mesh_entries = urb_cfg->size[MESA_SHADER_MESH] < 9 ? 8 : 1;
const unsigned min_task_entries = urb_cfg->size[MESA_SHADER_TASK] < 9 ? 8 : 1;
const unsigned min_mesh_urb_kb = ALIGN(urb_cfg->size[MESA_SHADER_MESH] *
const unsigned min_mesh_urb_kb = align(urb_cfg->size[MESA_SHADER_MESH] *
min_mesh_entries * 64, 1024) / 1024;
const unsigned min_task_urb_kb = ALIGN(urb_cfg->size[MESA_SHADER_TASK] *
const unsigned min_task_urb_kb = align(urb_cfg->size[MESA_SHADER_TASK] *
min_task_entries * 64, 1024) / 1024;
total_urb_kb -= (min_mesh_urb_kb + min_task_urb_kb);
@ -380,8 +380,8 @@ intel_get_mesh_urb_config(const struct intel_device_info *devinfo,
* in slices beyond Slice0) of the TASK URB allocation, specified in
* multiples of 8 KB.
*/
if ((total_urb_avail_mesh_task_kb - ALIGN(mesh_urb_kb, 8)) >= min_task_entries) {
mesh_urb_kb = ALIGN(mesh_urb_kb, 8);
if ((total_urb_avail_mesh_task_kb - align(mesh_urb_kb, 8)) >= min_task_entries) {
mesh_urb_kb = align(mesh_urb_kb, 8);
} else {
mesh_urb_kb = ROUND_DOWN_TO(mesh_urb_kb, 8);
}

View file

@ -1219,7 +1219,7 @@ remap_attr_reg(brw_shader &s,
const bool per_prim = src.nr < prog_data->num_per_primitive_inputs;
const unsigned base = urb_start +
(per_prim ? 0 :
ALIGN(prog_data->num_per_primitive_inputs / 2,
align(prog_data->num_per_primitive_inputs / 2,
reg_unit(s.devinfo)) * s.max_polygons);
const unsigned idx = per_prim ? src.nr :
src.nr - prog_data->num_per_primitive_inputs;


@ -223,7 +223,7 @@ brw_compile_gs(const struct brw_compiler *compiler,
/* 1 HWORD = 32 bytes = 256 bits */
prog_data->control_data_header_size_hwords =
ALIGN(control_data_header_size_bits, 256) / 256;
align(control_data_header_size_bits, 256) / 256;
/* Compute the output vertex size.
*
@ -276,7 +276,7 @@ brw_compile_gs(const struct brw_compiler *compiler,
unsigned output_vertex_size_bytes = prog_data->base.vue_map.num_slots * 16;
assert(output_vertex_size_bytes <= GFX7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES);
prog_data->output_vertex_size_hwords =
ALIGN(output_vertex_size_bytes, 32) / 32;
align(output_vertex_size_bytes, 32) / 32;
/* Compute URB entry size. The maximum allowed URB entry size is 32k.
* That divides up as follows:
@ -330,7 +330,7 @@ brw_compile_gs(const struct brw_compiler *compiler,
/* URB entry sizes are stored as a multiple of 64 bytes in gfx7+. */
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
prog_data->base.urb_entry_size = align(output_size_bytes, 64) / 64;
assert(nir->info.gs.output_primitive < ARRAY_SIZE(gl_prim_to_hw_prim));
prog_data->output_topology =

View file

@ -124,7 +124,7 @@ brw_nir_lower_tue_outputs(nir_shader *nir, brw_tue_map *map)
NIR_PASS(_, nir, nir_lower_explicit_io,
nir_var_mem_task_payload, nir_address_format_32bit_offset);
map->size_dw = ALIGN(DIV_ROUND_UP(nir->info.task_payload_size, 4), 8);
map->size_dw = align(DIV_ROUND_UP(nir->info.task_payload_size, 4), 8);
}
static void
@ -203,7 +203,7 @@ brw_nir_align_launch_mesh_workgroups_instr(nir_builder *b,
/* This will avoid special case in nir_lower_task_shader dealing with
* not vec4-aligned payload when payload_in_shared workaround is enabled.
*/
nir_intrinsic_set_range(intrin, ALIGN(range, 16));
nir_intrinsic_set_range(intrin, align(range, 16));
return true;
}
@ -505,7 +505,7 @@ brw_nir_lower_tue_inputs(nir_shader *nir, const brw_tue_map *map)
/* The types for Task Output and Mesh Input should match, so their sizes
* should also match.
*/
assert(!map || map->size_dw == ALIGN(DIV_ROUND_UP(nir->info.task_payload_size, 4), 8));
assert(!map || map->size_dw == align(DIV_ROUND_UP(nir->info.task_payload_size, 4), 8));
} else {
/* Mesh doesn't read any input, to make it clearer set the
* task_payload_size to zero instead of keeping an incomplete size that

View file

@ -263,7 +263,7 @@ brw_compile_tcs(const struct brw_compiler *compiler,
return NULL;
/* URB entry sizes are stored as a multiple of 64 bytes. */
vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
vue_prog_data->urb_entry_size = align(output_size_bytes, 64) / 64;
/* HS does not use the usual payload pushing from URB to GRFs,
* because we don't have enough registers for a full-size payload, and

View file

@ -133,7 +133,7 @@ brw_compile_tes(const struct brw_compiler *compiler,
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
/* URB entry sizes are stored as a multiple of 64 bytes. */
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
prog_data->base.urb_entry_size = align(output_size_bytes, 64) / 64;
prog_data->base.urb_read_length = 0;
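These hunks repeat the same round-up-then-divide pattern: bits to 256-bit HWORDs, bytes to 32-byte or 64-byte URB units. A hedged worked example for the 64-byte case, with an assumed output size:

#include <assert.h>

/* Hypothetical worked example for urb_entry_size = align(output_size_bytes, 64) / 64,
 * assuming an output size of 1000 bytes. */
void
check_urb_entry_size(void)
{
   unsigned output_size_bytes = 1000;
   unsigned urb_entry_size = ((output_size_bytes + 63u) & ~63u) / 64;
   assert(urb_entry_size == 16); /* 1000 bytes round up to 1024 bytes = 16 * 64 */
}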

View file

@ -407,7 +407,7 @@ brw_append_insns(struct brw_codegen *p, unsigned nr_insn, unsigned alignment)
assert(util_is_power_of_two_or_zero(sizeof(brw_eu_inst)));
assert(util_is_power_of_two_or_zero(alignment));
const unsigned align_insn = MAX2(alignment / sizeof(brw_eu_inst), 1);
const unsigned start_insn = ALIGN(p->nr_insn, align_insn);
const unsigned start_insn = align(p->nr_insn, align_insn);
const unsigned new_nr_insn = start_insn + nr_insn;
if (p->store_size < new_nr_insn) {

View file

@ -7295,7 +7295,7 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
components = last_component - first_read_component + 1;
}
total = ALIGN(components, REG_SIZE * reg_unit(devinfo) / 4);
total = align(components, REG_SIZE * reg_unit(devinfo) / 4);
dest = ubld.vgrf(BRW_TYPE_UD, total);
} else {
total = components * bld.dispatch_width();
@ -7902,7 +7902,7 @@ brw_from_nir(brw_shader *s)
*/
brw_from_nir_setup_outputs(ntb);
brw_from_nir_emit_system_values(ntb);
ntb.s.last_scratch = ALIGN(ntb.nir->scratch_size, 4) * ntb.s.dispatch_width;
ntb.s.last_scratch = align(ntb.nir->scratch_size, 4) * ntb.s.dispatch_width;
brw_from_nir_emit_impl(ntb, nir_shader_get_entrypoint((nir_shader *)ntb.nir));

View file

@ -527,7 +527,7 @@ brw_flag_mask(const brw_inst *inst, unsigned width)
assert(util_is_power_of_two_nonzero(width));
const unsigned start = (inst->flag_subreg * 16 + inst->group) &
~(width - 1);
const unsigned end = start + ALIGN(inst->exec_size, width);
const unsigned end = start + align(inst->exec_size, width);
return ((1 << DIV_ROUND_UP(end, 8)) - 1) & ~((1 << (start / 8)) - 1);
}
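The returned value marks which bytes of the flag register the instruction touches, with the span rounded up to the channel-enable width. A hedged worked example with assumed operand values:

#include <assert.h>

/* Hypothetical worked example for the flag-byte mask above, assuming
 * flag_subreg = 0, group = 8, exec_size = 8, width = 8. */
void
check_flag_mask_example(void)
{
   unsigned start = (0 * 16 + 8) & ~(8u - 1);  /* first flag bit used: 8          */
   unsigned end   = start + ((8 + 7u) & ~7u);  /* start + align(exec_size, width) */
   unsigned mask  = ((1u << ((end + 7) / 8)) - 1) & ~((1u << (start / 8)) - 1);
   assert(mask == 0x2);                        /* only flag byte 1 is touched */
}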

View file

@ -467,7 +467,7 @@ brw_lower_find_live_channel(brw_shader &s)
* specified quarter control as result.
*/
if (inst->group > 0)
ubld.SHR(mask, mask, brw_imm_ud(ALIGN(inst->group, 8)));
ubld.SHR(mask, mask, brw_imm_ud(align(inst->group, 8)));
ubld.AND(mask, exec_mask, mask);
exec_mask = mask;

View file

@ -150,7 +150,7 @@ analyze_ubos_block(struct ubo_analysis_state *state, nir_block *block)
nir_def_last_component_read(&intrin->def) + 1;
const int bytes = num_components * (intrin->def.bit_size / 8);
const int start = ROUND_DOWN_TO(byte_offset, sizeof_GRF);
const int end = ALIGN(byte_offset + bytes, sizeof_GRF);
const int end = align(byte_offset + bytes, sizeof_GRF);
const int chunks = (end - start) / sizeof_GRF;
/* TODO: should we count uses in loops as higher benefit? */
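The chunk count above covers every GRF-sized block the access straddles, by rounding the start down and the end up. A hedged worked example assuming a 32-byte GRF and a 16-byte load at byte offset 20:

#include <assert.h>

/* Hypothetical worked example of the GRF chunk count above. */
void
check_ubo_chunks_example(void)
{
   const int sizeof_GRF = 32;
   int byte_offset = 20, bytes = 16;
   int start  = byte_offset - (byte_offset % sizeof_GRF);                           /* ROUND_DOWN_TO -> 0 */
   int end    = ((byte_offset + bytes + sizeof_GRF - 1) / sizeof_GRF) * sizeof_GRF; /* align -> 64        */
   int chunks = (end - start) / sizeof_GRF;
   assert(chunks == 2);                                                             /* straddles two GRFs */
}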

View file

@ -1574,7 +1574,7 @@ brw_opt_combine_constants(brw_shader &s)
* seem to have additional alignment requirements, so account for that
* too.
*/
assert(reg.offset == ALIGN(reg.offset, get_alignment_for_imm(imm)));
assert(reg.offset == align(reg.offset, get_alignment_for_imm(imm)));
struct brw_reg imm_reg = build_imm_reg_for_copy(imm);

View file

@ -52,7 +52,7 @@ brw_assign_regs_trivial(brw_shader &s)
int reg_width = s.dispatch_width / 8;
/* Note that compressed instructions require alignment to 2 registers. */
hw_reg_mapping[0] = ALIGN(s.first_non_payload_grf, reg_width);
hw_reg_mapping[0] = align(s.first_non_payload_grf, reg_width);
for (i = 1; i <= s.alloc.count; i++) {
hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
DIV_ROUND_UP(s.alloc.sizes[i - 1],
@ -264,7 +264,7 @@ public:
* for reg_width == 2.
*/
int reg_width = fs->dispatch_width / 8;
payload_node_count = ALIGN(fs->first_non_payload_grf, reg_width);
payload_node_count = align(fs->first_non_payload_grf, reg_width);
/* Get payload IP information */
payload_last_use_ip = ralloc_array(mem_ctx, int, payload_node_count);
@ -1180,7 +1180,7 @@ brw_reg_alloc::choose_spill_reg()
brw_reg
brw_reg_alloc::alloc_spill_reg(unsigned size, int ip)
{
int vgrf = brw_allocate_vgrf_units(*fs, ALIGN(size, reg_unit(devinfo))).nr;
int vgrf = brw_allocate_vgrf_units(*fs, align(size, reg_unit(devinfo))).nr;
int class_idx = DIV_ROUND_UP(size, reg_unit(devinfo)) - 1;
int n = ra_add_node(g, compiler->reg_set.classes[class_idx]);
assert(n == first_vgrf_node + vgrf);
@ -1216,7 +1216,7 @@ brw_reg_alloc::spill_reg(unsigned spill_reg)
{
int size = fs->alloc.sizes[spill_reg];
unsigned int spill_offset = fs->last_scratch;
assert(ALIGN(spill_offset, 16) == spill_offset); /* oword read/write req. */
assert(align(spill_offset, 16) == spill_offset); /* oword read/write req. */
fs->spilled_any_registers = true;

View file

@ -233,7 +233,7 @@ brw_rt_compute_scratch_layout(struct brw_rt_scratch_layout *layout,
*/
assert(size % 64 == 0);
layout->sw_stack_start = size;
layout->sw_stack_size = ALIGN(sw_stack_size, 64);
layout->sw_stack_size = align(sw_stack_size, 64);
/* Currently it's always the case that sw_stack_size is a power of
* two, but power-of-two SW stack sizes are prone to causing

View file

@ -570,7 +570,7 @@ elk_append_insns(struct elk_codegen *p, unsigned nr_insn, unsigned alignment)
assert(util_is_power_of_two_or_zero(sizeof(elk_inst)));
assert(util_is_power_of_two_or_zero(alignment));
const unsigned align_insn = MAX2(alignment / sizeof(elk_inst), 1);
const unsigned start_insn = ALIGN(p->nr_insn, align_insn);
const unsigned start_insn = align(p->nr_insn, align_insn);
const unsigned new_nr_insn = start_insn + nr_insn;
if (p->store_size < new_nr_insn) {

View file

@ -945,7 +945,7 @@ namespace {
assert(util_is_power_of_two_nonzero(width));
const unsigned start = (inst->flag_subreg * 16 + inst->group) &
~(width - 1);
const unsigned end = start + ALIGN(inst->exec_size, width);
const unsigned end = start + align(inst->exec_size, width);
return ((1 << DIV_ROUND_UP(end, 8)) - 1) & ~((1 << (start / 8)) - 1);
}
@ -1525,7 +1525,7 @@ elk_fs_visitor::assign_urb_setup()
const bool per_prim = inst->src[i].nr < prog_data->num_per_primitive_inputs;
const unsigned base = urb_start +
(per_prim ? 0 :
ALIGN(prog_data->num_per_primitive_inputs / 2,
align(prog_data->num_per_primitive_inputs / 2,
reg_unit(devinfo)));
const unsigned idx = per_prim ? inst->src[i].nr :
inst->src[i].nr - prog_data->num_per_primitive_inputs;
@ -5296,7 +5296,7 @@ elk_fs_visitor::lower_find_live_channel()
* specified quarter control as result.
*/
if (inst->group > 0)
ubld.SHR(mask, mask, elk_imm_ud(ALIGN(inst->group, 8)));
ubld.SHR(mask, mask, elk_imm_ud(align(inst->group, 8)));
ubld.AND(mask, exec_mask, mask);
exec_mask = mask;
@ -6003,7 +6003,7 @@ elk_fs_visitor::allocate_registers(bool allow_spilling)
* field documentation, platforms prior to Haswell measure scratch
* size linearly with a range of [1kB, 12kB] and 1kB granularity.
*/
prog_data->total_scratch = ALIGN(last_scratch, 1024);
prog_data->total_scratch = align(last_scratch, 1024);
max_scratch_size = 12 * 1024;
}
}
@ -6666,7 +6666,7 @@ elk_nir_populate_wm_prog_data(nir_shader *shader,
static inline int
elk_register_blocks(int reg_count)
{
return ALIGN(reg_count, 16) / 16 - 1;
return align(reg_count, 16) / 16 - 1;
}
const unsigned *

View file

@ -1461,7 +1461,7 @@ elk_fs_visitor::opt_combine_constants()
* instructions seem to have additional alignment requirements, so
* account for that too.
*/
reg.offset = ALIGN(reg.offset, get_alignment_for_imm(imm));
reg.offset = align(reg.offset, get_alignment_for_imm(imm));
/* Ensure we have enough space in the register to copy the immediate */
if (reg.offset + imm->size > REG_SIZE) {
@ -1555,7 +1555,7 @@ elk_fs_visitor::opt_combine_constants()
* seem to have additional alignment requirements, so account for that
* too.
*/
assert(reg.offset == ALIGN(reg.offset, get_alignment_for_imm(imm)));
assert(reg.offset == align(reg.offset, get_alignment_for_imm(imm)));
struct elk_reg imm_reg = build_imm_reg_for_copy(imm);

View file

@ -5036,7 +5036,7 @@ fs_nir_emit_intrinsic(nir_to_elk_state &ntb,
}
const unsigned total_dwords =
ALIGN(instr->num_components, REG_SIZE * reg_unit(devinfo) / 4);
align(instr->num_components, REG_SIZE * reg_unit(devinfo) / 4);
unsigned loaded_dwords = 0;
const elk_fs_reg packed_consts =
@ -5225,7 +5225,7 @@ fs_nir_emit_intrinsic(nir_to_elk_state &ntb,
break;
case nir_intrinsic_load_global_constant_uniform_block_intel: {
const unsigned total_dwords = ALIGN(instr->num_components,
const unsigned total_dwords = align(instr->num_components,
REG_SIZE * reg_unit(devinfo) / 4);
unsigned loaded_dwords = 0;
@ -5360,7 +5360,7 @@ fs_nir_emit_intrinsic(nir_to_elk_state &ntb,
srcs[SURFACE_LOGICAL_SRC_SURFACE] = elk_fs_reg(elk_imm_ud(GFX7_BTI_SLM));
}
const unsigned total_dwords = ALIGN(instr->num_components,
const unsigned total_dwords = align(instr->num_components,
REG_SIZE * reg_unit(devinfo) / 4);
unsigned loaded_dwords = 0;
@ -7002,7 +7002,7 @@ nir_to_elk(elk_fs_visitor *s)
fs_nir_setup_outputs(ntb);
fs_nir_setup_uniforms(ntb.s);
fs_nir_emit_system_values(ntb);
ntb.s.last_scratch = ALIGN(ntb.nir->scratch_size, 4) * ntb.s.dispatch_width;
ntb.s.last_scratch = align(ntb.nir->scratch_size, 4) * ntb.s.dispatch_width;
fs_nir_emit_impl(ntb, nir_shader_get_entrypoint((nir_shader *)ntb.nir));

View file

@ -52,7 +52,7 @@ elk_fs_visitor::assign_regs_trivial()
int reg_width = dispatch_width / 8;
/* Note that compressed instructions require alignment to 2 registers. */
hw_reg_mapping[0] = ALIGN(this->first_non_payload_grf, reg_width);
hw_reg_mapping[0] = align(this->first_non_payload_grf, reg_width);
for (i = 1; i <= this->alloc.count; i++) {
hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
DIV_ROUND_UP(this->alloc.sizes[i - 1],
@ -325,7 +325,7 @@ public:
*/
int reg_width = fs->dispatch_width / 8;
rsi = util_logbase2(reg_width);
payload_node_count = ALIGN(fs->first_non_payload_grf, reg_width);
payload_node_count = align(fs->first_non_payload_grf, reg_width);
/* Get payload IP information */
payload_last_use_ip = ralloc_array(mem_ctx, int, payload_node_count);
@ -940,7 +940,7 @@ elk_fs_reg_alloc::choose_spill_reg()
elk_fs_reg
elk_fs_reg_alloc::alloc_spill_reg(unsigned size, int ip)
{
int vgrf = fs->alloc.allocate(ALIGN(size, reg_unit(devinfo)));
int vgrf = fs->alloc.allocate(align(size, reg_unit(devinfo)));
int class_idx = DIV_ROUND_UP(size, reg_unit(devinfo)) - 1;
int n = ra_add_node(g, compiler->fs_reg_sets[rsi].classes[class_idx]);
assert(n == first_vgrf_node + vgrf);
@ -975,7 +975,7 @@ elk_fs_reg_alloc::spill_reg(unsigned spill_reg)
{
int size = fs->alloc.sizes[spill_reg];
unsigned int spill_offset = fs->last_scratch;
assert(ALIGN(spill_offset, 16) == spill_offset); /* oword read/write req. */
assert(align(spill_offset, 16) == spill_offset); /* oword read/write req. */
/* Spills may use MRFs 13-15 in the SIMD16 case. Our texturing is done
* using up to 11 MRFs starting from either m1 or m2, and fb writes can use

View file

@ -160,7 +160,7 @@ analyze_ubos_block(struct ubo_analysis_state *state, nir_block *block)
const int bytes = nir_intrinsic_dest_components(intrin) *
(intrin->def.bit_size / 8);
const int start = ROUND_DOWN_TO(byte_offset, 32);
const int end = ALIGN(byte_offset + bytes, 32);
const int end = align(byte_offset + bytes, 32);
const int chunks = (end - start) / 32;
/* TODO: should we count uses in loops as higher benefit? */

View file

@ -1301,7 +1301,7 @@ elk_compile_tes(const struct elk_compiler *compiler,
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
/* URB entry sizes are stored as a multiple of 64 bytes. */
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
prog_data->base.urb_entry_size = align(output_size_bytes, 64) / 64;
prog_data->base.urb_read_length = 0;

View file

@ -120,7 +120,7 @@ vec4_gs_visitor::setup_varying_inputs(int payload_reg,
}
}
int regs_used = ALIGN(input_array_stride * num_input_vertices,
int regs_used = align(input_array_stride * num_input_vertices,
attributes_per_reg) / attributes_per_reg;
return payload_reg + regs_used;
}
@ -672,7 +672,7 @@ elk_compile_gs(const struct elk_compiler *compiler,
/* 1 HWORD = 32 bytes = 256 bits */
prog_data->control_data_header_size_hwords =
ALIGN(c.control_data_header_size_bits, 256) / 256;
align(c.control_data_header_size_bits, 256) / 256;
/* Compute the output vertex size.
*
@ -726,7 +726,7 @@ elk_compile_gs(const struct elk_compiler *compiler,
assert(compiler->devinfo->ver == 6 ||
output_vertex_size_bytes <= GFX7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES);
prog_data->output_vertex_size_hwords =
ALIGN(output_vertex_size_bytes, 32) / 32;
align(output_vertex_size_bytes, 32) / 32;
/* Compute URB entry size. The maximum allowed URB entry size is 32k.
* That divides up as follows:
@ -793,9 +793,9 @@ elk_compile_gs(const struct elk_compiler *compiler,
* a multiple of 128 bytes in gfx6.
*/
if (compiler->devinfo->ver >= 7) {
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
prog_data->base.urb_entry_size = align(output_size_bytes, 64) / 64;
} else {
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128;
prog_data->base.urb_entry_size = align(output_size_bytes, 128) / 128;
}
assert(nir->info.gs.output_primitive < ARRAY_SIZE(elk::gl_prim_to_hw_prim));

View file

@ -423,7 +423,7 @@ elk_compile_tcs(const struct elk_compiler *compiler,
return NULL;
/* URB entry sizes are stored as a multiple of 64 bytes. */
vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
vue_prog_data->urb_entry_size = align(output_size_bytes, 64) / 64;
/* HS does not use the usual payload pushing from URB to GRFs,
* because we don't have enough registers for a full-size payload, and

View file

@ -45,7 +45,7 @@
#define FILE_DEBUG_FLAG DEBUG_TEXTURE
#define ALIGN_DOWN(a, b) ROUND_DOWN_TO(a, b)
#define ALIGN_UP(a, b) ALIGN(a, b)
#define ALIGN_UP(a, b) align(a, b)
/* Tile dimensions. Width and span are in bytes, height is in pixels (i.e.
* unitless). A "span" is the most number of bytes we can copy from linear

View file

@ -94,7 +94,7 @@ write_execlists_header(struct aub_file *aub, const char *name)
app_name_len =
snprintf(app_name, sizeof(app_name), "PCI-ID=0x%X %s",
aub->pci_id, name);
app_name_len = ALIGN(app_name_len, sizeof(uint32_t));
app_name_len = align(app_name_len, sizeof(uint32_t));
dwords = 5 + app_name_len / sizeof(uint32_t);
dword_out(aub, CMD_MEM_TRACE_VERSION | (dwords - 1));
@ -189,7 +189,7 @@ mem_trace_memory_write_header_out(struct aub_file *aub, uint64_t addr,
uint32_t len, uint32_t addr_space,
const char *desc)
{
uint32_t dwords = ALIGN(len, sizeof(uint32_t)) / sizeof(uint32_t);
uint32_t dwords = align(len, sizeof(uint32_t)) / sizeof(uint32_t);
if (aub->verbose_log_file) {
fprintf(aub->verbose_log_file,

Some files were not shown because too many files have changed in this diff.