Mirror of https://gitlab.freedesktop.org/mesa/mesa.git (synced 2026-01-09 23:30:13 +01:00)
treewide: Replace calling to function ALIGN with align
This is done by replacing ALIGN( with align( via grep; docs, *.xml, and blake3 are excluded.

Signed-off-by: Yonggang Luo <luoyonggang@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38365>
parent 03a32b3fe2
commit ecb0ccf603

119 changed files with 246 additions and 246 deletions
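Both spellings compute the same power-of-two round-up; the commit only switches callers from the ALIGN() macro to the align() inline helper, which is type-checked and evaluates each argument once. Below is a minimal standalone sketch of the pattern being swapped; it is illustrative only, and the actual Mesa helpers live in the util headers and may differ in parameter types and asserts.

#include <assert.h>
#include <stdint.h>

/* Macro form (old spelling): expands textually and may evaluate
 * its arguments more than once. Alignment must be a power of two. */
#define ALIGN(value, alignment) \
   (((value) + (alignment) - 1) & ~((alignment) - 1))

/* Inline-function form (new spelling): type-checked, evaluates each
 * argument exactly once. Hypothetical signature for illustration. */
static inline uint64_t
align(uint64_t value, uint64_t alignment)
{
   assert(alignment != 0 && (alignment & (alignment - 1)) == 0);
   return (value + alignment - 1) & ~(alignment - 1);
}

int main(void)
{
   /* Both forms agree for power-of-two alignments, e.g. the 256-byte
    * task-ring alignment in the first hunk below: 300 -> 512. */
   assert(ALIGN(300u, 256u) == align(300, 256));
   return 0;
}

In the hunks that follow, only the call spelling changes; every argument list is untouched.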
@@ -2317,8 +2317,8 @@ void ac_get_task_info(const struct radeon_info *info,
 /* Ensure that the addresses of each ring are 256 byte aligned. */
 task_info->payload_entry_size = payload_entry_size;
 task_info->num_entries = num_entries;
-task_info->draw_ring_offset = ALIGN(AC_TASK_CTRLBUF_BYTES, 256);
-task_info->payload_ring_offset = ALIGN(task_info->draw_ring_offset + draw_ring_bytes, 256);
+task_info->draw_ring_offset = align(AC_TASK_CTRLBUF_BYTES, 256);
+task_info->payload_ring_offset = align(task_info->draw_ring_offset + draw_ring_bytes, 256);
 task_info->bo_size_bytes = task_info->payload_ring_offset + payload_ring_bytes;
 }
@@ -1046,7 +1046,7 @@ ac_sqtt_dump_data(const struct radeon_info *rad_info, struct ac_sqtt_trace *sqtt
 sizeof(struct sqtt_code_object_database_record),
 record, &elf_size_calc, flags);
 /* Align to 4 bytes per the RGP file spec. */
-code_object_record.size = ALIGN(elf_size_calc, 4);
+code_object_record.size = align(elf_size_calc, 4);
 fseek(output, file_offset, SEEK_SET);
 fwrite(&code_object_record, sizeof(struct sqtt_code_object_database_record),
 1, output);
@@ -245,7 +245,7 @@ ac_rgp_write_msgpack(FILE *output,
 }
 }
 ac_msgpack_resize_if_required(&msgpack, 4 - (msgpack.offset % 4));
-msgpack.offset = ALIGN(msgpack.offset, 4);
+msgpack.offset = align(msgpack.offset, 4);
 fwrite(msgpack.mem, 1, msgpack.offset, output);
 *written_size = msgpack.offset;
 ac_msgpack_destroy(&msgpack);
@@ -321,7 +321,7 @@ ac_rgp_file_write_elf_text(FILE *output, uint32_t *elf_size_calc,
 }
 symbol_offset += rgp_shader_data->code_size;
-uint32_t aligned = ALIGN(symbol_offset, 256) - symbol_offset;
+uint32_t aligned = align(symbol_offset, 256) - symbol_offset;
 fseek(output, aligned, SEEK_CUR);
 *elf_size_calc += aligned;
 *text_size = symbol_offset + aligned;
@@ -1393,7 +1393,7 @@ radv_fill_code_object_record(struct radv_device *device, struct rgp_shader_data
 shader_data->vgpr_count = shader->config.num_vgprs;
 shader_data->sgpr_count = shader->config.num_sgprs;
 shader_data->scratch_memory_size = shader->config.scratch_bytes_per_wave;
-shader_data->lds_size = ALIGN(shader->config.lds_size, lds_increment);
+shader_data->lds_size = align(shader->config.lds_size, lds_increment);
 shader_data->wavefront_size = shader->info.wave_size;
 shader_data->base_address = va & 0xffffffffffff;
 shader_data->elf_symbol_offset = 0;
@@ -162,7 +162,7 @@ radv_get_acceleration_structure_layout(struct radv_device *device,
 offset += bvh_size / 64 * 4;
 /* The BVH and hence bvh_offset needs 64 byte alignment for RT nodes. */
-offset = ALIGN(offset, 64);
+offset = align(offset, 64);
 accel_struct->bvh_offset = offset;
 /* root node */
@@ -543,7 +543,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
 if (radv_shader_should_clear_lds(device, stage->nir)) {
 const unsigned chunk_size = 16; /* max single store size */
-const unsigned shared_size = ALIGN(stage->nir->info.shared_size, chunk_size);
+const unsigned shared_size = align(stage->nir->info.shared_size, chunk_size);
 NIR_PASS(_, stage->nir, nir_clear_shared_memory, shared_size, chunk_size);
 }
@@ -146,9 +146,9 @@ radv_sparse_image_bind_memory(struct radv_device *device, const VkSparseImageMem
 (uint64_t)bind_offset.x * surface->prt_tile_height * surface->prt_tile_depth) *
 bs;
-uint32_t aligned_extent_width = ALIGN(bind_extent.width, surface->prt_tile_width);
-uint32_t aligned_extent_height = ALIGN(bind_extent.height, surface->prt_tile_height);
-uint32_t aligned_extent_depth = ALIGN(bind_extent.depth, surface->prt_tile_depth);
+uint32_t aligned_extent_width = align(bind_extent.width, surface->prt_tile_width);
+uint32_t aligned_extent_height = align(bind_extent.height, surface->prt_tile_height);
+uint32_t aligned_extent_depth = align(bind_extent.depth, surface->prt_tile_depth);
 bool whole_subres = (bind_extent.height <= surface->prt_tile_height || aligned_extent_width == pitch) &&
 (bind_extent.depth <= surface->prt_tile_depth ||
@@ -134,7 +134,7 @@ radv_sdma_get_chunked_copy_info(const struct radv_device *const device, const st
 {
 const unsigned extent_horizontal_blocks = DIV_ROUND_UP(extent.width * img->texel_scale, img->blk_w);
 const unsigned extent_vertical_blocks = DIV_ROUND_UP(extent.height, img->blk_h);
-const unsigned aligned_row_pitch = ALIGN(extent_horizontal_blocks, 4);
+const unsigned aligned_row_pitch = align(extent_horizontal_blocks, 4);
 const unsigned aligned_row_bytes = aligned_row_pitch * img->bpp;
 /* Assume that we can always copy at least one full row at a time. */
@@ -760,7 +760,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
 if (nir->info.zero_initialize_shared_memory && nir->info.shared_size > 0) {
 const unsigned chunk_size = 16; /* max single store size */
-const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
+const unsigned shared_size = align(nir->info.shared_size, chunk_size);
 NIR_PASS(_, nir, nir_zero_initialize_shared_memory, shared_size, chunk_size);
 }
 }
@@ -945,7 +945,7 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
 &ngg_stage->info.ngg_lds_scratch_size);
 } else if (nir->info.stage == MESA_SHADER_MESH) {
 /* ACO aligns the workgroup size to the wave size. */
-unsigned hw_workgroup_size = ALIGN(info->workgroup_size, info->wave_size);
+unsigned hw_workgroup_size = align(info->workgroup_size, info->wave_size);
 bool scratch_ring = false;
 NIR_PASS(_, nir, ac_nir_lower_ngg_mesh, &pdev->info, options.export_clipdist_mask, options.vs_output_param_offset,
@@ -620,7 +620,7 @@ radv_get_shader_binary_size(const struct radv_shader_binary *binary)
 size_t size = sizeof(uint32_t); /* has_binary */
 if (binary)
-size += SHA1_DIGEST_LENGTH + 4 + ALIGN(binary->total_size, 4);
+size += SHA1_DIGEST_LENGTH + 4 + align(binary->total_size, 4);
 return size;
 }
@@ -3450,7 +3450,7 @@ agx_compile_function_nir(nir_shader *nir, nir_function_impl *impl,
 */
 if (ctx->any_scratch) {
 assert(!ctx->is_preamble && "preambles don't use scratch");
-ctx->scratch_size_B = ALIGN(nir->scratch_size, 16);
+ctx->scratch_size_B = align(nir->scratch_size, 16);
 }
 /* Stop the main shader or preamble shader after the exit block. For real
@@ -3509,7 +3509,7 @@ agx_compile_function_nir(nir_shader *nir, nir_function_impl *impl,
 if (ctx->scratch_size_B > 0) {
 /* Apple always allocate 40 more bytes in the entrypoint and align to 4. */
-uint64_t stack_size = ALIGN(DIV_ROUND_UP(ctx->scratch_size_B, 4) + 10, 4);
+uint64_t stack_size = align(DIV_ROUND_UP(ctx->scratch_size_B, 4) + 10, 4);
 assert(stack_size < INT16_MAX);
@@ -339,10 +339,10 @@ static inline bool
 ail_is_level_allocated_compressed(const struct ail_layout *layout,
 unsigned level)
 {
-unsigned width_sa = ALIGN(
+unsigned width_sa = align(
 ail_effective_width_sa(layout->width_px, layout->sample_count_sa), 16);
-unsigned height_sa = ALIGN(
+unsigned height_sa = align(
 ail_effective_height_sa(layout->height_px, layout->sample_count_sa), 16);
 return layout->compressed &&
@@ -80,7 +80,7 @@ hk_descriptor_stride_align_for_type(
 *stride = MAX2(*stride, desc_stride);
 *alignment = MAX2(*alignment, desc_align);
 }
-*stride = ALIGN(*stride, *alignment);
+*stride = align(*stride, *alignment);
 break;
 default:
@@ -2093,7 +2093,7 @@ xfb_decl_store(struct xfb_decl *xfb_decl, const struct gl_constants *consts,
 if (max_member_alignment && has_xfb_qualifiers) {
 max_member_alignment[buffer] = MAX2(max_member_alignment[buffer],
 _mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1);
-info->Buffers[buffer].Stride = ALIGN(xfb_offset,
+info->Buffers[buffer].Stride = align(xfb_offset,
 max_member_alignment[buffer]);
 } else {
 info->Buffers[buffer].Stride = xfb_offset;
@@ -2926,7 +2926,7 @@ varying_matches_assign_locations(struct varying_matches *vm,
 (previous_packing_class != vm->matches[i].packing_class) ||
 (vm->matches[i].packing_order == PACKING_ORDER_VEC3 &&
 dont_pack_vec3)) {
-*location = ALIGN(*location, 4);
+*location = align(*location, 4);
 }
 previous_var_xfb = var->data.is_xfb;
@@ -2967,7 +2967,7 @@ varying_matches_assign_locations(struct varying_matches *vm,
 break;
 }
-*location = ALIGN(*location + 1, 4);
+*location = align(*location + 1, 4);
 slot_end = *location + num_components - 1;
 }
@@ -3229,9 +3229,9 @@ tfeedback_candidate_generator(struct tfeedback_candidate_generator_state *state,
 * (c) each double-precision variable captured must be aligned to a
 * multiple of eight bytes relative to the beginning of a vertex.
 */
-state->xfb_offset_floats = ALIGN(state->xfb_offset_floats, 2);
+state->xfb_offset_floats = align(state->xfb_offset_floats, 2);
 /* 64-bit members of structs are also aligned. */
-state->varying_floats = ALIGN(state->varying_floats, 2);
+state->varying_floats = align(state->varying_floats, 2);
 }
 candidate->xfb_offset_floats = state->xfb_offset_floats;
@@ -586,7 +586,7 @@ spill_ssa_defs_and_lower_shader_calls(nir_shader *shader, uint32_t num_calls,
 def = nir_b2b32(&before, def);
 const unsigned comp_size = def->bit_size / 8;
-offset = ALIGN(offset, comp_size);
+offset = align(offset, comp_size);
 new_def = spill_fill(&before, &after, def,
 index, call_idx,
@@ -617,7 +617,7 @@ spill_ssa_defs_and_lower_shader_calls(nir_shader *shader, uint32_t num_calls,
 nir_builder *b = &before;
-offset = ALIGN(offset, options->stack_alignment);
+offset = align(offset, options->stack_alignment);
 max_scratch_size = MAX2(max_scratch_size, offset);
 /* First thing on the called shader's stack is the resume address
@@ -1642,11 +1642,11 @@ nir_opt_sort_and_pack_stack(nir_shader *shader,
 unsigned scratch_size = start_call_scratch;
 util_dynarray_foreach(&ops, struct scratch_item, item) {
-item->new_offset = ALIGN(scratch_size, item->bit_size / 8);
+item->new_offset = align(scratch_size, item->bit_size / 8);
 scratch_size = item->new_offset + (item->bit_size * item->num_components) / 8;
 _mesa_hash_table_u64_insert(value_id_to_item, item->value, item);
 }
-shader->scratch_size = ALIGN(scratch_size, stack_alignment);
+shader->scratch_size = align(scratch_size, stack_alignment);
 /* Update offsets in the instructions */
 nir_foreach_block_safe(block, impl) {
@@ -132,7 +132,7 @@ static bool
 nir_lower_nv_task_count(nir_shader *shader)
 {
 lower_task_nv_state state = {
-.task_count_shared_addr = ALIGN(shader->info.shared_size, 4),
+.task_count_shared_addr = align(shader->info.shared_size, 4),
 };
 shader->info.shared_size += 4;
@@ -277,7 +277,7 @@ emit_shared_to_payload_copy(nir_builder *b,
 off += remaining_dwords * 4;
 }
-assert(s->payload_offset_in_bytes + ALIGN(payload_size, 4) == off);
+assert(s->payload_offset_in_bytes + align(payload_size, 4) == off);
 }
 static bool
@@ -453,7 +453,7 @@ nir_lower_task_shader(nir_shader *shader,
 requires_payload_in_shared(shader, atomics, small_types);
 lower_task_state state = {
-.payload_shared_addr = ALIGN(shader->info.shared_size, 16),
+.payload_shared_addr = align(shader->info.shared_size, 16),
 .payload_in_shared = payload_in_shared,
 .payload_offset_in_bytes = options.payload_offset_in_bytes,
 };
@@ -151,7 +151,7 @@ struct etna_bo *etna_bo_cache_alloc(struct etna_bo_cache *cache, uint32_t *size,
 struct etna_bo *bo;
 struct etna_bo_bucket *bucket;
-*size = ALIGN(*size, 4096);
+*size = align(*size, 4096);
 bucket = get_bucket(cache, *size);
 /* see if we can be green and recycle: */
@@ -105,7 +105,7 @@ struct etna_cmd_stream *etna_cmd_stream_new(struct etna_pipe *pipe,
 }
 /* allocate even number of 32-bit words */
-size = ALIGN(size, 2);
+size = align(size, 2);
 stream->base.buffer = malloc(size * sizeof(uint32_t));
 if (!stream->base.buffer) {
@@ -257,7 +257,7 @@ cs_program_emit_regs(fd_cs &cs, struct kernel *kernel)
 if (v->pvtmem_size > 0) {
 uint32_t per_fiber_size = v->pvtmem_size;
 uint32_t per_sp_size =
-ALIGN(per_fiber_size * a6xx_backend->info->fibers_per_sp, 1 << 12);
+align(per_fiber_size * a6xx_backend->info->fibers_per_sp, 1 << 12);
 uint32_t total_size = per_sp_size * a6xx_backend->info->num_sp_cores;
 struct fd_bo *pvtmem = fd_bo_new(a6xx_backend->dev, total_size, 0, "pvtmem");
@@ -268,7 +268,7 @@ next_sect(struct state *state, int *sect_size)
 *sect_size = end - state->buf;
 /* copy the section to keep things nicely 32b aligned: */
-sect = malloc(ALIGN(*sect_size, 4));
+sect = malloc(align(*sect_size, 4));
 memcpy(sect, state->buf, *sect_size);
 state->sz -= *sect_size + 4;
@@ -584,8 +584,8 @@ dump_shaders_a3xx(struct state *state)
 if (!compact) {
 if (state->hdr->revision >= 7) {
-instrs += ALIGN(instrs_size, 8) - instrs_size;
-instrs_size = ALIGN(instrs_size, 8);
+instrs += align(instrs_size, 8) - instrs_size;
+instrs_size = align(instrs_size, 8);
 }
 instrs += 32;
 instrs_size -= 32;
@@ -347,9 +347,9 @@ decode_shader_descriptor_block(struct state *state,
 unsigned dwords = 2 * stats.instlen;
 if (gpu_id >= 400) {
-dwords = ALIGN(dwords, 16 * 2);
+dwords = align(dwords, 16 * 2);
 } else {
-dwords = ALIGN(dwords, 4 * 2);
+dwords = align(dwords, 4 * 2);
 }
 unsigned half_regs = state->half_regs;
@@ -205,7 +205,7 @@ fd_bo_heap_alloc(struct fd_bo_heap *heap, uint32_t size, uint32_t flags)
 */
 size = MAX2(size, SUBALLOC_ALIGNMENT);
-size = ALIGN(size, SUBALLOC_ALIGNMENT);
+size = align(size, SUBALLOC_ALIGNMENT);
 simple_mtx_lock(&heap->lock);
 /* Allocate larger buffers from the bottom, and smaller buffers from top
@@ -6026,7 +6026,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
 ir3_instr_move_before(unlock, end);
 }
-so->pvtmem_size = ALIGN(so->pvtmem_size, compiler->pvtmem_per_fiber_align);
+so->pvtmem_size = align(so->pvtmem_size, compiler->pvtmem_per_fiber_align);
 /* Note that max_bary counts inputs that are not bary.f'd for FS: */
 if (so->type == MESA_SHADER_FRAGMENT)
@@ -36,7 +36,7 @@ get_ubo_load_range(nir_shader *nir, nir_intrinsic_instr *instr,
 return false;
 r->start = ROUND_DOWN_TO(offset, alignment * 16);
-r->end = ALIGN(offset + size, alignment * 16);
+r->end = align(offset + size, alignment * 16);
 return true;
 }
@@ -1237,7 +1237,7 @@ compress_regs_left(struct ra_ctx *ctx, struct ra_file *file,
 }
 if (!(cur_reg->flags & IR3_REG_HALF))
-physreg = ALIGN(physreg, 2);
+physreg = align(physreg, 2);
 d("pushing reg %u physreg %u\n", cur_reg->name, physreg);
@@ -1330,7 +1330,7 @@ find_best_gap(struct ra_ctx *ctx, struct ra_file *file,
 BITSET_WORD *available =
 is_early_clobber(dst) ? file->available_to_evict : file->available;
-unsigned start = ALIGN(file->start, alignment);
+unsigned start = align(file->start, alignment);
 if (start + size > file_size)
 start = 0;
 unsigned candidate = start;
@@ -37,7 +37,7 @@ ir3_const_ensure_imm_size(struct ir3_shader_variant *v, unsigned size)
 /* Immediates are uploaded in units of vec4 so make sure our buffer is large
 * enough.
 */
-size = ALIGN(size, 4);
+size = align(size, 4);
 /* Pre-a7xx, the immediates that get lowered to const registers are
 * emitted as part of the const state so the total size of immediates
@@ -362,7 +362,7 @@ find_best_gap(struct ra_ctx *ctx, struct ir3_register *dst, unsigned size,
 if (size > file_size)
 return (physreg_t) ~0;
-unsigned start = ALIGN(ctx->start, alignment);
+unsigned start = align(ctx->start, alignment);
 if (start + size > file_size)
 start = 0;
 unsigned candidate = start;
@@ -395,7 +395,7 @@ find_best_spill_reg(struct ra_ctx *ctx, struct ir3_register *reg,
 unsigned file_size = reg_file_size(reg);
 unsigned min_cost = UINT_MAX;
-unsigned start = ALIGN(ctx->start, alignment);
+unsigned start = align(ctx->start, alignment);
 if (start + size > file_size)
 start = 0;
 physreg_t candidate = start;
@@ -602,7 +602,7 @@ tu_descriptor_set_create(struct tu_device *device,
 &layout->binding[layout->binding_count - 1];
 if (binding->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
 layout_size = binding->offset +
-ALIGN(variable_count, 4 * A6XX_TEX_CONST_DWORDS);
+align(variable_count, 4 * A6XX_TEX_CONST_DWORDS);
 } else {
 uint32_t stride = binding->size;
 layout_size = binding->offset + variable_count * stride;
@@ -683,7 +683,7 @@ tu6_emit_link_map(struct tu_cs *cs,
 enum a6xx_state_block sb)
 {
 const struct ir3_const_state *const_state = ir3_const_state(consumer);
-uint32_t size = ALIGN(consumer->input_size, 4);
+uint32_t size = align(consumer->input_size, 4);
 if (size == 0)
 return;
@@ -2289,9 +2289,9 @@ tu_setup_pvtmem(struct tu_device *dev,
 tu_bo_finish(dev, pvtmem_bo->bo);
 pvtmem_bo->per_fiber_size =
-util_next_power_of_two(ALIGN(pvtmem_bytes, 512));
+util_next_power_of_two(align(pvtmem_bytes, 512));
 pvtmem_bo->per_sp_size =
-ALIGN(pvtmem_bo->per_fiber_size *
+align(pvtmem_bo->per_fiber_size *
 dev->physical_device->info->fibers_per_sp,
 1 << 12);
 uint32_t total_size =
@@ -2729,7 +2729,7 @@ tu_shader_create(struct tu_device *dev,
 * is allocated at the API level, and it's up to the user to ensure
 * that accesses are limited to those bounds.
 */
-const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
+const unsigned shared_size = align(nir->info.shared_size, chunk_size);
 NIR_PASS(_, nir, nir_zero_initialize_shared_memory, shared_size,
 chunk_size);
 }
@@ -55,7 +55,7 @@ tu_suballoc_bo_alloc(struct tu_suballoc_bo *suballoc_bo,
 {
 struct tu_bo *bo = suballoc->bo;
 if (bo) {
-uint32_t offset = ALIGN(suballoc->next_offset, alignment);
+uint32_t offset = align(suballoc->next_offset, alignment);
 if (offset + size <= bo->size) {
 suballoc_bo->bo = tu_bo_get_ref(bo);
 suballoc_bo->iova = bo->iova + offset;
@@ -6073,7 +6073,7 @@ void lp_build_nir_soa_func(struct gallivm_state *gallivm,
 bld.shader = shader;
-bld.scratch_size = ALIGN(shader->scratch_size, 8);
+bld.scratch_size = align(shader->scratch_size, 8);
 if (params->scratch_ptr)
 bld.scratch_ptr = params->scratch_ptr;
 else if (shader->scratch_size) {
@@ -872,7 +872,7 @@ submit_batch(struct crocus_batch *batch)
 .buffer_count = batch->exec_count,
 .batch_start_offset = 0,
 /* This must be QWord aligned. */
-.batch_len = ALIGN(batch->primary_batch_size, 8),
+.batch_len = align(batch->primary_batch_size, 8),
 .flags = I915_EXEC_RENDER |
 I915_EXEC_NO_RELOC |
 I915_EXEC_BATCH_FIRST |
@@ -56,11 +56,11 @@ stream_state(struct crocus_batch *batch,
 uint32_t *out_offset,
 struct crocus_bo **out_bo)
 {
-uint32_t offset = ALIGN(batch->state.used, alignment);
+uint32_t offset = align(batch->state.used, alignment);
 if (offset + size >= STATE_SZ && !batch->no_wrap) {
 crocus_batch_flush(batch);
-offset = ALIGN(batch->state.used, alignment);
+offset = align(batch->state.used, alignment);
 } else if (offset + size >= batch->state.bo->size) {
 const unsigned new_size =
 MIN2(batch->state.bo->size + batch->state.bo->size / 2,
@@ -62,7 +62,7 @@ crocus_init_identifier_bo(struct crocus_context *ice)
 return false;
 ice->workaround_bo->kflags |= EXEC_OBJECT_CAPTURE;
-ice->workaround_offset = ALIGN(
+ice->workaround_offset = align(
 intel_debug_write_identifiers(bo_map, 4096, "Crocus"), 32);
 crocus_bo_unmap(ice->workaround_bo);
@@ -179,7 +179,7 @@ crocus_alloc_item_data(struct crocus_context *ice, uint32_t size)
 uint32_t offset = ice->shaders.cache_next_offset;
 /* Programs are always 64-byte aligned, so set up the next one now */
-ice->shaders.cache_next_offset = ALIGN(offset + size, 64);
+ice->shaders.cache_next_offset = align(offset + size, 64);
 return offset;
 }
@@ -229,7 +229,7 @@ crocus_resource_configure_main(const struct crocus_screen *screen,
 devinfo->ver < 6) {
 /* align row pitch to 4 so we can keep using BLT engine */
 row_pitch_B = util_format_get_stride(templ->format, templ->width0);
-row_pitch_B = ALIGN(row_pitch_B, 4);
+row_pitch_B = align(row_pitch_B, 4);
 }
 const struct isl_surf_init_info init_info = {
@@ -1437,7 +1437,7 @@ crocus_map_tiled_memcpy(struct crocus_transfer *map)
 struct crocus_resource *res = (struct crocus_resource *) xfer->resource;
 struct isl_surf *surf = &res->surf;
-xfer->stride = ALIGN(surf->row_pitch_B, 16);
+xfer->stride = align(surf->row_pitch_B, 16);
 xfer->layer_stride = xfer->stride * box->height;
 unsigned x1, x2, y1, y2;
@@ -329,11 +329,11 @@ stream_state(struct crocus_batch *batch,
 unsigned alignment,
 uint32_t *out_offset)
 {
-uint32_t offset = ALIGN(batch->state.used, alignment);
+uint32_t offset = align(batch->state.used, alignment);
 if (offset + size >= STATE_SZ && !batch->no_wrap) {
 crocus_batch_flush(batch);
-offset = ALIGN(batch->state.used, alignment);
+offset = align(batch->state.used, alignment);
 } else if (offset + size >= batch->state.bo->size) {
 const unsigned new_size =
 MIN2(batch->state.bo->size + batch->state.bo->size / 2,
@@ -6591,7 +6591,7 @@ crocus_upload_dirty_render_state(struct crocus_context *ice,
 struct crocus_resource *res = (void *) tgt->base.buffer;
 uint32_t start = tgt->base.buffer_offset;
 #if GFX_VER < 8
-uint32_t end = ALIGN(start + tgt->base.buffer_size, 4);
+uint32_t end = align(start + tgt->base.buffer_size, 4);
 #endif
 crocus_emit_cmd(batch, GENX(3DSTATE_SO_BUFFER), sob) {
 sob.SOBufferIndex = i;
@@ -8143,7 +8143,7 @@ crocus_upload_compute_state(struct crocus_context *ice,
 vfe.URBEntryAllocationSize = GFX_VER == 8 ? 2 : 0;
 vfe.CURBEAllocationSize =
-ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads +
+align(cs_prog_data->push.per_thread.regs * dispatch.threads +
 cs_prog_data->push.cross_thread.regs, 2);
 }
 }
@@ -8159,15 +8159,15 @@ crocus_upload_compute_state(struct crocus_context *ice,
 elk_cs_push_const_total_size(cs_prog_data, dispatch.threads);
 uint32_t *curbe_data_map =
 stream_state(batch,
-ALIGN(push_const_size, 64), 64,
+align(push_const_size, 64), 64,
 &curbe_data_offset);
 assert(curbe_data_map);
-memset(curbe_data_map, 0x5a, ALIGN(push_const_size, 64));
+memset(curbe_data_map, 0x5a, align(push_const_size, 64));
 crocus_fill_cs_push_const_buffer(cs_prog_data, dispatch.threads,
 curbe_data_map);
 crocus_emit_cmd(batch, GENX(MEDIA_CURBE_LOAD), curbe) {
-curbe.CURBETotalDataLength = ALIGN(push_const_size, 64);
+curbe.CURBETotalDataLength = align(push_const_size, 64);
 curbe.CURBEDataStartAddress = curbe_data_offset;
 }
 }
@@ -1199,11 +1199,11 @@ fill_buffer_location(struct d3d12_context *ctx,
 buf_loc.PlacedFootprint.Footprint.Height = res->base.b.height0;
 buf_loc.PlacedFootprint.Footprint.Depth = res->base.b.depth0;
 } else {
-buf_loc.PlacedFootprint.Footprint.Width = ALIGN(trans->base.b.box.width,
+buf_loc.PlacedFootprint.Footprint.Width = align(trans->base.b.box.width,
 util_format_get_blockwidth(res->base.b.format));
-buf_loc.PlacedFootprint.Footprint.Height = ALIGN(trans->base.b.box.height,
+buf_loc.PlacedFootprint.Footprint.Height = align(trans->base.b.box.height,
 util_format_get_blockheight(res->base.b.format));
-buf_loc.PlacedFootprint.Footprint.Depth = ALIGN(depth,
+buf_loc.PlacedFootprint.Footprint.Depth = align(depth,
 util_format_get_blockdepth(res->base.b.format));
 }
@@ -479,8 +479,8 @@ d3d12_video_bitstream_builder_hevc::build_sps(const HevcVideoParameterSet& paren
 viewport.Width = crop_window_upper_layer.front /* passes width */ - ((crop_window_upper_layer.left + crop_window_upper_layer.right) * SubWidthC);
 viewport.Height = crop_window_upper_layer.back /* passes height */- ((crop_window_upper_layer.top + crop_window_upper_layer.bottom) * SubHeightC);
-m_latest_sps.pic_width_in_luma_samples = ALIGN(encodeResolution.Width, picDimensionMultipleRequirement);
-m_latest_sps.pic_height_in_luma_samples = ALIGN(encodeResolution.Height, picDimensionMultipleRequirement);
+m_latest_sps.pic_width_in_luma_samples = align(encodeResolution.Width, picDimensionMultipleRequirement);
+m_latest_sps.pic_height_in_luma_samples = align(encodeResolution.Height, picDimensionMultipleRequirement);
 m_latest_sps.conf_win_right_offset = (m_latest_sps.pic_width_in_luma_samples - viewport.Width) / SubWidthC;
 m_latest_sps.conf_win_bottom_offset = (m_latest_sps.pic_height_in_luma_samples - viewport.Height) / SubHeightC;
@@ -90,7 +90,7 @@ emit_strides(
 if (tensor->layout == ETHOSU_LAYOUT_NHCWB16) {
 tensor_x = 16 * elem_size;
 tensor_c = tensor_x * tensor->shape.width;
-tensor_y = elem_size * tensor->shape.width * ALIGN(tensor->shape.depth, 16);
+tensor_y = elem_size * tensor->shape.width * align(tensor->shape.depth, 16);
 } else {
 tensor_c = elem_size;
 tensor_x = tensor->shape.depth * tensor_c;
@@ -16,7 +16,7 @@ fill_scale_and_biases(struct ethosu_subgraph *subgraph, struct ethosu_operation
 PIPE_MAP_READ, &transfer_in);
 unsigned idx = 0;
-*scales_size = ALIGN(operation->ofm.shape.depth * 10, 16);
+*scales_size = align(operation->ofm.shape.depth * 10, 16);
 *scales = malloc(*scales_size);
 memset(*scales, 0, *scales_size);
@@ -228,7 +228,7 @@ ethosu_lower_concatenation(struct ethosu_subgraph *subgraph,
 if (tensor->layout == ETHOSU_LAYOUT_NHWC)
 operation->ofm.tiles.addresses[0] += poperation->input_tensors[i]->dims[3];
 else if (tensor->layout == ETHOSU_LAYOUT_NHCWB16)
-operation->ofm.tiles.addresses[0] += poperation->input_tensors[i]->dims[2] * ALIGN(poperation->input_tensors[i]->dims[3], 16);
+operation->ofm.tiles.addresses[0] += poperation->input_tensors[i]->dims[2] * align(poperation->input_tensors[i]->dims[3], 16);
 else
 assert(0 && "Unsupported layout");
 }
@@ -69,7 +69,7 @@ ethosu_allocate_feature_map(struct ethosu_subgraph *subgraph, struct ethosu_feat
 if (tensor->layout == ETHOSU_LAYOUT_NHWC) {
 size = tensor->shape.width * tensor->shape.height * tensor->shape.depth;
 } else if (tensor->layout == ETHOSU_LAYOUT_NHCWB16) {
-size = tensor->shape.width * tensor->shape.height * ALIGN(tensor->shape.depth, 16);
+size = tensor->shape.width * tensor->shape.height * align(tensor->shape.depth, 16);
 } else {
 assert(0 && "Unsupported layout");
 size = 0; // This should never happen
@@ -18,11 +18,11 @@ _get_ifm_blocksize(struct ethosu_operation *operation, struct ethosu_block ofm_b
 // IFM block height
 int h = required_input_size(ofm_block.height, operation->kernel.stride_y, MIN2(operation->kernel.height, SUB_KERNEL_MAX.height));
-h = ALIGN(h, OFM_UBLOCK.height);
+h = align(h, OFM_UBLOCK.height);
 // IFM block width
 int w = required_input_size(ofm_block.width, operation->kernel.stride_x, MIN2(operation->kernel.width, SUB_KERNEL_MAX.width));
-w = ALIGN(w, OFM_UBLOCK.width);
+w = align(w, OFM_UBLOCK.width);
 ifm_block.height = h;
 ifm_block.width = w;
@@ -34,8 +34,8 @@ _get_ifm_blocksize(struct ethosu_operation *operation, struct ethosu_block ofm_b
 static bool
 try_block_config(struct ethosu_operation *operation, struct ethosu_block ofm_block, struct ethosu_block ifm_block, struct ethosu_shram_layout *layout)
 {
-int ifm_bytes = ifm_block.width * ifm_block.height * ALIGN(ifm_block.depth, 8);
-int ifm_banks = ALIGN(DIV_ROUND_UP(ifm_bytes, BANK_SIZE_BYTES) * 2, IFM_GRANULE);
+int ifm_bytes = ifm_block.width * ifm_block.height * align(ifm_block.depth, 8);
+int ifm_banks = align(DIV_ROUND_UP(ifm_bytes, BANK_SIZE_BYTES) * 2, IFM_GRANULE);
 int lut_bytes = operation->type == ETHOSU_OPERATION_TYPE_ELTWISE ? operation->eltwise.lut_bytes : 0;
 int lut_banks = MAX2(DIV_ROUND_UP(lut_bytes, 1024), SHRAM_RESERVED_END_BANKS);
 int lut_start = SHRAM_TOTAL_BANKS - lut_banks;
@@ -44,8 +44,8 @@ try_block_config(struct ethosu_operation *operation, struct ethosu_block ofm_blo
 int acc_start = lut_start;
 if (operation->type != ETHOSU_OPERATION_TYPE_ELTWISE) {
-int acc_bytes = (ofm_block.width * ofm_block.height * ALIGN(ofm_block.depth, 8) * 32) / 8;
-int acc_banks = ALIGN(DIV_ROUND_UP(acc_bytes, BANK_SIZE_BYTES) * 2, ACC_GRANULE);
+int acc_bytes = (ofm_block.width * ofm_block.height * align(ofm_block.depth, 8) * 32) / 8;
+int acc_banks = align(DIV_ROUND_UP(acc_bytes, BANK_SIZE_BYTES) * 2, ACC_GRANULE);
 acc_start -= acc_banks;
 } else {
 int ifm2_banks = ifm_banks; /* TODO: Fix for scalar eltwise */
@@ -89,12 +89,12 @@ find_block_config(struct ethosu_operation *operation)
 unsigned depth = MAX2(OFM_UBLOCK.depth, MIN2(search_space.depth, ARCH_SPLIT_DEPTH));
 if (depth < operation->ofm.shape.depth) {
-depth = ALIGN(depth, ARCH_SPLIT_DEPTH);
+depth = align(depth, ARCH_SPLIT_DEPTH);
 }
-search_space.width = ALIGN(search_space.width, OFM_UBLOCK.width);
-search_space.height = ALIGN(search_space.height, OFM_UBLOCK.height);
-search_space.depth = ALIGN(search_space.depth, OFM_UBLOCK.depth);
+search_space.width = align(search_space.width, OFM_UBLOCK.width);
+search_space.height = align(search_space.height, OFM_UBLOCK.height);
+search_space.depth = align(search_space.depth, OFM_UBLOCK.depth);
 while (depth <= search_space.depth) {
 bool wont_fit[search_space.height + 1][search_space.width + 1];
@@ -110,7 +110,7 @@ find_block_config(struct ethosu_operation *operation)
 struct ethosu_block ifm_block = _get_ifm_blocksize(operation, ofm_block);
 if (!is_equal_depth)
-ifm_block.depth = ALIGN(MIN2(operation->ifm.shape.depth, operation->conv.part_kernel_first ? 16 : 32), IFM_UBLOCK.depth);
+ifm_block.depth = align(MIN2(operation->ifm.shape.depth, operation->conv.part_kernel_first ? 16 : 32), IFM_UBLOCK.depth);
 // Try to fit the blocks in SHRAM
 struct ethosu_shram_layout layout = {0};
@@ -179,7 +179,7 @@ find_block_config(struct ethosu_operation *operation)
 depth += OFM_UBLOCK.depth;
 if (depth < operation->ofm.shape.depth) {
-depth = ALIGN(depth, ARCH_SPLIT_DEPTH);
+depth = align(depth, ARCH_SPLIT_DEPTH);
 }
 }
@@ -1167,9 +1167,9 @@ create_nn_config(struct etna_ml_subgraph *subgraph, const struct etna_operation
 unsigned in_image_tile_x_size = map->out_image_tile_x_size + weight_width - 1;
 unsigned in_image_tile_y_size = map->out_image_tile_y_size + weight_width - 1;
 image_cache_size = in_image_tile_x_size * in_image_tile_y_size;
-image_cache_size = ALIGN(image_cache_size, 16);
+image_cache_size = align(image_cache_size, 16);
 image_cache_size *= input_channels;
-image_cache_size = ALIGN(image_cache_size, 128);
+image_cache_size = align(image_cache_size, 128);
 }
 ML_DBG("coefficients_size 0x%x (%d) image_size 0x%x (%d)\n", coef_cache_size, coef_cache_size, image_cache_size, image_cache_size);
@@ -1182,7 +1182,7 @@ create_nn_config(struct etna_ml_subgraph *subgraph, const struct etna_operation
 map->kernel_pattern_msb = 0x0;
 map->kernel_pattern_low = 0x0;
 map->kernel_pattern_high = 0x0;
-map->kernel_cache_end_address = MAX2(MIN2(ALIGN(map->kernel_cache_start_address + coef_cache_size, 128), oc_sram_size), 0xa00);
+map->kernel_cache_end_address = MAX2(MIN2(align(map->kernel_cache_start_address + coef_cache_size, 128), oc_sram_size), 0xa00);
 } else {
 /* Doesn't fit in the 512KB we have of on-chip SRAM */
 map->kernel_caching_mode = SRAM_CACHE_MODE_PARTIAL_CACHE;
@@ -441,7 +441,7 @@ calculate_weight_bo_size(struct etna_ml_subgraph *subgraph, const struct etna_op
 struct pipe_context *context = subgraph->base.context;
 struct etna_context *ctx = etna_context(context);
 unsigned nn_core_count = etna_ml_get_core_info(ctx)->nn_core_count;
-unsigned header_size = ALIGN(nn_core_count * 4, 64);
+unsigned header_size = align(nn_core_count * 4, 64);
 unsigned input_channels = operation->addition ? 1 : operation->input_channels;
 unsigned output_channels = operation->addition ? 1 : operation->output_channels;
 unsigned cores_used = MIN2(output_channels, nn_core_count);
@@ -453,7 +453,7 @@ calculate_weight_bo_size(struct etna_ml_subgraph *subgraph, const struct etna_op
 weights_size = operation->weight_width * operation->weight_height * input_channels;
 core_size = 1 + 2 + (weights_size + 4 + 4) * kernels_per_core;
-core_size_aligned = ALIGN(core_size, 64);
+core_size_aligned = align(core_size, 64);
 compressed_size_aligned = header_size + core_size_aligned * cores_used;
 return compressed_size_aligned;
@@ -466,7 +466,7 @@ calculate_zrl_bits(struct etna_ml_subgraph *subgraph, const struct etna_operatio
 struct etna_context *ctx = etna_context(context);
 unsigned nn_core_count = etna_ml_get_core_info(ctx)->nn_core_count;
 unsigned max_zrl_bits = etna_ml_get_core_info(ctx)->nn_zrl_bits;
-unsigned header_size = ALIGN(nn_core_count * 4, 64);
+unsigned header_size = align(nn_core_count * 4, 64);
 unsigned input_channels = operation->addition ? 1 : operation->input_channels;
 unsigned output_channels = operation->addition ? 1 : operation->output_channels;
 unsigned cores_used = MIN2(output_channels, nn_core_count);
@@ -518,7 +518,7 @@ etna_ml_create_coeffs_v7(struct etna_ml_subgraph *subgraph, const struct etna_op
 struct pipe_context *context = subgraph->base.context;
 struct etna_context *ctx = etna_context(context);
 unsigned nn_core_count = etna_ml_get_core_info(ctx)->nn_core_count;
-unsigned header_size = ALIGN(nn_core_count * 4, 64);
+unsigned header_size = align(nn_core_count * 4, 64);
 unsigned input_channels = operation->addition ? 1 : operation->input_channels;
 unsigned output_channels = operation->addition ? 1 : operation->output_channels;
 unsigned cores_used = MIN2(output_channels, nn_core_count);
@@ -548,7 +548,7 @@ etna_ml_create_coeffs_v7(struct etna_ml_subgraph *subgraph, const struct etna_op
 else
 actual_size = write_core_sequential(subgraph, map, core, operation, zrl_bits);
-actual_size = ALIGN(actual_size, 64);
+actual_size = align(actual_size, 64);
 max_core_size = MAX2(actual_size, max_core_size);
 header[core] = actual_size;
@@ -685,7 +685,7 @@ create_bo(struct etna_ml_subgraph *subgraph, const struct etna_operation *operat
 input_channels = 2 * output_channels;
 unsigned header_size = 64;
-unsigned body_size = ALIGN(DIV_ROUND_UP(output_channels, cores_used) * (input_channels * operation->weight_height * operation->weight_width + 4 + 4), 64) * 2;
+unsigned body_size = align(DIV_ROUND_UP(output_channels, cores_used) * (input_channels * operation->weight_height * operation->weight_width + 4 + 4), 64) * 2;
 unsigned tail_size = 64;
 max_size = header_size + cores_used * body_size + tail_size;
@@ -803,7 +803,7 @@ etna_ml_create_coeffs_v8(struct etna_ml_subgraph *subgraph, const struct etna_op
 header->symbol_map = pack_symbol_map(symbol_map);
 header->version = 1;
-map += ALIGN(sizeof(*header), 64) / 4;
+map += align(sizeof(*header), 64) / 4;
 encoder_init(&encoder, symbol_map, map);
@@ -553,7 +553,7 @@ fd6_emit_link_map(struct fd_context *ctx, fd_cs &cs,
 if (CHIP == A7XX && producer->compiler->load_shader_consts_via_preamble) {
 const struct ir3_const_state *const_state = ir3_const_state(consumer);
 int base = const_state->primitive_map_ubo.idx;
-uint32_t size = ALIGN(consumer->input_size, 4);
+uint32_t size = align(consumer->input_size, 4);
 fd6_upload_emit_driver_ubo(ctx, cs, consumer, base, size, producer->output_loc);
 } else {
@@ -1366,8 +1366,8 @@ set_blit_scissor(struct fd_batch *batch, fd_cs &cs)
 blit_scissor.minx = 0;
 blit_scissor.miny = 0;
-blit_scissor.maxx = ALIGN(pfb->width, 16);
-blit_scissor.maxy = ALIGN(pfb->height, 4);
+blit_scissor.maxx = align(pfb->width, 16);
+blit_scissor.maxy = align(pfb->height, 4);
 fd_pkt4(cs, 2)
 .add(A6XX_RB_RESOLVE_CNTL_1(.x = blit_scissor.minx, .y = blit_scissor.miny))
@@ -647,7 +647,7 @@ ir3_get_private_mem(struct fd_context *ctx, const struct ir3_shader_variant *so)
 if (ctx->pvtmem[so->pvtmem_per_wave].bo)
 fd_bo_del(ctx->pvtmem[so->pvtmem_per_wave].bo);
-uint32_t per_sp_size = ALIGN(per_fiber_size * fibers_per_sp, 1 << 12);
+uint32_t per_sp_size = align(per_fiber_size * fibers_per_sp, 1 << 12);
 uint32_t total_size = per_sp_size * num_sp_cores;
 ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size = per_fiber_size;
@@ -372,7 +372,7 @@ i915_batch_submit(struct iris_batch *batch)
 .buffer_count = validation_count,
 .batch_start_offset = 0,
 /* This must be QWord aligned. */
-.batch_len = ALIGN(batch->primary_batch_size, 8),
+.batch_len = align(batch->primary_batch_size, 8),
 .flags = batch->i915.exec_flags |
 I915_EXEC_NO_RELOC |
 I915_EXEC_BATCH_FIRST |
@@ -2305,7 +2305,7 @@ intel_aux_map_buffer_alloc(void *driver_ctx, uint32_t size)
 struct iris_bufmgr *bufmgr = (struct iris_bufmgr *)driver_ctx;
 unsigned int page_size = getpagesize();
-size = MAX2(ALIGN(size, page_size), page_size);
+size = MAX2(align(size, page_size), page_size);
 struct iris_bo *bo = alloc_fresh_bo(bufmgr, size, BO_ALLOC_CAPTURE);
 if (!bo) {
@@ -398,7 +398,7 @@ emit_indirect_generate_draw(struct iris_batch *batch,
 float *vertices =
 upload_state(batch, ice->state.dynamic_uploader,
 &ice->draw.generation.vertices,
-ALIGN(9 * sizeof(float), 8), 8);
+align(9 * sizeof(float), 8), 8);
 vertices[0] = x1; vertices[1] = y1; vertices[2] = z; /* v0 */
 vertices[3] = x0; vertices[4] = y1; vertices[5] = z; /* v1 */
@@ -2254,7 +2254,7 @@ iris_map_tiled_memcpy(struct iris_transfer *map)
 struct iris_resource *res = (struct iris_resource *) xfer->resource;
 struct isl_surf *surf = &res->surf;
-xfer->stride = ALIGN(surf->row_pitch_B, 16);
+xfer->stride = align(surf->row_pitch_B, 16);
 xfer->layer_stride = xfer->stride * box->height;
 unsigned x1, x2, y1, y2;
@@ -617,7 +617,7 @@ iris_init_identifier_bo(struct iris_screen *screen)
 screen->workaround_address = (struct iris_address) {
 .bo = screen->workaround_bo,
-.offset = ALIGN(
+.offset = align(
 intel_debug_write_identifiers(bo_map, 4096, "Iris"), 32),
 };
@@ -9385,7 +9385,7 @@ iris_upload_gpgpu_walker(struct iris_context *ice,
 vfe.URBEntryAllocationSize = 2;
 vfe.CURBEAllocationSize =
-ALIGN(cs_data->push.per_thread.regs * dispatch.threads +
+align(cs_data->push.per_thread.regs * dispatch.threads +
 cs_data->push.cross_thread.regs, 2);
 }
 }
@@ -9403,15 +9403,15 @@ iris_upload_gpgpu_walker(struct iris_context *ice,
 uint32_t *curbe_data_map =
 stream_state(batch, ice->state.dynamic_uploader,
 &ice->state.last_res.cs_thread_ids,
-ALIGN(push_const_size, 64), 64,
+align(push_const_size, 64), 64,
 &curbe_data_offset);
 assert(curbe_data_map);
-memset(curbe_data_map, 0x5a, ALIGN(push_const_size, 64));
+memset(curbe_data_map, 0x5a, align(push_const_size, 64));
 iris_fill_cs_push_const_buffer(screen, shader, dispatch.threads,
 curbe_data_map);
 iris_emit_cmd(batch, GENX(MEDIA_CURBE_LOAD), curbe) {
-curbe.CURBETotalDataLength = ALIGN(push_const_size, 64);
+curbe.CURBETotalDataLength = align(push_const_size, 64);
 curbe.CURBEDataStartAddress = curbe_data_offset;
 }
 }
@@ -63,7 +63,7 @@ lima_ioctl_gem_create(int fd, unsigned long request, void *arg)
 struct shim_fd *shim_fd = drm_shim_fd_lookup(fd);
 struct shim_bo *bo = calloc(1, sizeof(*bo));
-size_t size = ALIGN(create->size, 4096);
+size_t size = align(create->size, 4096);
 drm_shim_bo_init(bo, size);
@@ -1343,7 +1343,7 @@ out:
 info_out->bin.maxGPR = prog->maxGPR;
 info_out->bin.code = prog->code;
 info_out->bin.codeSize = prog->binSize;
-info_out->bin.tlsSpace = ALIGN(prog->tlsSize, 0x10);
+info_out->bin.tlsSpace = align(prog->tlsSize, 0x10);
 delete prog;
 nv50_ir::Target::destroy(targ);
@@ -389,7 +389,7 @@ nv50_compute_validate_surfaces(struct nv50_context *nv50)
 PUSH_DATAh(push, res->address + buffer->buffer_offset);
 PUSH_DATA (push, res->address + buffer->buffer_offset);
 PUSH_DATA (push, 0); /* pitch? */
-PUSH_DATA (push, ALIGN(buffer->buffer_size, 256) - 1);
+PUSH_DATA (push, align(buffer->buffer_size, 256) - 1);
 PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
 BCTX_REFN(nv50->bufctx_cp, CP_BUF, res, RDWR);
 util_range_add(&res->base, &res->valid_buffer_range,
@@ -420,7 +420,7 @@ nv50_compute_validate_surfaces(struct nv50_context *nv50)
 PUSH_DATAh(push, address);
 PUSH_DATA (push, address);
 PUSH_DATA (push, 0); /* pitch? */
-PUSH_DATA (push, ALIGN(view->u.buf.size, 0x100) - 1);
+PUSH_DATA (push, align(view->u.buf.size, 0x100) - 1);
 PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
 } else {
 struct nv50_miptree *mt = nv50_miptree(view->resource);
@@ -391,7 +391,7 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir,
 shader->info.nr_param_exports || shader->info.nr_prim_param_exports,
 &out_needs_scratch_ring,
 shader->wave_size,
-ALIGN(max_workgroup_size, shader->wave_size),
+align(max_workgroup_size, shader->wave_size),
 false,
 false);
 shader->info.uses_mesh_scratch_ring = out_needs_scratch_ring;
@@ -923,7 +923,7 @@ static void run_late_optimization_and_lowering_passes(struct si_nir_shader_ctx *
 if (si_should_clear_lds(sel->screen, nir)) {
 const unsigned chunk_size = 16; /* max single store size */
-const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
+const unsigned shared_size = align(nir->info.shared_size, chunk_size);
 NIR_PASS(_, nir, nir_clear_shared_memory, shared_size, chunk_size);
 }
@@ -472,7 +472,7 @@ void si_shader_dump_stats_for_shader_db(struct si_screen *screen, struct si_shad
 "HSPatchOuts: %u ESOutputs: %u GSOutputs: %u VSOutputs: %u PSOutputs: %u "
 "InlineUniforms: %u DivergentLoop: %u (%s, W%u)",
 conf->num_sgprs, conf->num_vgprs, si_get_shader_binary_size(screen, shader),
-ALIGN(conf->lds_size, ac_shader_get_lds_alloc_granularity(screen->info.gfx_level)),
+align(conf->lds_size, ac_shader_get_lds_alloc_granularity(screen->info.gfx_level)),
 conf->scratch_bytes_per_wave, shader->info.max_simd_waves,
 conf->spilled_sgprs, conf->spilled_vgprs, shader->info.private_mem_vgprs,
 num_ls_outputs, num_hs_outputs,
@@ -769,7 +769,7 @@ si_sqtt_add_code_object(struct si_context *sctx,
 record->shader_data[i].hw_stage = hw_stage;
 record->shader_data[i].is_combined = false;
 record->shader_data[i].scratch_memory_size = shader->config.scratch_bytes_per_wave;
-record->shader_data[i].lds_size = ALIGN(shader->config.lds_size, ac_shader_get_lds_alloc_granularity(sctx->gfx_level));
+record->shader_data[i].lds_size = align(shader->config.lds_size, ac_shader_get_lds_alloc_granularity(sctx->gfx_level));
 record->shader_data[i].wavefront_size = shader->wave_size;
 record->shader_stages_mask |= 1 << i;
@@ -31,12 +31,12 @@ rkt_fill_weights(struct rkt_ml_subgraph *subgraph,
 input_channels = MAX2(input_channels, FEATURE_ATOMIC_SIZE);
-output_channels = ALIGN(output_channels, 2);
+output_channels = align(output_channels, 2);
 if (rkt_is_depthwise(poperation))
 output_channels = 1;
 weights_size = weights_width * weights_height * output_channels *
-ALIGN(input_channels, WEIGHT_ATOMIC_SIZE) * 2;
+align(input_channels, WEIGHT_ATOMIC_SIZE) * 2;
 rsc =
 pipe_buffer_create(pcontext->screen, 0, PIPE_USAGE_DEFAULT, weights_size);
@@ -62,7 +62,7 @@ rkt_fill_weights(struct rkt_ml_subgraph *subgraph,
 unsigned oc = oc1 * WEIGHT_ATOMIC_SIZE + oc2;
 unsigned ic = ic1 * input_channel_groups + ic2;
 if (output_channels_real > 2 &&
-oc >= ALIGN(output_channels_real, 2))
+oc >= align(output_channels_real, 2))
 continue;
 if (oc >= output_channels_real)
@@ -105,7 +105,7 @@ compile_operation(struct rkt_ml_subgraph *subgraph,
 unsigned size =
 util_dynarray_num_elements(&regcfgs[i], uint64_t) * sizeof(uint64_t);
-regcfg_total_size += ALIGN(size, 64);
+regcfg_total_size += align(size, 64);
 }
 operation->regcmd = pipe_buffer_create(pcontext->screen, 0,
@@ -129,13 +129,13 @@ compile_operation(struct rkt_ml_subgraph *subgraph,
 util_dynarray_element(&regcfgs[i], uint64_t, reg_count - 3);
 uint64_t addr = rkt_resource(operation->regcmd)->phys_addr +
-regcmd_offset + ALIGN(size * sizeof(uint64_t), 64);
+regcmd_offset + align(size * sizeof(uint64_t), 64);
 *next_address_reg |= addr << 16;
 unsigned regs_to_fetch =
 util_dynarray_num_elements(&regcfgs[i + 1], uint64_t);
 regs_to_fetch -= 4;
-regs_to_fetch = ALIGN(regs_to_fetch / 2, 2);
+regs_to_fetch = align(regs_to_fetch / 2, 2);
 *reg_count_reg |= regs_to_fetch << 16;
 }
@@ -151,7 +151,7 @@ compile_operation(struct rkt_ml_subgraph *subgraph,
 rkt_dump_buffer(regcmd, "regcmd", 0, i, regcmd_offset,
 (size + 4) * sizeof(uint64_t));
-regcmd_offset += ALIGN(size * sizeof(uint64_t), 64);
+regcmd_offset += align(size * sizeof(uint64_t), 64);
 }
 pipe_buffer_unmap(pcontext, transfer);
@@ -101,7 +101,7 @@ fill_task(struct rkt_ml_subgraph *subgraph,
 task->input_height = operation->input_height;
 task->input_channels =
-ALIGN(MAX2(operation->input_channels, FEATURE_ATOMIC_SIZE),
+align(MAX2(operation->input_channels, FEATURE_ATOMIC_SIZE),
 FEATURE_ATOMIC_SIZE);
 task->input_channels_real = operation->input_channels;
 task->input_zero_point = operation->input_zero_point;
@@ -111,11 +111,11 @@ fill_task(struct rkt_ml_subgraph *subgraph,
 task->output_height = operation->output_height;
 task->output_channels_real = operation->output_channels;
-task->output_channels = ALIGN(MAX2(operation->output_channels, 32), 32);
+task->output_channels = align(MAX2(operation->output_channels, 32), 32);
 if (operation->depthwise) {
 if (task->output_channels_real <= 32)
 task->output_channels *= 2;
-task->output_channels = ALIGN(task->output_channels, 64);
+task->output_channels = align(task->output_channels, 64);
 }
 task->output_zero_point = operation->output_zero_point;
@@ -170,7 +170,7 @@ fill_task(struct rkt_ml_subgraph *subgraph,
 if (operation->depthwise)
 task->weights_kernels = 1;
 else
-task->weights_kernels = ALIGN(operation->output_channels, 2);
+task->weights_kernels = align(operation->output_channels, 2);
 task->surfaces_per_row = task->output_width * task->output_height * 2;
 if (operation->depthwise)
@@ -201,8 +201,8 @@ virgl_drm_winsys_resource_create_blob(struct virgl_winsys *qws,
 return NULL;
 /* Make sure blob is page aligned. */
-width = ALIGN(width, getpagesize());
-size = ALIGN(size, getpagesize());
+width = align(width, getpagesize());
+size = align(size, getpagesize());
 blob_id = p_atomic_inc_return(&qdws->blob_id);
 cmd[0] = VIRGL_CMD0(VIRGL_CCMD_PIPE_RESOURCE_CREATE, 0, VIRGL_PIPE_RES_CREATE_SIZE);
@@ -311,8 +311,8 @@ virgl_vtest_winsys_resource_create(struct virgl_winsys *vws,
 if ((flags & (VIRGL_RESOURCE_FLAG_MAP_PERSISTENT |
 VIRGL_RESOURCE_FLAG_MAP_COHERENT))) {
-width = ALIGN(width, getpagesize());
-size = ALIGN(size, getpagesize());
+width = align(width, getpagesize());
+size = align(size, getpagesize());
 new_handle = virgl_vtest_winsys_resource_create_blob(vws, target, format, bind,
 width, height, depth,
 array_size, last_level, nr_samples,
@@ -89,7 +89,7 @@ static void pvr_image_init_physical_extent(struct pvr_image *image)
 if (image->vk.usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
 VK_IMAGE_USAGE_TRANSFER_DST_BIT)) {
 image->physical_extent.width =
-ALIGN(image->physical_extent.width,
+align(image->physical_extent.width,
 ROGUE_PBESTATE_REG_WORD0_LINESTRIDE_UNIT_SIZE);
 }
 }
@@ -110,11 +110,11 @@ static void pvr_image_setup_mip_levels(struct pvr_image *image)
 for (uint32_t i = 0; i < image->vk.mip_levels; i++) {
 struct pvr_mip_level *mip_level = &image->mip_levels[i];
-mip_level->pitch = cpp * ALIGN(extent.width, extent_alignment);
-mip_level->height_pitch = ALIGN(extent.height, extent_alignment);
+mip_level->pitch = cpp * align(extent.width, extent_alignment);
+mip_level->height_pitch = align(extent.height, extent_alignment);
 mip_level->size = image->vk.samples * mip_level->pitch *
 mip_level->height_pitch *
-ALIGN(extent.depth, extent_alignment);
+align(extent.depth, extent_alignment);
 mip_level->offset = image->layer_size;
 image->layer_size += mip_level->size;
@@ -129,11 +129,11 @@ static void pvr_image_setup_mip_levels(struct pvr_image *image)
 * were present so we need to account for that in the `layer_size`.
 */
 while (extent.height != 1 || extent.width != 1 || extent.depth != 1) {
-const uint32_t height_pitch = ALIGN(extent.height, extent_alignment);
-const uint32_t pitch = cpp * ALIGN(extent.width, extent_alignment);
+const uint32_t height_pitch = align(extent.height, extent_alignment);
+const uint32_t pitch = cpp * align(extent.width, extent_alignment);
 image->layer_size += image->vk.samples * pitch * height_pitch *
-ALIGN(extent.depth, extent_alignment);
+align(extent.depth, extent_alignment);
 extent.height = u_minify(extent.height, 1);
 extent.width = u_minify(extent.width, 1);
@@ -61,7 +61,7 @@ VkResult pvr_init_robustness_buffer(struct pvr_device *device)
 VkResult result;
 #define ROBUSTNESS_BUFFER_OFFSET_ALIGN16(cur_offset, add) \
-((uint16_t)ALIGN((cur_offset + (uint16_t)(add)), 16))
+((uint16_t)align((cur_offset + (uint16_t)(add)), 16))
 robustness_buffer_offsets[PVR_ROBUSTNESS_BUFFER_FORMAT_UINT64] = offset;
 offset = ROBUSTNESS_BUFFER_OFFSET_ALIGN16(offset, sizeof(uint64_t) * 4);
@@ -314,8 +314,8 @@ blorp_hiz_op(struct blorp_batch *batch, struct blorp_surf *surf,
 params.depth.view.base_level);
 params.y1 = u_minify(params.depth.surf.logical_level0_px.height,
 params.depth.view.base_level);
-params.x1 = ALIGN(params.x1, 8);
-params.y1 = ALIGN(params.y1, 4);
+params.x1 = align(params.x1, 8);
+params.y1 = align(params.y1, 4);
 if (params.depth.view.base_level == 0) {
 /* TODO: What about MSAA? */
@@ -1731,9 +1731,9 @@ blorp_surf_retile_w_to_y(const struct isl_device *isl_dev,
 const unsigned x_align = 8, y_align = info->surf.samples != 0 ? 8 : 4;
 info->surf.tiling = ISL_TILING_Y0;
 info->surf.logical_level0_px.width =
-ALIGN(info->surf.logical_level0_px.width, x_align) * 2;
+align(info->surf.logical_level0_px.width, x_align) * 2;
 info->surf.logical_level0_px.height =
-ALIGN(info->surf.logical_level0_px.height, y_align) / 2;
+align(info->surf.logical_level0_px.height, y_align) / 2;
 info->tile_x_sa *= 2;
 info->tile_y_sa /= 2;
 }
@ -1984,8 +1984,8 @@ try_blorp_blit(struct blorp_batch *batch,
|
|||
isl_get_interleaved_msaa_px_size_sa(params->dst.surf.samples);
|
||||
params->x0 = ROUND_DOWN_TO(params->x0, 2) * px_size_sa.width;
|
||||
params->y0 = ROUND_DOWN_TO(params->y0, 2) * px_size_sa.height;
|
||||
params->x1 = ALIGN(params->x1, 2) * px_size_sa.width;
|
||||
params->y1 = ALIGN(params->y1, 2) * px_size_sa.height;
|
||||
params->x1 = align(params->x1, 2) * px_size_sa.width;
|
||||
params->y1 = align(params->y1, 2) * px_size_sa.height;
|
||||
|
||||
blorp_surf_fake_interleaved_msaa(batch->blorp->isl_dev, ¶ms->dst);
|
||||
|
||||
|
|
@ -2045,8 +2045,8 @@ try_blorp_blit(struct blorp_batch *batch,
|
|||
const unsigned y_align = params->dst.surf.samples != 0 ? 8 : 4;
|
||||
params->x0 = ROUND_DOWN_TO(params->x0, x_align) * 2;
|
||||
params->y0 = ROUND_DOWN_TO(params->y0, y_align) / 2;
|
||||
params->x1 = ALIGN(params->x1, x_align) * 2;
|
||||
params->y1 = ALIGN(params->y1, y_align) / 2;
|
||||
params->x1 = align(params->x1, x_align) * 2;
|
||||
params->y1 = align(params->y1, y_align) / 2;
|
||||
|
||||
/* Retile the surface to Y-tiled */
|
||||
blorp_surf_retile_w_to_y(batch->blorp->isl_dev, ¶ms->dst);
|
||||
|
|
|
|||
|
|
@ -387,8 +387,8 @@ get_fast_clear_rect(const struct isl_device *dev,
|
|||
|
||||
*x0 = ROUND_DOWN_TO(*x0, x_align) / x_scaledown;
|
||||
*y0 = ROUND_DOWN_TO(*y0, y_align) / y_scaledown;
|
||||
*x1 = ALIGN(*x1, x_align) / x_scaledown;
|
||||
*y1 = ALIGN(*y1, y_align) / y_scaledown;
|
||||
*x1 = align(*x1, x_align) / x_scaledown;
|
||||
*y1 = align(*y1, y_align) / y_scaledown;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
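These blit and fast-clear paths grow the rectangle outward, rounding x0/y0 down and x1/y1 up to the hardware alignment, before scaling it into the auxiliary surface's coordinate space. A standalone sketch of that pattern; the alignments and scaledown factors are example values, not taken from any particular platform:

#include <stdio.h>

static unsigned round_down_to(unsigned v, unsigned a) { return v - (v % a); }
static unsigned align_up(unsigned v, unsigned a) { return (v + a - 1) / a * a; }

int
main(void)
{
   /* Clear rectangle in pixels. */
   unsigned x0 = 13, y0 = 7, x1 = 121, y1 = 66;

   /* Example hardware alignment and scaledown for the aux surface. */
   const unsigned x_align = 16, y_align = 8;
   const unsigned x_scaledown = x_align / 2, y_scaledown = y_align / 2;

   x0 = round_down_to(x0, x_align) / x_scaledown; /* start rounded outward (down) */
   y0 = round_down_to(y0, y_align) / y_scaledown;
   x1 = align_up(x1, x_align) / x_scaledown;      /* end rounded outward (up) */
   y1 = align_up(y1, y_align) / y_scaledown;

   printf("aux rect: (%u,%u) -> (%u,%u)\n", x0, y0, x1, y1); /* (0,0) -> (16,18) */
   return 0;
}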
@ -541,7 +541,7 @@ blorp_fast_clear(struct blorp_batch *batch,
assert(surf->surf->levels > 1 ||
surf->surf->logical_level0_px.d > 1 ||
surf->surf->logical_level0_px.a > 1);
const int phys_height0 = ALIGN(surf->surf->logical_level0_px.h,
const int phys_height0 = align(surf->surf->logical_level0_px.h,
surf->surf->image_alignment_el.h);
unaligned_height = phys_height0 % 32;
size_B = (int64_t)surf->surf->row_pitch_B * (phys_height0 - unaligned_height);

@ -583,7 +583,7 @@ blorp_fast_clear(struct blorp_batch *batch,
mem_surf.addr.offset, size_B, ISL_TILING_4);
assert(isl_surf.logical_level0_px.h == 32);
assert(isl_surf.logical_level0_px.a == 1);
isl_surf.row_pitch_B = ALIGN(isl_surf.row_pitch_B, 16 * 128);
isl_surf.row_pitch_B = align(isl_surf.row_pitch_B, 16 * 128);
} else {
isl_surf_from_mem(batch->blorp->isl_dev, &isl_surf,
mem_surf.addr.offset, size_B, ISL_TILING_64);

@ -1341,8 +1341,8 @@ blorp_ccs_resolve(struct blorp_batch *batch,
x_scaledown = aux_fmtl->bw / 2;
y_scaledown = aux_fmtl->bh / 2;
}
params.x1 = ALIGN(params.x1, x_scaledown) / x_scaledown;
params.y1 = ALIGN(params.y1, y_scaledown) / y_scaledown;
params.x1 = align(params.x1, x_scaledown) / x_scaledown;
params.y1 = align(params.y1, y_scaledown) / y_scaledown;
}

if (batch->blorp->isl_dev->info->ver >= 10) {

@ -1656,7 +1656,7 @@ blorp_get_compute_push_const(struct blorp_batch *batch,
{
const struct brw_cs_prog_data *cs_prog_data = params->cs_prog_data;
const unsigned push_const_size =
ALIGN(brw_cs_push_const_total_size(cs_prog_data, threads), 64);
align(brw_cs_push_const_total_size(cs_prog_data, threads), 64);
assert(cs_prog_data->push.cross_thread.size +
cs_prog_data->push.per_thread.size == sizeof(params->wm_inputs));

@ -1860,7 +1860,7 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
vfe.URBEntryAllocationSize = 2;

const uint32_t vfe_curbe_allocation =
ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads +
align(cs_prog_data->push.per_thread.regs * dispatch.threads +
cs_prog_data->push.cross_thread.regs, 2);
vfe.CURBEAllocationSize = vfe_curbe_allocation;
}

@ -1882,7 +1882,7 @@ blorp_get_compute_push_const(struct blorp_batch *batch,
{
const struct elk_cs_prog_data *cs_prog_data = params->cs_prog_data;
const unsigned push_const_size =
ALIGN(elk_cs_push_const_total_size(cs_prog_data, threads), 64);
align(elk_cs_push_const_total_size(cs_prog_data, threads), 64);
assert(cs_prog_data->push.cross_thread.size +
cs_prog_data->push.per_thread.size == sizeof(params->wm_inputs));

@ -1981,7 +1981,7 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
vfe.URBEntryAllocationSize = GFX_VER >= 8 ? 2 : 0;

const uint32_t vfe_curbe_allocation =
ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads +
align(cs_prog_data->push.per_thread.regs * dispatch.threads +
cs_prog_data->push.cross_thread.regs, 2);
vfe.CURBEAllocationSize = vfe_curbe_allocation;
}

@ -135,7 +135,7 @@ intel_get_urb_config(const struct intel_device_info *devinfo,
* Round them all up.
*/
for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
min_entries[i] = ALIGN(min_entries[i], granularity[i]);
min_entries[i] = align(min_entries[i], granularity[i]);
}

unsigned entry_size_bytes[4];

@ -323,7 +323,7 @@ intel_get_mesh_urb_config(const struct intel_device_info *devinfo,
* in slices beyond Slice0) of the MESH URB allocation, specified in
* multiples of 8 KB.
*/
push_constant_kb = ALIGN(push_constant_kb, 8);
push_constant_kb = align(push_constant_kb, 8);
total_urb_kb -= push_constant_kb;
const unsigned total_urb_avail_mesh_task_kb = total_urb_kb;

@ -360,9 +360,9 @@ intel_get_mesh_urb_config(const struct intel_device_info *devinfo,
*/
const unsigned min_mesh_entries = urb_cfg->size[MESA_SHADER_MESH] < 9 ? 8 : 1;
const unsigned min_task_entries = urb_cfg->size[MESA_SHADER_TASK] < 9 ? 8 : 1;
const unsigned min_mesh_urb_kb = ALIGN(urb_cfg->size[MESA_SHADER_MESH] *
const unsigned min_mesh_urb_kb = align(urb_cfg->size[MESA_SHADER_MESH] *
min_mesh_entries * 64, 1024) / 1024;
const unsigned min_task_urb_kb = ALIGN(urb_cfg->size[MESA_SHADER_TASK] *
const unsigned min_task_urb_kb = align(urb_cfg->size[MESA_SHADER_TASK] *
min_task_entries * 64, 1024) / 1024;

total_urb_kb -= (min_mesh_urb_kb + min_task_urb_kb);

@ -380,8 +380,8 @@ intel_get_mesh_urb_config(const struct intel_device_info *devinfo,
* in slices beyond Slice0) of the TASK URB allocation, specified in
* multiples of 8 KB.
*/
if ((total_urb_avail_mesh_task_kb - ALIGN(mesh_urb_kb, 8)) >= min_task_entries) {
mesh_urb_kb = ALIGN(mesh_urb_kb, 8);
if ((total_urb_avail_mesh_task_kb - align(mesh_urb_kb, 8)) >= min_task_entries) {
mesh_urb_kb = align(mesh_urb_kb, 8);
} else {
mesh_urb_kb = ROUND_DOWN_TO(mesh_urb_kb, 8);
}
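The mesh/task URB split above prefers to round the mesh allocation up to the next 8 KB boundary, but falls back to rounding it down when that would leave too little of the shared budget for the task stage. A small sketch of that decision; the budget numbers are made up and the check is simplified:

#include <stdio.h>

static unsigned align_up(unsigned v, unsigned a)   { return (v + a - 1) / a * a; }
static unsigned round_down(unsigned v, unsigned a) { return v / a * a; }

/* Round mesh_kb to a multiple of 8 KB, preferring up, but only if at least
 * min_task_kb of the shared budget would remain for the task stage. */
static unsigned
pick_mesh_urb_kb(unsigned mesh_kb, unsigned total_kb, unsigned min_task_kb)
{
   if (total_kb - align_up(mesh_kb, 8) >= min_task_kb)
      return align_up(mesh_kb, 8);
   else
      return round_down(mesh_kb, 8);
}

int
main(void)
{
   printf("%u\n", pick_mesh_urb_kb(21, 64, 16)); /* 24: rounding up still fits */
   printf("%u\n", pick_mesh_urb_kb(21, 32, 16)); /* 16: must round down */
   return 0;
}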
@ -1219,7 +1219,7 @@ remap_attr_reg(brw_shader &s,
const bool per_prim = src.nr < prog_data->num_per_primitive_inputs;
const unsigned base = urb_start +
(per_prim ? 0 :
ALIGN(prog_data->num_per_primitive_inputs / 2,
align(prog_data->num_per_primitive_inputs / 2,
reg_unit(s.devinfo)) * s.max_polygons);
const unsigned idx = per_prim ? src.nr :
src.nr - prog_data->num_per_primitive_inputs;

@ -223,7 +223,7 @@ brw_compile_gs(const struct brw_compiler *compiler,

/* 1 HWORD = 32 bytes = 256 bits */
prog_data->control_data_header_size_hwords =
ALIGN(control_data_header_size_bits, 256) / 256;
align(control_data_header_size_bits, 256) / 256;

/* Compute the output vertex size.
*

@ -276,7 +276,7 @@ brw_compile_gs(const struct brw_compiler *compiler,
unsigned output_vertex_size_bytes = prog_data->base.vue_map.num_slots * 16;
assert(output_vertex_size_bytes <= GFX7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES);
prog_data->output_vertex_size_hwords =
ALIGN(output_vertex_size_bytes, 32) / 32;
align(output_vertex_size_bytes, 32) / 32;

/* Compute URB entry size. The maximum allowed URB entry size is 32k.
* That divides up as follows:

@ -330,7 +330,7 @@ brw_compile_gs(const struct brw_compiler *compiler,


/* URB entry sizes are stored as a multiple of 64 bytes in gfx7+. */
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
prog_data->base.urb_entry_size = align(output_size_bytes, 64) / 64;

assert(nir->info.gs.output_primitive < ARRAY_SIZE(gl_prim_to_hw_prim));
prog_data->output_topology =
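Several of these sites use the same idiom: round a byte (or bit) count up to a unit size and then divide by it, which is simply a ceiling division into hardware units (HWORDs, 64-byte URB rows, and so on). A quick standalone check of that equivalence:

#include <assert.h>
#include <stdio.h>

static unsigned align_up(unsigned v, unsigned a)     { return (v + a - 1) / a * a; }
static unsigned div_round_up(unsigned v, unsigned a) { return (v + a - 1) / a; }

int
main(void)
{
   /* e.g. a 1000-byte GS output converted to 64-byte URB units. */
   const unsigned output_size_bytes = 1000;
   const unsigned urb_entry_size = align_up(output_size_bytes, 64) / 64;

   assert(urb_entry_size == div_round_up(output_size_bytes, 64));
   printf("urb_entry_size = %u (64-byte units)\n", urb_entry_size); /* 16 */
   return 0;
}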
@ -124,7 +124,7 @@ brw_nir_lower_tue_outputs(nir_shader *nir, brw_tue_map *map)
NIR_PASS(_, nir, nir_lower_explicit_io,
nir_var_mem_task_payload, nir_address_format_32bit_offset);

map->size_dw = ALIGN(DIV_ROUND_UP(nir->info.task_payload_size, 4), 8);
map->size_dw = align(DIV_ROUND_UP(nir->info.task_payload_size, 4), 8);
}

static void

@ -203,7 +203,7 @@ brw_nir_align_launch_mesh_workgroups_instr(nir_builder *b,
/* This will avoid special case in nir_lower_task_shader dealing with
* not vec4-aligned payload when payload_in_shared workaround is enabled.
*/
nir_intrinsic_set_range(intrin, ALIGN(range, 16));
nir_intrinsic_set_range(intrin, align(range, 16));

return true;
}

@ -505,7 +505,7 @@ brw_nir_lower_tue_inputs(nir_shader *nir, const brw_tue_map *map)
/* The types for Task Output and Mesh Input should match, so their sizes
* should also match.
*/
assert(!map || map->size_dw == ALIGN(DIV_ROUND_UP(nir->info.task_payload_size, 4), 8));
assert(!map || map->size_dw == align(DIV_ROUND_UP(nir->info.task_payload_size, 4), 8));
} else {
/* Mesh doesn't read any input, to make it clearer set the
* task_payload_size to zero instead of keeping an incomplete size that

@ -263,7 +263,7 @@ brw_compile_tcs(const struct brw_compiler *compiler,
return NULL;

/* URB entry sizes are stored as a multiple of 64 bytes. */
vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
vue_prog_data->urb_entry_size = align(output_size_bytes, 64) / 64;

/* HS does not use the usual payload pushing from URB to GRFs,
* because we don't have enough registers for a full-size payload, and

@ -133,7 +133,7 @@ brw_compile_tes(const struct brw_compiler *compiler,
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);

/* URB entry sizes are stored as a multiple of 64 bytes. */
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
prog_data->base.urb_entry_size = align(output_size_bytes, 64) / 64;

prog_data->base.urb_read_length = 0;

@ -407,7 +407,7 @@ brw_append_insns(struct brw_codegen *p, unsigned nr_insn, unsigned alignment)
assert(util_is_power_of_two_or_zero(sizeof(brw_eu_inst)));
assert(util_is_power_of_two_or_zero(alignment));
const unsigned align_insn = MAX2(alignment / sizeof(brw_eu_inst), 1);
const unsigned start_insn = ALIGN(p->nr_insn, align_insn);
const unsigned start_insn = align(p->nr_insn, align_insn);
const unsigned new_nr_insn = start_insn + nr_insn;

if (p->store_size < new_nr_insn) {
@ -7295,7 +7295,7 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
components = last_component - first_read_component + 1;
}

total = ALIGN(components, REG_SIZE * reg_unit(devinfo) / 4);
total = align(components, REG_SIZE * reg_unit(devinfo) / 4);
dest = ubld.vgrf(BRW_TYPE_UD, total);
} else {
total = components * bld.dispatch_width();

@ -7902,7 +7902,7 @@ brw_from_nir(brw_shader *s)
*/
brw_from_nir_setup_outputs(ntb);
brw_from_nir_emit_system_values(ntb);
ntb.s.last_scratch = ALIGN(ntb.nir->scratch_size, 4) * ntb.s.dispatch_width;
ntb.s.last_scratch = align(ntb.nir->scratch_size, 4) * ntb.s.dispatch_width;

brw_from_nir_emit_impl(ntb, nir_shader_get_entrypoint((nir_shader *)ntb.nir));

@ -527,7 +527,7 @@ brw_flag_mask(const brw_inst *inst, unsigned width)
assert(util_is_power_of_two_nonzero(width));
const unsigned start = (inst->flag_subreg * 16 + inst->group) &
~(width - 1);
const unsigned end = start + ALIGN(inst->exec_size, width);
const unsigned end = start + align(inst->exec_size, width);
return ((1 << DIV_ROUND_UP(end, 8)) - 1) & ~((1 << (start / 8)) - 1);
}
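brw_flag_mask() above widens an instruction's flag-register usage to the given granularity, rounding the start down and the end up, and then builds a byte mask of the flag bytes covered. A worked standalone version with simplified parameters (this is not the compiler's actual signature):

#include <stdio.h>

static unsigned div_round_up(unsigned v, unsigned a) { return (v + a - 1) / a; }
static unsigned align_up(unsigned v, unsigned a)     { return (v + a - 1) & ~(a - 1); }

/* Byte mask of the flag-register bytes touched by a group of channels:
 * flag_bit_start is the first flag bit used, exec_size the channel count,
 * width the power-of-two granularity the access is widened to. */
static unsigned
flag_byte_mask(unsigned flag_bit_start, unsigned exec_size, unsigned width)
{
   const unsigned start = flag_bit_start & ~(width - 1);
   const unsigned end = start + align_up(exec_size, width);
   return ((1u << div_round_up(end, 8)) - 1) & ~((1u << (start / 8)) - 1);
}

int
main(void)
{
   /* A SIMD8 group starting at flag bit 16 touches byte 2 only: mask 0x4. */
   printf("0x%x\n", flag_byte_mask(16, 8, 8));
   /* A SIMD16 group starting at bit 0 touches bytes 0 and 1: mask 0x3. */
   printf("0x%x\n", flag_byte_mask(0, 16, 16));
   return 0;
}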
@ -467,7 +467,7 @@ brw_lower_find_live_channel(brw_shader &s)
* specified quarter control as result.
*/
if (inst->group > 0)
ubld.SHR(mask, mask, brw_imm_ud(ALIGN(inst->group, 8)));
ubld.SHR(mask, mask, brw_imm_ud(align(inst->group, 8)));

ubld.AND(mask, exec_mask, mask);
exec_mask = mask;

@ -150,7 +150,7 @@ analyze_ubos_block(struct ubo_analysis_state *state, nir_block *block)
nir_def_last_component_read(&intrin->def) + 1;
const int bytes = num_components * (intrin->def.bit_size / 8);
const int start = ROUND_DOWN_TO(byte_offset, sizeof_GRF);
const int end = ALIGN(byte_offset + bytes, sizeof_GRF);
const int end = align(byte_offset + bytes, sizeof_GRF);
const int chunks = (end - start) / sizeof_GRF;

/* TODO: should we count uses in loops as higher benefit? */

@ -1574,7 +1574,7 @@ brw_opt_combine_constants(brw_shader &s)
* seem to have additional alignment requirements, so account for that
* too.
*/
assert(reg.offset == ALIGN(reg.offset, get_alignment_for_imm(imm)));
assert(reg.offset == align(reg.offset, get_alignment_for_imm(imm)));

struct brw_reg imm_reg = build_imm_reg_for_copy(imm);

@ -52,7 +52,7 @@ brw_assign_regs_trivial(brw_shader &s)
int reg_width = s.dispatch_width / 8;

/* Note that compressed instructions require alignment to 2 registers. */
hw_reg_mapping[0] = ALIGN(s.first_non_payload_grf, reg_width);
hw_reg_mapping[0] = align(s.first_non_payload_grf, reg_width);
for (i = 1; i <= s.alloc.count; i++) {
hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
DIV_ROUND_UP(s.alloc.sizes[i - 1],

@ -264,7 +264,7 @@ public:
* for reg_width == 2.
*/
int reg_width = fs->dispatch_width / 8;
payload_node_count = ALIGN(fs->first_non_payload_grf, reg_width);
payload_node_count = align(fs->first_non_payload_grf, reg_width);

/* Get payload IP information */
payload_last_use_ip = ralloc_array(mem_ctx, int, payload_node_count);

@ -1180,7 +1180,7 @@ brw_reg_alloc::choose_spill_reg()
brw_reg
brw_reg_alloc::alloc_spill_reg(unsigned size, int ip)
{
int vgrf = brw_allocate_vgrf_units(*fs, ALIGN(size, reg_unit(devinfo))).nr;
int vgrf = brw_allocate_vgrf_units(*fs, align(size, reg_unit(devinfo))).nr;
int class_idx = DIV_ROUND_UP(size, reg_unit(devinfo)) - 1;
int n = ra_add_node(g, compiler->reg_set.classes[class_idx]);
assert(n == first_vgrf_node + vgrf);

@ -1216,7 +1216,7 @@ brw_reg_alloc::spill_reg(unsigned spill_reg)
{
int size = fs->alloc.sizes[spill_reg];
unsigned int spill_offset = fs->last_scratch;
assert(ALIGN(spill_offset, 16) == spill_offset); /* oword read/write req. */
assert(align(spill_offset, 16) == spill_offset); /* oword read/write req. */

fs->spilled_any_registers = true;

@ -233,7 +233,7 @@ brw_rt_compute_scratch_layout(struct brw_rt_scratch_layout *layout,
*/
assert(size % 64 == 0);
layout->sw_stack_start = size;
layout->sw_stack_size = ALIGN(sw_stack_size, 64);
layout->sw_stack_size = align(sw_stack_size, 64);

/* Currently it's always the case that sw_stack_size is a power of
* two, but power-of-two SW stack sizes are prone to causing
@ -570,7 +570,7 @@ elk_append_insns(struct elk_codegen *p, unsigned nr_insn, unsigned alignment)
assert(util_is_power_of_two_or_zero(sizeof(elk_inst)));
assert(util_is_power_of_two_or_zero(alignment));
const unsigned align_insn = MAX2(alignment / sizeof(elk_inst), 1);
const unsigned start_insn = ALIGN(p->nr_insn, align_insn);
const unsigned start_insn = align(p->nr_insn, align_insn);
const unsigned new_nr_insn = start_insn + nr_insn;

if (p->store_size < new_nr_insn) {

@ -945,7 +945,7 @@ namespace {
assert(util_is_power_of_two_nonzero(width));
const unsigned start = (inst->flag_subreg * 16 + inst->group) &
~(width - 1);
const unsigned end = start + ALIGN(inst->exec_size, width);
const unsigned end = start + align(inst->exec_size, width);
return ((1 << DIV_ROUND_UP(end, 8)) - 1) & ~((1 << (start / 8)) - 1);
}

@ -1525,7 +1525,7 @@ elk_fs_visitor::assign_urb_setup()
const bool per_prim = inst->src[i].nr < prog_data->num_per_primitive_inputs;
const unsigned base = urb_start +
(per_prim ? 0 :
ALIGN(prog_data->num_per_primitive_inputs / 2,
align(prog_data->num_per_primitive_inputs / 2,
reg_unit(devinfo)));
const unsigned idx = per_prim ? inst->src[i].nr :
inst->src[i].nr - prog_data->num_per_primitive_inputs;

@ -5296,7 +5296,7 @@ elk_fs_visitor::lower_find_live_channel()
* specified quarter control as result.
*/
if (inst->group > 0)
ubld.SHR(mask, mask, elk_imm_ud(ALIGN(inst->group, 8)));
ubld.SHR(mask, mask, elk_imm_ud(align(inst->group, 8)));

ubld.AND(mask, exec_mask, mask);
exec_mask = mask;

@ -6003,7 +6003,7 @@ elk_fs_visitor::allocate_registers(bool allow_spilling)
* field documentation, platforms prior to Haswell measure scratch
* size linearly with a range of [1kB, 12kB] and 1kB granularity.
*/
prog_data->total_scratch = ALIGN(last_scratch, 1024);
prog_data->total_scratch = align(last_scratch, 1024);
max_scratch_size = 12 * 1024;
}
}

@ -6666,7 +6666,7 @@ elk_nir_populate_wm_prog_data(nir_shader *shader,
static inline int
elk_register_blocks(int reg_count)
{
return ALIGN(reg_count, 16) / 16 - 1;
return align(reg_count, 16) / 16 - 1;
}

const unsigned *

@ -1461,7 +1461,7 @@ elk_fs_visitor::opt_combine_constants()
* instructions seem to have additional alignment requirements, so
* account for that too.
*/
reg.offset = ALIGN(reg.offset, get_alignment_for_imm(imm));
reg.offset = align(reg.offset, get_alignment_for_imm(imm));

/* Ensure we have enough space in the register to copy the immediate */
if (reg.offset + imm->size > REG_SIZE) {

@ -1555,7 +1555,7 @@ elk_fs_visitor::opt_combine_constants()
* seem to have additional alignment requirements, so account for that
* too.
*/
assert(reg.offset == ALIGN(reg.offset, get_alignment_for_imm(imm)));
assert(reg.offset == align(reg.offset, get_alignment_for_imm(imm)));

struct elk_reg imm_reg = build_imm_reg_for_copy(imm);

@ -5036,7 +5036,7 @@ fs_nir_emit_intrinsic(nir_to_elk_state &ntb,
}

const unsigned total_dwords =
ALIGN(instr->num_components, REG_SIZE * reg_unit(devinfo) / 4);
align(instr->num_components, REG_SIZE * reg_unit(devinfo) / 4);
unsigned loaded_dwords = 0;

const elk_fs_reg packed_consts =

@ -5225,7 +5225,7 @@ fs_nir_emit_intrinsic(nir_to_elk_state &ntb,
break;

case nir_intrinsic_load_global_constant_uniform_block_intel: {
const unsigned total_dwords = ALIGN(instr->num_components,
const unsigned total_dwords = align(instr->num_components,
REG_SIZE * reg_unit(devinfo) / 4);
unsigned loaded_dwords = 0;

@ -5360,7 +5360,7 @@ fs_nir_emit_intrinsic(nir_to_elk_state &ntb,
srcs[SURFACE_LOGICAL_SRC_SURFACE] = elk_fs_reg(elk_imm_ud(GFX7_BTI_SLM));
}

const unsigned total_dwords = ALIGN(instr->num_components,
const unsigned total_dwords = align(instr->num_components,
REG_SIZE * reg_unit(devinfo) / 4);
unsigned loaded_dwords = 0;

@ -7002,7 +7002,7 @@ nir_to_elk(elk_fs_visitor *s)
fs_nir_setup_outputs(ntb);
fs_nir_setup_uniforms(ntb.s);
fs_nir_emit_system_values(ntb);
ntb.s.last_scratch = ALIGN(ntb.nir->scratch_size, 4) * ntb.s.dispatch_width;
ntb.s.last_scratch = align(ntb.nir->scratch_size, 4) * ntb.s.dispatch_width;

fs_nir_emit_impl(ntb, nir_shader_get_entrypoint((nir_shader *)ntb.nir));
@ -52,7 +52,7 @@ elk_fs_visitor::assign_regs_trivial()
int reg_width = dispatch_width / 8;

/* Note that compressed instructions require alignment to 2 registers. */
hw_reg_mapping[0] = ALIGN(this->first_non_payload_grf, reg_width);
hw_reg_mapping[0] = align(this->first_non_payload_grf, reg_width);
for (i = 1; i <= this->alloc.count; i++) {
hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
DIV_ROUND_UP(this->alloc.sizes[i - 1],

@ -325,7 +325,7 @@ public:
*/
int reg_width = fs->dispatch_width / 8;
rsi = util_logbase2(reg_width);
payload_node_count = ALIGN(fs->first_non_payload_grf, reg_width);
payload_node_count = align(fs->first_non_payload_grf, reg_width);

/* Get payload IP information */
payload_last_use_ip = ralloc_array(mem_ctx, int, payload_node_count);

@ -940,7 +940,7 @@ elk_fs_reg_alloc::choose_spill_reg()
elk_fs_reg
elk_fs_reg_alloc::alloc_spill_reg(unsigned size, int ip)
{
int vgrf = fs->alloc.allocate(ALIGN(size, reg_unit(devinfo)));
int vgrf = fs->alloc.allocate(align(size, reg_unit(devinfo)));
int class_idx = DIV_ROUND_UP(size, reg_unit(devinfo)) - 1;
int n = ra_add_node(g, compiler->fs_reg_sets[rsi].classes[class_idx]);
assert(n == first_vgrf_node + vgrf);

@ -975,7 +975,7 @@ elk_fs_reg_alloc::spill_reg(unsigned spill_reg)
{
int size = fs->alloc.sizes[spill_reg];
unsigned int spill_offset = fs->last_scratch;
assert(ALIGN(spill_offset, 16) == spill_offset); /* oword read/write req. */
assert(align(spill_offset, 16) == spill_offset); /* oword read/write req. */

/* Spills may use MRFs 13-15 in the SIMD16 case. Our texturing is done
* using up to 11 MRFs starting from either m1 or m2, and fb writes can use

@ -160,7 +160,7 @@ analyze_ubos_block(struct ubo_analysis_state *state, nir_block *block)
const int bytes = nir_intrinsic_dest_components(intrin) *
(intrin->def.bit_size / 8);
const int start = ROUND_DOWN_TO(byte_offset, 32);
const int end = ALIGN(byte_offset + bytes, 32);
const int end = align(byte_offset + bytes, 32);
const int chunks = (end - start) / 32;

/* TODO: should we count uses in loops as higher benefit? */

@ -1301,7 +1301,7 @@ elk_compile_tes(const struct elk_compiler *compiler,
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);

/* URB entry sizes are stored as a multiple of 64 bytes. */
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
prog_data->base.urb_entry_size = align(output_size_bytes, 64) / 64;

prog_data->base.urb_read_length = 0;

@ -120,7 +120,7 @@ vec4_gs_visitor::setup_varying_inputs(int payload_reg,
}
}

int regs_used = ALIGN(input_array_stride * num_input_vertices,
int regs_used = align(input_array_stride * num_input_vertices,
attributes_per_reg) / attributes_per_reg;
return payload_reg + regs_used;
}

@ -672,7 +672,7 @@ elk_compile_gs(const struct elk_compiler *compiler,

/* 1 HWORD = 32 bytes = 256 bits */
prog_data->control_data_header_size_hwords =
ALIGN(c.control_data_header_size_bits, 256) / 256;
align(c.control_data_header_size_bits, 256) / 256;

/* Compute the output vertex size.
*

@ -726,7 +726,7 @@ elk_compile_gs(const struct elk_compiler *compiler,
assert(compiler->devinfo->ver == 6 ||
output_vertex_size_bytes <= GFX7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES);
prog_data->output_vertex_size_hwords =
ALIGN(output_vertex_size_bytes, 32) / 32;
align(output_vertex_size_bytes, 32) / 32;

/* Compute URB entry size. The maximum allowed URB entry size is 32k.
* That divides up as follows:

@ -793,9 +793,9 @@ elk_compile_gs(const struct elk_compiler *compiler,
* a multiple of 128 bytes in gfx6.
*/
if (compiler->devinfo->ver >= 7) {
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
prog_data->base.urb_entry_size = align(output_size_bytes, 64) / 64;
} else {
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128;
prog_data->base.urb_entry_size = align(output_size_bytes, 128) / 128;
}

assert(nir->info.gs.output_primitive < ARRAY_SIZE(elk::gl_prim_to_hw_prim));

@ -423,7 +423,7 @@ elk_compile_tcs(const struct elk_compiler *compiler,
return NULL;

/* URB entry sizes are stored as a multiple of 64 bytes. */
vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
vue_prog_data->urb_entry_size = align(output_size_bytes, 64) / 64;

/* HS does not use the usual payload pushing from URB to GRFs,
* because we don't have enough registers for a full-size payload, and

@ -45,7 +45,7 @@
#define FILE_DEBUG_FLAG DEBUG_TEXTURE

#define ALIGN_DOWN(a, b) ROUND_DOWN_TO(a, b)
#define ALIGN_UP(a, b) ALIGN(a, b)
#define ALIGN_UP(a, b) align(a, b)

/* Tile dimensions. Width and span are in bytes, height is in pixels (i.e.
* unitless). A "span" is the most number of bytes we can copy from linear
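The tiled-copy code wraps the two directions as ALIGN_DOWN/ALIGN_UP so a linear byte range can be split into ragged edges plus a whole-tile middle. A sketch of that split with stand-in macros; the 64-byte tile width is just an example value:

#include <stdio.h>

#define ALIGN_DOWN(a, b) ((a) / (b) * (b))
#define ALIGN_UP(a, b)   (((a) + (b) - 1) / (b) * (b))

int
main(void)
{
   const unsigned tile_w = 64;        /* example tile width in bytes */
   const unsigned x0 = 100, x1 = 500; /* byte range to copy */

   /* Ragged left edge, whole tiles in the middle, ragged right edge. */
   const unsigned x0_aligned_up = ALIGN_UP(x0, tile_w);
   const unsigned x1_aligned_down = ALIGN_DOWN(x1, tile_w);

   printf("left edge:   [%u, %u)\n", x0, x0_aligned_up);              /* [100, 128) */
   printf("whole tiles: [%u, %u)\n", x0_aligned_up, x1_aligned_down); /* [128, 448) */
   printf("right edge:  [%u, %u)\n", x1_aligned_down, x1);            /* [448, 500) */
   return 0;
}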
@ -94,7 +94,7 @@ write_execlists_header(struct aub_file *aub, const char *name)
app_name_len =
snprintf(app_name, sizeof(app_name), "PCI-ID=0x%X %s",
aub->pci_id, name);
app_name_len = ALIGN(app_name_len, sizeof(uint32_t));
app_name_len = align(app_name_len, sizeof(uint32_t));

dwords = 5 + app_name_len / sizeof(uint32_t);
dword_out(aub, CMD_MEM_TRACE_VERSION | (dwords - 1));
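The AUB writer pads byte lengths out to whole 32-bit dwords before counting them into the trace header. A small standalone version of that padding; the string content is arbitrary:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Round a byte length up to a multiple of sizeof(uint32_t). */
static uint32_t
align_to_dword(uint32_t len)
{
   return (len + sizeof(uint32_t) - 1) & ~(uint32_t)(sizeof(uint32_t) - 1);
}

int
main(void)
{
   const char app_name[] = "PCI-ID=0x1234 demo"; /* example payload */
   uint32_t len = (uint32_t)strlen(app_name);    /* 18 bytes */
   uint32_t padded = align_to_dword(len);        /* 20 bytes */
   uint32_t dwords = padded / sizeof(uint32_t);  /* 5 dwords */

   printf("len=%u padded=%u dwords=%u\n", len, padded, dwords);
   return 0;
}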
@ -189,7 +189,7 @@ mem_trace_memory_write_header_out(struct aub_file *aub, uint64_t addr,
uint32_t len, uint32_t addr_space,
const char *desc)
{
uint32_t dwords = ALIGN(len, sizeof(uint32_t)) / sizeof(uint32_t);
uint32_t dwords = align(len, sizeof(uint32_t)) / sizeof(uint32_t);

if (aub->verbose_log_file) {
fprintf(aub->verbose_log_file,