ac/nir: switch nir_load_smem_amd uses to ac_nir_load_smem wrapper
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

ac_nir_load_smem will be changed to use load_global_amd in a later commit.

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37101>
This commit is contained in:
Marek Olšák 2025-08-08 17:05:35 -04:00
parent 4c87d002e3
commit 9e16ed7a13
9 changed files with 47 additions and 31 deletions

View file

@ -309,6 +309,19 @@ ac_get_global_ids(nir_builder *b, unsigned num_components, unsigned bit_size)
return nir_iadd(b, nir_imul(b, block_ids, block_size), local_ids);
}
nir_def *
ac_nir_load_smem(nir_builder *b, unsigned num_components, nir_def *addr, nir_def *offset,
                 unsigned align_mul, enum gl_access_qualifier access)
{
   /* Wrapper around nir_load_smem_amd: emits a scalar (SMEM) load of
    * num_components dwords from addr + offset.
    *
    * Callers may only pass ACCESS_CAN_SPECULATE; every other qualifier is
    * forced on below or rejected here.
    */
   assert(!(access & ~ACCESS_CAN_SPECULATE));
   /* SMEM loads require at least dword alignment. */
   assert(align_mul >= 4 && util_is_power_of_two_nonzero(align_mul));

   /* SMEM loads are read-only and freely reorderable by construction. */
   const enum gl_access_qualifier smem_access =
      access | ACCESS_CAN_REORDER | ACCESS_NON_WRITEABLE;

   return nir_load_smem_amd(b, num_components, addr, offset,
                            .align_mul = align_mul, .access = smem_access);
}
unsigned
ac_nir_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer)
{

View file

@ -76,6 +76,10 @@ nir_def *
ac_nir_unpack_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
unsigned rshift, unsigned bitwidth);
nir_def *
ac_nir_load_smem(nir_builder *b, unsigned num_components, nir_def *addr, nir_def *offset,
unsigned align_mul, enum gl_access_qualifier access);
bool ac_nir_lower_sin_cos(nir_shader *shader);
bool ac_nir_lower_intrinsics_to_args(nir_shader *shader, const enum amd_gfx_level gfx_level,

View file

@ -50,7 +50,7 @@ load_desc_ptr(nir_builder *b, apply_layout_state *state, unsigned set)
if (user_sgprs_locs->shader_data[AC_UD_INDIRECT_DESCRIPTOR_SETS].sgpr_idx != -1) {
nir_def *addr = get_scalar_arg(b, 1, state->args->descriptor_sets[0]);
addr = convert_pointer_to_64_bit(b, state, addr);
return nir_load_smem_amd(b, 1, addr, nir_imm_int(b, set * 4));
return ac_nir_load_smem(b, 1, addr, nir_imm_int(b, set * 4), 4, 0);
}
assert(state->args->descriptor_sets[set].used);
@ -168,7 +168,7 @@ load_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_def *rsrc,
return nir_iadd(b, nir_channel(b, rsrc, 0), nir_channel(b, rsrc, 1));
nir_def *desc_set = convert_pointer_to_64_bit(b, state, nir_channel(b, rsrc, 0));
return nir_load_smem_amd(b, 4, desc_set, nir_channel(b, rsrc, 1), .align_mul = 16);
return ac_nir_load_smem(b, 4, desc_set, nir_channel(b, rsrc, 1), 16, 0);
}
static void
@ -186,7 +186,7 @@ visit_get_ssbo_size(nir_builder *b, apply_layout_state *state, nir_intrinsic_ins
} else {
/* load the entire descriptor so it can be CSE'd */
nir_def *ptr = convert_pointer_to_64_bit(b, state, nir_channel(b, rsrc, 0));
nir_def *desc = nir_load_smem_amd(b, 4, ptr, nir_channel(b, rsrc, 1), .align_mul = 16);
nir_def *desc = ac_nir_load_smem(b, 4, ptr, nir_channel(b, rsrc, 1), 16, 0);
size = nir_channel(b, desc, 2);
}
@ -280,7 +280,7 @@ get_sampler_desc(nir_builder *b, apply_layout_state *state, nir_deref_instr *der
return nir_iadd(b, load_desc_ptr(b, state, desc_set), index_offset);
nir_def *addr = convert_pointer_to_64_bit(b, state, load_desc_ptr(b, state, desc_set));
nir_def *desc = nir_load_smem_amd(b, size, addr, index_offset, .align_mul = size * 4u);
nir_def *desc = ac_nir_load_smem(b, size, addr, index_offset, size * 4u, 0);
if (desc_type == AC_DESC_IMAGE && state->has_image_load_dcc_bug && !tex && !write) {
nir_def *comp[8];
@ -395,7 +395,7 @@ load_push_constant(nir_builder *b, apply_layout_state *state, nir_intrinsic_inst
if (size < (count - start) && can_increase_load_size(intrin, start * 4, size, size * 2))
size *= 2;
data[num_loads++] = nir_load_smem_amd(b, size, addr, nir_iadd_imm_nuw(b, offset, start * 4));
data[num_loads++] = ac_nir_load_smem(b, size, addr, nir_iadd_imm_nuw(b, offset, start * 4), 4, 0);
start += size;
}
return nir_extract_bits(b, data, num_loads, 0, intrin->def.num_components, bit_size);

View file

@ -34,8 +34,7 @@ load_ring(nir_builder *b, unsigned ring, lower_abi_state *s)
nir_def *ring_offsets = ac_nir_load_arg(b, &s->args->ac, arg);
ring_offsets = nir_pack_64_2x32_split(b, nir_channel(b, ring_offsets, 0), nir_channel(b, ring_offsets, 1));
return nir_load_smem_amd(b, 4, ring_offsets, nir_imm_int(b, ring * 16u), .align_mul = 4u,
.access = ACCESS_CAN_SPECULATE);
return ac_nir_load_smem(b, 4, ring_offsets, nir_imm_int(b, ring * 16u), 4, ACCESS_CAN_SPECULATE);
}
static nir_def *
@ -367,8 +366,8 @@ lower_abi_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
case nir_intrinsic_load_streamout_buffer_amd: {
nir_def *ptr = nir_pack_64_2x32_split(b, ac_nir_load_arg(b, &s->args->ac, s->args->streamout_buffers),
nir_imm_int(b, s->address32_hi));
replacement = nir_load_smem_amd(b, 4, ptr, nir_imm_int(b, nir_intrinsic_base(intrin) * 16),
.access = ACCESS_CAN_SPECULATE);
replacement =
ac_nir_load_smem(b, 4, ptr, nir_imm_int(b, nir_intrinsic_base(intrin) * 16), 16, ACCESS_CAN_SPECULATE);
break;
}
case nir_intrinsic_load_xfb_state_address_gfx12_amd:

View file

@ -234,8 +234,8 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs
nir_def *vertex_buffers_arg = ac_nir_load_arg(b, &s->args->ac, s->args->ac.vertex_buffers);
nir_def *vertex_buffers = nir_pack_64_2x32_split(b, vertex_buffers_arg, nir_imm_int(b, s->gpu_info->address32_hi));
nir_def *descriptor = nir_load_smem_amd(b, 4, vertex_buffers, nir_imm_int(b, desc_index * 16),
.access = ACCESS_CAN_SPECULATE);
nir_def *descriptor =
ac_nir_load_smem(b, 4, vertex_buffers, nir_imm_int(b, desc_index * 16), 16, ACCESS_CAN_SPECULATE);
nir_def *base_index = calc_vs_input_index(b, location, s);
nir_def *zero = nir_imm_int(b, 0);

View file

@ -368,10 +368,10 @@ load_sbt_entry(nir_builder *b, const struct rt_variables *vars, nir_def *idx, en
{
nir_def *desc_base_addr = nir_load_sbt_base_amd(b);
nir_def *desc = nir_pack_64_2x32(b, nir_load_smem_amd(b, 2, desc_base_addr, nir_imm_int(b, binding)));
nir_def *desc = nir_pack_64_2x32(b, ac_nir_load_smem(b, 2, desc_base_addr, nir_imm_int(b, binding), 4, 0));
nir_def *stride_offset = nir_imm_int(b, binding + (binding == SBT_RAYGEN ? 8 : 16));
nir_def *stride = nir_load_smem_amd(b, 1, desc_base_addr, stride_offset);
nir_def *stride = ac_nir_load_smem(b, 1, desc_base_addr, stride_offset, 4, 0);
nir_def *addr = nir_iadd(b, desc, nir_u2u64(b, nir_iadd_imm(b, nir_imul(b, idx, stride), offset)));
@ -927,8 +927,8 @@ radv_build_end_trace_token(nir_builder *b, struct rt_variables *vars, nir_def *t
dst_addr = nir_iadd_imm(b, dst_addr, 8);
nir_def *dispatch_indices =
nir_load_smem_amd(b, 2, nir_imm_int64(b, vars->device->rra_trace.ray_history_addr),
nir_imm_int(b, offsetof(struct radv_ray_history_header, dispatch_index)), .align_mul = 4);
ac_nir_load_smem(b, 2, nir_imm_int64(b, vars->device->rra_trace.ray_history_addr),
nir_imm_int(b, offsetof(struct radv_ray_history_header, dispatch_index)), 4, 0);
nir_def *dispatch_index = nir_iadd(b, nir_channel(b, dispatch_indices, 0), nir_channel(b, dispatch_indices, 1));
nir_def *dispatch_and_flags = nir_iand_imm(b, nir_load_var(b, vars->cull_mask_and_flags), 0xFFFF);
dispatch_and_flags = nir_ior(b, dispatch_and_flags, dispatch_index);

View file

@ -39,8 +39,8 @@ nir_def *si_nir_load_internal_binding(struct si_screen *sscreen, nir_builder *b,
unsigned num_components)
{
nir_def *addr = si_nir_load_addr32_arg(sscreen, args, b, args->internal_bindings);
return nir_load_smem_amd(b, num_components, addr, nir_imm_int(b, slot * 16),
.access = ACCESS_CAN_SPECULATE);
return ac_nir_load_smem(b, num_components, addr, nir_imm_int(b, slot * 16), 16,
ACCESS_CAN_SPECULATE);
}
static nir_def *build_attr_ring_desc(nir_builder *b, struct si_shader *shader,
@ -334,22 +334,22 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
case nir_intrinsic_load_clip_half_line_width_amd: {
nir_def *addr = si_nir_load_addr32_arg(s->shader->selector->screen, s->args,
b, args->small_prim_cull_info);
replacement = nir_load_smem_amd(b, 2, addr, nir_imm_int(b, 32),
.access = ACCESS_CAN_SPECULATE);
replacement = ac_nir_load_smem(b, 2, addr, nir_imm_int(b, 32), 16,
ACCESS_CAN_SPECULATE);
break;
}
case nir_intrinsic_load_cull_triangle_viewport_xy_scale_and_offset_amd: {
nir_def *addr = si_nir_load_addr32_arg(s->shader->selector->screen, s->args,
b, args->small_prim_cull_info);
replacement = nir_load_smem_amd(b, 4, addr, nir_imm_int(b, 0),
.access = ACCESS_CAN_SPECULATE);
replacement = ac_nir_load_smem(b, 4, addr, nir_imm_int(b, 0), 16,
ACCESS_CAN_SPECULATE);
break;
}
case nir_intrinsic_load_cull_line_viewport_xy_scale_and_offset_amd: {
nir_def *addr = si_nir_load_addr32_arg(s->shader->selector->screen, s->args,
b, args->small_prim_cull_info);
replacement = nir_load_smem_amd(b, 4, addr, nir_imm_int(b, 16),
.access = ACCESS_CAN_SPECULATE);
replacement = ac_nir_load_smem(b, 4, addr, nir_imm_int(b, 16), 16,
ACCESS_CAN_SPECULATE);
break;
}
case nir_intrinsic_load_num_vertices_per_primitive_amd:

View file

@ -77,7 +77,7 @@ static nir_def *load_ubo_desc(nir_builder *b, nir_def *index,
index = nir_iadd_imm(b, index, SI_NUM_SHADER_BUFFERS);
nir_def *offset = nir_ishl_imm(b, index, 4);
return nir_load_smem_amd(b, 4, addr, offset, .access = ACCESS_CAN_SPECULATE);
return ac_nir_load_smem(b, 4, addr, offset, 16, ACCESS_CAN_SPECULATE);
}
static nir_def *load_ssbo_desc(nir_builder *b, nir_src *index,
@ -98,7 +98,7 @@ static nir_def *load_ssbo_desc(nir_builder *b, nir_src *index,
slot = nir_isub_imm(b, SI_NUM_SHADER_BUFFERS - 1, slot);
nir_def *offset = nir_ishl_imm(b, slot, 4);
return nir_load_smem_amd(b, 4, addr, offset, .access = ACCESS_CAN_SPECULATE);
return ac_nir_load_smem(b, 4, addr, offset, 16, ACCESS_CAN_SPECULATE);
}
static nir_def *fixup_image_desc(nir_builder *b, nir_def *rsrc, bool uses_store,
@ -155,8 +155,8 @@ static nir_def *load_image_desc(nir_builder *b, nir_def *list, nir_def *index,
num_channels = 8;
}
nir_def *rsrc = nir_load_smem_amd(b, num_channels, list, offset,
.access = bindless ? 0 : ACCESS_CAN_SPECULATE);
nir_def *rsrc = ac_nir_load_smem(b, num_channels, list, offset, 16,
bindless ? 0 : ACCESS_CAN_SPECULATE);
if (desc_type == AC_DESC_IMAGE)
rsrc = fixup_image_desc(b, rsrc, uses_store, s);
@ -434,8 +434,8 @@ static nir_def *load_sampler_desc(nir_builder *b, nir_def *list, nir_def *index,
break;
}
return nir_load_smem_amd(b, num_channels, list, offset,
.access = bindless ? 0 : ACCESS_CAN_SPECULATE);
return ac_nir_load_smem(b, num_channels, list, offset, 16,
bindless ? 0 : ACCESS_CAN_SPECULATE);
}
static nir_def *load_deref_sampler_desc(nir_builder *b, nir_deref_instr *deref,

View file

@ -434,8 +434,8 @@ load_vs_input_from_vertex_buffer(nir_builder *b, unsigned input_index,
unsigned index = input_index - sel->info.num_vbos_in_user_sgprs;
nir_def *addr = si_nir_load_addr32_arg(s->shader->selector->screen, s->args,
b, s->args->ac.vertex_buffers);
vb_desc = nir_load_smem_amd(b, 4, addr, nir_imm_int(b, index * 16),
.access = ACCESS_CAN_SPECULATE);
vb_desc = ac_nir_load_smem(b, 4, addr, nir_imm_int(b, index * 16), 16,
ACCESS_CAN_SPECULATE);
}
nir_def *vertex_index = s->vertex_index[input_index];