mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 11:20:11 +01:00
ac/nir: switch nir_load_smem_amd uses to ac_nir_load_smem wrapper
ac_nir_load_smem will use load_global_amd.
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37101>
This commit is contained in:
parent
4c87d002e3
commit
9e16ed7a13
9 changed files with 47 additions and 31 deletions
|
|
@ -309,6 +309,19 @@ ac_get_global_ids(nir_builder *b, unsigned num_components, unsigned bit_size)
|
|||
return nir_iadd(b, nir_imul(b, block_ids, block_size), local_ids);
|
||||
}
|
||||
|
||||
/**
 * Wrapper around nir_load_smem_amd.
 *
 * \param b               NIR builder, passed through to nir_load_smem_amd.
 * \param num_components  Number of components, passed through unchanged.
 * \param addr            Address operand, passed through unchanged.
 * \param offset          Offset operand, passed through unchanged.
 * \param align_mul       Alignment; asserted to be a power of two and >= 4.
 * \param access          Either 0 or ACCESS_CAN_SPECULATE; any other bit
 *                        trips an assertion.
 *
 * \return The nir_def produced by nir_load_smem_amd.
 *
 * ACCESS_CAN_REORDER and ACCESS_NON_WRITEABLE are always OR'ed into the
 * access flags of the emitted load, so callers do not pass them.
 */
nir_def *
|
||||
ac_nir_load_smem(nir_builder *b, unsigned num_components, nir_def *addr, nir_def *offset,
|
||||
unsigned align_mul, enum gl_access_qualifier access)
|
||||
{
|
||||
/* Callers may pass only ACCESS_CAN_SPECULATE (or nothing); the remaining
 * flags are added unconditionally below.
 */
|
||||
assert(!(access & ~ACCESS_CAN_SPECULATE));
|
||||
/* Reject zero, non-power-of-two and sub-4 alignments. */
assert(align_mul >= 4 && util_is_power_of_two_nonzero(align_mul));
|
||||
|
||||
/* Every load emitted here is marked reorderable and non-writeable. */
return nir_load_smem_amd(b, num_components, addr, offset,
|
||||
.align_mul = align_mul,
|
||||
.access = access | ACCESS_CAN_REORDER | ACCESS_NON_WRITEABLE);
|
||||
}
|
||||
|
||||
unsigned
|
||||
ac_nir_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -76,6 +76,10 @@ nir_def *
|
|||
ac_nir_unpack_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
|
||||
unsigned rshift, unsigned bitwidth);
|
||||
|
||||
/* Emit a nir_load_smem_amd load. "access" may only be 0 or
 * ACCESS_CAN_SPECULATE, and "align_mul" must be a power of two >= 4
 * (both are asserted by the implementation).
 */
nir_def *
|
||||
ac_nir_load_smem(nir_builder *b, unsigned num_components, nir_def *addr, nir_def *offset,
|
||||
unsigned align_mul, enum gl_access_qualifier access);
|
||||
|
||||
bool ac_nir_lower_sin_cos(nir_shader *shader);
|
||||
|
||||
bool ac_nir_lower_intrinsics_to_args(nir_shader *shader, const enum amd_gfx_level gfx_level,
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ load_desc_ptr(nir_builder *b, apply_layout_state *state, unsigned set)
|
|||
if (user_sgprs_locs->shader_data[AC_UD_INDIRECT_DESCRIPTOR_SETS].sgpr_idx != -1) {
|
||||
nir_def *addr = get_scalar_arg(b, 1, state->args->descriptor_sets[0]);
|
||||
addr = convert_pointer_to_64_bit(b, state, addr);
|
||||
return nir_load_smem_amd(b, 1, addr, nir_imm_int(b, set * 4));
|
||||
return ac_nir_load_smem(b, 1, addr, nir_imm_int(b, set * 4), 4, 0);
|
||||
}
|
||||
|
||||
assert(state->args->descriptor_sets[set].used);
|
||||
|
|
@ -168,7 +168,7 @@ load_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_def *rsrc,
|
|||
return nir_iadd(b, nir_channel(b, rsrc, 0), nir_channel(b, rsrc, 1));
|
||||
|
||||
nir_def *desc_set = convert_pointer_to_64_bit(b, state, nir_channel(b, rsrc, 0));
|
||||
return nir_load_smem_amd(b, 4, desc_set, nir_channel(b, rsrc, 1), .align_mul = 16);
|
||||
return ac_nir_load_smem(b, 4, desc_set, nir_channel(b, rsrc, 1), 16, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -186,7 +186,7 @@ visit_get_ssbo_size(nir_builder *b, apply_layout_state *state, nir_intrinsic_ins
|
|||
} else {
|
||||
/* load the entire descriptor so it can be CSE'd */
|
||||
nir_def *ptr = convert_pointer_to_64_bit(b, state, nir_channel(b, rsrc, 0));
|
||||
nir_def *desc = nir_load_smem_amd(b, 4, ptr, nir_channel(b, rsrc, 1), .align_mul = 16);
|
||||
nir_def *desc = ac_nir_load_smem(b, 4, ptr, nir_channel(b, rsrc, 1), 16, 0);
|
||||
size = nir_channel(b, desc, 2);
|
||||
}
|
||||
|
||||
|
|
@ -280,7 +280,7 @@ get_sampler_desc(nir_builder *b, apply_layout_state *state, nir_deref_instr *der
|
|||
return nir_iadd(b, load_desc_ptr(b, state, desc_set), index_offset);
|
||||
|
||||
nir_def *addr = convert_pointer_to_64_bit(b, state, load_desc_ptr(b, state, desc_set));
|
||||
nir_def *desc = nir_load_smem_amd(b, size, addr, index_offset, .align_mul = size * 4u);
|
||||
nir_def *desc = ac_nir_load_smem(b, size, addr, index_offset, size * 4u, 0);
|
||||
|
||||
if (desc_type == AC_DESC_IMAGE && state->has_image_load_dcc_bug && !tex && !write) {
|
||||
nir_def *comp[8];
|
||||
|
|
@ -395,7 +395,7 @@ load_push_constant(nir_builder *b, apply_layout_state *state, nir_intrinsic_inst
|
|||
if (size < (count - start) && can_increase_load_size(intrin, start * 4, size, size * 2))
|
||||
size *= 2;
|
||||
|
||||
data[num_loads++] = nir_load_smem_amd(b, size, addr, nir_iadd_imm_nuw(b, offset, start * 4));
|
||||
data[num_loads++] = ac_nir_load_smem(b, size, addr, nir_iadd_imm_nuw(b, offset, start * 4), 4, 0);
|
||||
start += size;
|
||||
}
|
||||
return nir_extract_bits(b, data, num_loads, 0, intrin->def.num_components, bit_size);
|
||||
|
|
|
|||
|
|
@ -34,8 +34,7 @@ load_ring(nir_builder *b, unsigned ring, lower_abi_state *s)
|
|||
|
||||
nir_def *ring_offsets = ac_nir_load_arg(b, &s->args->ac, arg);
|
||||
ring_offsets = nir_pack_64_2x32_split(b, nir_channel(b, ring_offsets, 0), nir_channel(b, ring_offsets, 1));
|
||||
return nir_load_smem_amd(b, 4, ring_offsets, nir_imm_int(b, ring * 16u), .align_mul = 4u,
|
||||
.access = ACCESS_CAN_SPECULATE);
|
||||
return ac_nir_load_smem(b, 4, ring_offsets, nir_imm_int(b, ring * 16u), 4, ACCESS_CAN_SPECULATE);
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
|
|
@ -367,8 +366,8 @@ lower_abi_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
|
|||
case nir_intrinsic_load_streamout_buffer_amd: {
|
||||
nir_def *ptr = nir_pack_64_2x32_split(b, ac_nir_load_arg(b, &s->args->ac, s->args->streamout_buffers),
|
||||
nir_imm_int(b, s->address32_hi));
|
||||
replacement = nir_load_smem_amd(b, 4, ptr, nir_imm_int(b, nir_intrinsic_base(intrin) * 16),
|
||||
.access = ACCESS_CAN_SPECULATE);
|
||||
replacement =
|
||||
ac_nir_load_smem(b, 4, ptr, nir_imm_int(b, nir_intrinsic_base(intrin) * 16), 16, ACCESS_CAN_SPECULATE);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_xfb_state_address_gfx12_amd:
|
||||
|
|
|
|||
|
|
@ -234,8 +234,8 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs
|
|||
|
||||
nir_def *vertex_buffers_arg = ac_nir_load_arg(b, &s->args->ac, s->args->ac.vertex_buffers);
|
||||
nir_def *vertex_buffers = nir_pack_64_2x32_split(b, vertex_buffers_arg, nir_imm_int(b, s->gpu_info->address32_hi));
|
||||
nir_def *descriptor = nir_load_smem_amd(b, 4, vertex_buffers, nir_imm_int(b, desc_index * 16),
|
||||
.access = ACCESS_CAN_SPECULATE);
|
||||
nir_def *descriptor =
|
||||
ac_nir_load_smem(b, 4, vertex_buffers, nir_imm_int(b, desc_index * 16), 16, ACCESS_CAN_SPECULATE);
|
||||
nir_def *base_index = calc_vs_input_index(b, location, s);
|
||||
nir_def *zero = nir_imm_int(b, 0);
|
||||
|
||||
|
|
|
|||
|
|
@ -368,10 +368,10 @@ load_sbt_entry(nir_builder *b, const struct rt_variables *vars, nir_def *idx, en
|
|||
{
|
||||
nir_def *desc_base_addr = nir_load_sbt_base_amd(b);
|
||||
|
||||
nir_def *desc = nir_pack_64_2x32(b, nir_load_smem_amd(b, 2, desc_base_addr, nir_imm_int(b, binding)));
|
||||
nir_def *desc = nir_pack_64_2x32(b, ac_nir_load_smem(b, 2, desc_base_addr, nir_imm_int(b, binding), 4, 0));
|
||||
|
||||
nir_def *stride_offset = nir_imm_int(b, binding + (binding == SBT_RAYGEN ? 8 : 16));
|
||||
nir_def *stride = nir_load_smem_amd(b, 1, desc_base_addr, stride_offset);
|
||||
nir_def *stride = ac_nir_load_smem(b, 1, desc_base_addr, stride_offset, 4, 0);
|
||||
|
||||
nir_def *addr = nir_iadd(b, desc, nir_u2u64(b, nir_iadd_imm(b, nir_imul(b, idx, stride), offset)));
|
||||
|
||||
|
|
@ -927,8 +927,8 @@ radv_build_end_trace_token(nir_builder *b, struct rt_variables *vars, nir_def *t
|
|||
dst_addr = nir_iadd_imm(b, dst_addr, 8);
|
||||
|
||||
nir_def *dispatch_indices =
|
||||
nir_load_smem_amd(b, 2, nir_imm_int64(b, vars->device->rra_trace.ray_history_addr),
|
||||
nir_imm_int(b, offsetof(struct radv_ray_history_header, dispatch_index)), .align_mul = 4);
|
||||
ac_nir_load_smem(b, 2, nir_imm_int64(b, vars->device->rra_trace.ray_history_addr),
|
||||
nir_imm_int(b, offsetof(struct radv_ray_history_header, dispatch_index)), 4, 0);
|
||||
nir_def *dispatch_index = nir_iadd(b, nir_channel(b, dispatch_indices, 0), nir_channel(b, dispatch_indices, 1));
|
||||
nir_def *dispatch_and_flags = nir_iand_imm(b, nir_load_var(b, vars->cull_mask_and_flags), 0xFFFF);
|
||||
dispatch_and_flags = nir_ior(b, dispatch_and_flags, dispatch_index);
|
||||
|
|
|
|||
|
|
@ -39,8 +39,8 @@ nir_def *si_nir_load_internal_binding(struct si_screen *sscreen, nir_builder *b,
|
|||
unsigned num_components)
|
||||
{
|
||||
nir_def *addr = si_nir_load_addr32_arg(sscreen, args, b, args->internal_bindings);
|
||||
return nir_load_smem_amd(b, num_components, addr, nir_imm_int(b, slot * 16),
|
||||
.access = ACCESS_CAN_SPECULATE);
|
||||
return ac_nir_load_smem(b, num_components, addr, nir_imm_int(b, slot * 16), 16,
|
||||
ACCESS_CAN_SPECULATE);
|
||||
}
|
||||
|
||||
static nir_def *build_attr_ring_desc(nir_builder *b, struct si_shader *shader,
|
||||
|
|
@ -334,22 +334,22 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
|
|||
case nir_intrinsic_load_clip_half_line_width_amd: {
|
||||
nir_def *addr = si_nir_load_addr32_arg(s->shader->selector->screen, s->args,
|
||||
b, args->small_prim_cull_info);
|
||||
replacement = nir_load_smem_amd(b, 2, addr, nir_imm_int(b, 32),
|
||||
.access = ACCESS_CAN_SPECULATE);
|
||||
replacement = ac_nir_load_smem(b, 2, addr, nir_imm_int(b, 32), 16,
|
||||
ACCESS_CAN_SPECULATE);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_cull_triangle_viewport_xy_scale_and_offset_amd: {
|
||||
nir_def *addr = si_nir_load_addr32_arg(s->shader->selector->screen, s->args,
|
||||
b, args->small_prim_cull_info);
|
||||
replacement = nir_load_smem_amd(b, 4, addr, nir_imm_int(b, 0),
|
||||
.access = ACCESS_CAN_SPECULATE);
|
||||
replacement = ac_nir_load_smem(b, 4, addr, nir_imm_int(b, 0), 16,
|
||||
ACCESS_CAN_SPECULATE);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_cull_line_viewport_xy_scale_and_offset_amd: {
|
||||
nir_def *addr = si_nir_load_addr32_arg(s->shader->selector->screen, s->args,
|
||||
b, args->small_prim_cull_info);
|
||||
replacement = nir_load_smem_amd(b, 4, addr, nir_imm_int(b, 16),
|
||||
.access = ACCESS_CAN_SPECULATE);
|
||||
replacement = ac_nir_load_smem(b, 4, addr, nir_imm_int(b, 16), 16,
|
||||
ACCESS_CAN_SPECULATE);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_num_vertices_per_primitive_amd:
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ static nir_def *load_ubo_desc(nir_builder *b, nir_def *index,
|
|||
index = nir_iadd_imm(b, index, SI_NUM_SHADER_BUFFERS);
|
||||
|
||||
nir_def *offset = nir_ishl_imm(b, index, 4);
|
||||
return nir_load_smem_amd(b, 4, addr, offset, .access = ACCESS_CAN_SPECULATE);
|
||||
return ac_nir_load_smem(b, 4, addr, offset, 16, ACCESS_CAN_SPECULATE);
|
||||
}
|
||||
|
||||
static nir_def *load_ssbo_desc(nir_builder *b, nir_src *index,
|
||||
|
|
@ -98,7 +98,7 @@ static nir_def *load_ssbo_desc(nir_builder *b, nir_src *index,
|
|||
slot = nir_isub_imm(b, SI_NUM_SHADER_BUFFERS - 1, slot);
|
||||
|
||||
nir_def *offset = nir_ishl_imm(b, slot, 4);
|
||||
return nir_load_smem_amd(b, 4, addr, offset, .access = ACCESS_CAN_SPECULATE);
|
||||
return ac_nir_load_smem(b, 4, addr, offset, 16, ACCESS_CAN_SPECULATE);
|
||||
}
|
||||
|
||||
static nir_def *fixup_image_desc(nir_builder *b, nir_def *rsrc, bool uses_store,
|
||||
|
|
@ -155,8 +155,8 @@ static nir_def *load_image_desc(nir_builder *b, nir_def *list, nir_def *index,
|
|||
num_channels = 8;
|
||||
}
|
||||
|
||||
nir_def *rsrc = nir_load_smem_amd(b, num_channels, list, offset,
|
||||
.access = bindless ? 0 : ACCESS_CAN_SPECULATE);
|
||||
nir_def *rsrc = ac_nir_load_smem(b, num_channels, list, offset, 16,
|
||||
bindless ? 0 : ACCESS_CAN_SPECULATE);
|
||||
|
||||
if (desc_type == AC_DESC_IMAGE)
|
||||
rsrc = fixup_image_desc(b, rsrc, uses_store, s);
|
||||
|
|
@ -434,8 +434,8 @@ static nir_def *load_sampler_desc(nir_builder *b, nir_def *list, nir_def *index,
|
|||
break;
|
||||
}
|
||||
|
||||
return nir_load_smem_amd(b, num_channels, list, offset,
|
||||
.access = bindless ? 0 : ACCESS_CAN_SPECULATE);
|
||||
return ac_nir_load_smem(b, num_channels, list, offset, 16,
|
||||
bindless ? 0 : ACCESS_CAN_SPECULATE);
|
||||
}
|
||||
|
||||
static nir_def *load_deref_sampler_desc(nir_builder *b, nir_deref_instr *deref,
|
||||
|
|
|
|||
|
|
@ -434,8 +434,8 @@ load_vs_input_from_vertex_buffer(nir_builder *b, unsigned input_index,
|
|||
unsigned index = input_index - sel->info.num_vbos_in_user_sgprs;
|
||||
nir_def *addr = si_nir_load_addr32_arg(s->shader->selector->screen, s->args,
|
||||
b, s->args->ac.vertex_buffers);
|
||||
vb_desc = nir_load_smem_amd(b, 4, addr, nir_imm_int(b, index * 16),
|
||||
.access = ACCESS_CAN_SPECULATE);
|
||||
vb_desc = ac_nir_load_smem(b, 4, addr, nir_imm_int(b, index * 16), 16,
|
||||
ACCESS_CAN_SPECULATE);
|
||||
}
|
||||
|
||||
nir_def *vertex_index = s->vertex_index[input_index];
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue