diff --git a/src/amd/common/nir/ac_nir.c b/src/amd/common/nir/ac_nir.c index 210555cc31e..d533f1d72bb 100644 --- a/src/amd/common/nir/ac_nir.c +++ b/src/amd/common/nir/ac_nir.c @@ -309,6 +309,19 @@ ac_get_global_ids(nir_builder *b, unsigned num_components, unsigned bit_size) return nir_iadd(b, nir_imul(b, block_ids, block_size), local_ids); } +nir_def * +ac_nir_load_smem(nir_builder *b, unsigned num_components, nir_def *addr, nir_def *offset, + unsigned align_mul, enum gl_access_qualifier access) +{ + /* Only ACCESS_CAN_SPECULATE may be passed in; the other flags are set below. */ + assert(!(access & ~ACCESS_CAN_SPECULATE)); + assert(align_mul >= 4 && util_is_power_of_two_nonzero(align_mul)); + + return nir_load_smem_amd(b, num_components, addr, offset, + .align_mul = align_mul, + .access = access | ACCESS_CAN_REORDER | ACCESS_NON_WRITEABLE); +} + unsigned ac_nir_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer) { diff --git a/src/amd/common/nir/ac_nir.h b/src/amd/common/nir/ac_nir.h index 8dca59d3f1e..0ec6a0b467e 100644 --- a/src/amd/common/nir/ac_nir.h +++ b/src/amd/common/nir/ac_nir.h @@ -76,6 +76,10 @@ nir_def * ac_nir_unpack_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg, unsigned rshift, unsigned bitwidth); +nir_def * +ac_nir_load_smem(nir_builder *b, unsigned num_components, nir_def *addr, nir_def *offset, + unsigned align_mul, enum gl_access_qualifier access); + bool ac_nir_lower_sin_cos(nir_shader *shader); bool ac_nir_lower_intrinsics_to_args(nir_shader *shader, const enum amd_gfx_level gfx_level, diff --git a/src/amd/vulkan/nir/radv_nir_apply_pipeline_layout.c b/src/amd/vulkan/nir/radv_nir_apply_pipeline_layout.c index b6573b95cc9..5ce56f91871 100644 --- a/src/amd/vulkan/nir/radv_nir_apply_pipeline_layout.c +++ b/src/amd/vulkan/nir/radv_nir_apply_pipeline_layout.c @@ -50,7 +50,7 @@ load_desc_ptr(nir_builder *b, apply_layout_state *state, unsigned set) if (user_sgprs_locs->shader_data[AC_UD_INDIRECT_DESCRIPTOR_SETS].sgpr_idx != -1) { nir_def *addr = 
get_scalar_arg(b, 1, state->args->descriptor_sets[0]); addr = convert_pointer_to_64_bit(b, state, addr); - return nir_load_smem_amd(b, 1, addr, nir_imm_int(b, set * 4)); + return ac_nir_load_smem(b, 1, addr, nir_imm_int(b, set * 4), 4, 0); } assert(state->args->descriptor_sets[set].used); @@ -168,7 +168,7 @@ load_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_def *rsrc, return nir_iadd(b, nir_channel(b, rsrc, 0), nir_channel(b, rsrc, 1)); nir_def *desc_set = convert_pointer_to_64_bit(b, state, nir_channel(b, rsrc, 0)); - return nir_load_smem_amd(b, 4, desc_set, nir_channel(b, rsrc, 1), .align_mul = 16); + return ac_nir_load_smem(b, 4, desc_set, nir_channel(b, rsrc, 1), 16, 0); } static void @@ -186,7 +186,7 @@ visit_get_ssbo_size(nir_builder *b, apply_layout_state *state, nir_intrinsic_ins } else { /* load the entire descriptor so it can be CSE'd */ nir_def *ptr = convert_pointer_to_64_bit(b, state, nir_channel(b, rsrc, 0)); - nir_def *desc = nir_load_smem_amd(b, 4, ptr, nir_channel(b, rsrc, 1), .align_mul = 16); + nir_def *desc = ac_nir_load_smem(b, 4, ptr, nir_channel(b, rsrc, 1), 16, 0); size = nir_channel(b, desc, 2); } @@ -280,7 +280,7 @@ get_sampler_desc(nir_builder *b, apply_layout_state *state, nir_deref_instr *der return nir_iadd(b, load_desc_ptr(b, state, desc_set), index_offset); nir_def *addr = convert_pointer_to_64_bit(b, state, load_desc_ptr(b, state, desc_set)); - nir_def *desc = nir_load_smem_amd(b, size, addr, index_offset, .align_mul = size * 4u); + nir_def *desc = ac_nir_load_smem(b, size, addr, index_offset, size * 4u, 0); if (desc_type == AC_DESC_IMAGE && state->has_image_load_dcc_bug && !tex && !write) { nir_def *comp[8]; @@ -395,7 +395,7 @@ load_push_constant(nir_builder *b, apply_layout_state *state, nir_intrinsic_inst if (size < (count - start) && can_increase_load_size(intrin, start * 4, size, size * 2)) size *= 2; - data[num_loads++] = nir_load_smem_amd(b, size, addr, nir_iadd_imm_nuw(b, offset, start * 4)); + 
data[num_loads++] = ac_nir_load_smem(b, size, addr, nir_iadd_imm_nuw(b, offset, start * 4), 4, 0); start += size; } return nir_extract_bits(b, data, num_loads, 0, intrin->def.num_components, bit_size); diff --git a/src/amd/vulkan/nir/radv_nir_lower_abi.c b/src/amd/vulkan/nir/radv_nir_lower_abi.c index d006c18eaa1..95d1427bc86 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_abi.c +++ b/src/amd/vulkan/nir/radv_nir_lower_abi.c @@ -34,8 +34,7 @@ load_ring(nir_builder *b, unsigned ring, lower_abi_state *s) nir_def *ring_offsets = ac_nir_load_arg(b, &s->args->ac, arg); ring_offsets = nir_pack_64_2x32_split(b, nir_channel(b, ring_offsets, 0), nir_channel(b, ring_offsets, 1)); - return nir_load_smem_amd(b, 4, ring_offsets, nir_imm_int(b, ring * 16u), .align_mul = 4u, - .access = ACCESS_CAN_SPECULATE); + return ac_nir_load_smem(b, 4, ring_offsets, nir_imm_int(b, ring * 16u), 4, ACCESS_CAN_SPECULATE); } static nir_def * @@ -367,8 +366,8 @@ lower_abi_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state) case nir_intrinsic_load_streamout_buffer_amd: { nir_def *ptr = nir_pack_64_2x32_split(b, ac_nir_load_arg(b, &s->args->ac, s->args->streamout_buffers), nir_imm_int(b, s->address32_hi)); - replacement = nir_load_smem_amd(b, 4, ptr, nir_imm_int(b, nir_intrinsic_base(intrin) * 16), - .access = ACCESS_CAN_SPECULATE); + replacement = + ac_nir_load_smem(b, 4, ptr, nir_imm_int(b, nir_intrinsic_base(intrin) * 16), 16, ACCESS_CAN_SPECULATE); break; } case nir_intrinsic_load_xfb_state_address_gfx12_amd: diff --git a/src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c b/src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c index ef1ce0f956a..b72492d96c4 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c +++ b/src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c @@ -234,8 +234,8 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs nir_def *vertex_buffers_arg = ac_nir_load_arg(b, &s->args->ac, s->args->ac.vertex_buffers); nir_def *vertex_buffers = 
nir_pack_64_2x32_split(b, vertex_buffers_arg, nir_imm_int(b, s->gpu_info->address32_hi)); - nir_def *descriptor = nir_load_smem_amd(b, 4, vertex_buffers, nir_imm_int(b, desc_index * 16), - .access = ACCESS_CAN_SPECULATE); + nir_def *descriptor = + ac_nir_load_smem(b, 4, vertex_buffers, nir_imm_int(b, desc_index * 16), 16, ACCESS_CAN_SPECULATE); nir_def *base_index = calc_vs_input_index(b, location, s); nir_def *zero = nir_imm_int(b, 0); diff --git a/src/amd/vulkan/nir/radv_nir_rt_shader.c b/src/amd/vulkan/nir/radv_nir_rt_shader.c index 255df4e3f5d..009fc201be9 100644 --- a/src/amd/vulkan/nir/radv_nir_rt_shader.c +++ b/src/amd/vulkan/nir/radv_nir_rt_shader.c @@ -368,10 +368,10 @@ load_sbt_entry(nir_builder *b, const struct rt_variables *vars, nir_def *idx, en { nir_def *desc_base_addr = nir_load_sbt_base_amd(b); - nir_def *desc = nir_pack_64_2x32(b, nir_load_smem_amd(b, 2, desc_base_addr, nir_imm_int(b, binding))); + nir_def *desc = nir_pack_64_2x32(b, ac_nir_load_smem(b, 2, desc_base_addr, nir_imm_int(b, binding), 4, 0)); nir_def *stride_offset = nir_imm_int(b, binding + (binding == SBT_RAYGEN ? 
8 : 16)); - nir_def *stride = nir_load_smem_amd(b, 1, desc_base_addr, stride_offset); + nir_def *stride = ac_nir_load_smem(b, 1, desc_base_addr, stride_offset, 4, 0); nir_def *addr = nir_iadd(b, desc, nir_u2u64(b, nir_iadd_imm(b, nir_imul(b, idx, stride), offset))); @@ -927,8 +927,8 @@ radv_build_end_trace_token(nir_builder *b, struct rt_variables *vars, nir_def *t dst_addr = nir_iadd_imm(b, dst_addr, 8); nir_def *dispatch_indices = - nir_load_smem_amd(b, 2, nir_imm_int64(b, vars->device->rra_trace.ray_history_addr), - nir_imm_int(b, offsetof(struct radv_ray_history_header, dispatch_index)), .align_mul = 4); + ac_nir_load_smem(b, 2, nir_imm_int64(b, vars->device->rra_trace.ray_history_addr), + nir_imm_int(b, offsetof(struct radv_ray_history_header, dispatch_index)), 4, 0); nir_def *dispatch_index = nir_iadd(b, nir_channel(b, dispatch_indices, 0), nir_channel(b, dispatch_indices, 1)); nir_def *dispatch_and_flags = nir_iand_imm(b, nir_load_var(b, vars->cull_mask_and_flags), 0xFFFF); dispatch_and_flags = nir_ior(b, dispatch_and_flags, dispatch_index); diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c index 795b62fd10b..3d42621f8fd 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c @@ -39,8 +39,8 @@ nir_def *si_nir_load_internal_binding(struct si_screen *sscreen, nir_builder *b, unsigned num_components) { nir_def *addr = si_nir_load_addr32_arg(sscreen, args, b, args->internal_bindings); - return nir_load_smem_amd(b, num_components, addr, nir_imm_int(b, slot * 16), - .access = ACCESS_CAN_SPECULATE); + return ac_nir_load_smem(b, num_components, addr, nir_imm_int(b, slot * 16), 16, + ACCESS_CAN_SPECULATE); } static nir_def *build_attr_ring_desc(nir_builder *b, struct si_shader *shader, @@ -334,22 +334,22 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s case nir_intrinsic_load_clip_half_line_width_amd: { nir_def 
*addr = si_nir_load_addr32_arg(s->shader->selector->screen, s->args, b, args->small_prim_cull_info); - replacement = nir_load_smem_amd(b, 2, addr, nir_imm_int(b, 32), - .access = ACCESS_CAN_SPECULATE); + replacement = ac_nir_load_smem(b, 2, addr, nir_imm_int(b, 32), 16, + ACCESS_CAN_SPECULATE); break; } case nir_intrinsic_load_cull_triangle_viewport_xy_scale_and_offset_amd: { nir_def *addr = si_nir_load_addr32_arg(s->shader->selector->screen, s->args, b, args->small_prim_cull_info); - replacement = nir_load_smem_amd(b, 4, addr, nir_imm_int(b, 0), - .access = ACCESS_CAN_SPECULATE); + replacement = ac_nir_load_smem(b, 4, addr, nir_imm_int(b, 0), 16, + ACCESS_CAN_SPECULATE); break; } case nir_intrinsic_load_cull_line_viewport_xy_scale_and_offset_amd: { nir_def *addr = si_nir_load_addr32_arg(s->shader->selector->screen, s->args, b, args->small_prim_cull_info); - replacement = nir_load_smem_amd(b, 4, addr, nir_imm_int(b, 16), - .access = ACCESS_CAN_SPECULATE); + replacement = ac_nir_load_smem(b, 4, addr, nir_imm_int(b, 16), 16, + ACCESS_CAN_SPECULATE); break; } case nir_intrinsic_load_num_vertices_per_primitive_amd: diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_resource.c b/src/gallium/drivers/radeonsi/si_nir_lower_resource.c index e11e36e316d..c425cd5f528 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_resource.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_resource.c @@ -77,7 +77,7 @@ static nir_def *load_ubo_desc(nir_builder *b, nir_def *index, index = nir_iadd_imm(b, index, SI_NUM_SHADER_BUFFERS); nir_def *offset = nir_ishl_imm(b, index, 4); - return nir_load_smem_amd(b, 4, addr, offset, .access = ACCESS_CAN_SPECULATE); + return ac_nir_load_smem(b, 4, addr, offset, 16, ACCESS_CAN_SPECULATE); } static nir_def *load_ssbo_desc(nir_builder *b, nir_src *index, @@ -98,7 +98,7 @@ static nir_def *load_ssbo_desc(nir_builder *b, nir_src *index, slot = nir_isub_imm(b, SI_NUM_SHADER_BUFFERS - 1, slot); nir_def *offset = nir_ishl_imm(b, slot, 4); - return 
nir_load_smem_amd(b, 4, addr, offset, .access = ACCESS_CAN_SPECULATE); + return ac_nir_load_smem(b, 4, addr, offset, 16, ACCESS_CAN_SPECULATE); } static nir_def *fixup_image_desc(nir_builder *b, nir_def *rsrc, bool uses_store, @@ -155,8 +155,8 @@ static nir_def *load_image_desc(nir_builder *b, nir_def *list, nir_def *index, num_channels = 8; } - nir_def *rsrc = nir_load_smem_amd(b, num_channels, list, offset, - .access = bindless ? 0 : ACCESS_CAN_SPECULATE); + nir_def *rsrc = ac_nir_load_smem(b, num_channels, list, offset, 16, + bindless ? 0 : ACCESS_CAN_SPECULATE); if (desc_type == AC_DESC_IMAGE) rsrc = fixup_image_desc(b, rsrc, uses_store, s); @@ -434,8 +434,8 @@ static nir_def *load_sampler_desc(nir_builder *b, nir_def *list, nir_def *index, break; } - return nir_load_smem_amd(b, num_channels, list, offset, - .access = bindless ? 0 : ACCESS_CAN_SPECULATE); + return ac_nir_load_smem(b, num_channels, list, offset, 16, + bindless ? 0 : ACCESS_CAN_SPECULATE); } static nir_def *load_deref_sampler_desc(nir_builder *b, nir_deref_instr *deref, diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_vs_inputs.c b/src/gallium/drivers/radeonsi/si_nir_lower_vs_inputs.c index 59e2a42fabb..111a526f550 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_vs_inputs.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_vs_inputs.c @@ -434,8 +434,8 @@ load_vs_input_from_vertex_buffer(nir_builder *b, unsigned input_index, unsigned index = input_index - sel->info.num_vbos_in_user_sgprs; nir_def *addr = si_nir_load_addr32_arg(s->shader->selector->screen, s->args, b, s->args->ac.vertex_buffers); - vb_desc = nir_load_smem_amd(b, 4, addr, nir_imm_int(b, index * 16), - .access = ACCESS_CAN_SPECULATE); + vb_desc = ac_nir_load_smem(b, 4, addr, nir_imm_int(b, index * 16), 16, + ACCESS_CAN_SPECULATE); } nir_def *vertex_index = s->vertex_index[input_index];