diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index a1ecc410ddd..c499100fc8c 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -5401,59 +5401,6 @@ load_desc_ptr(isel_context* ctx, unsigned desc_set)
    return get_arg(ctx, ctx->args->descriptor_sets[desc_set]);
 }
 
-void
-visit_load_resource(isel_context* ctx, nir_intrinsic_instr* instr)
-{
-   Builder bld(ctx->program, ctx->block);
-   Temp index = get_ssa_temp(ctx, instr->src[0].ssa);
-   if (!nir_dest_is_divergent(instr->dest))
-      index = bld.as_uniform(index);
-   unsigned desc_set = nir_intrinsic_desc_set(instr);
-   unsigned binding = nir_intrinsic_binding(instr);
-
-   Temp desc_ptr;
-   radv_pipeline_layout* pipeline_layout = ctx->options->layout;
-   radv_descriptor_set_layout* layout = pipeline_layout->set[desc_set].layout;
-   unsigned offset = layout->binding[binding].offset;
-   unsigned stride;
-   if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
-       layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
-      unsigned idx = pipeline_layout->set[desc_set].dynamic_offset_start +
-                     layout->binding[binding].dynamic_offset_offset;
-      desc_ptr = get_arg(ctx, ctx->args->ac.push_constants);
-      offset = pipeline_layout->push_constant_size + 16 * idx;
-      stride = 16;
-   } else {
-      desc_ptr = load_desc_ptr(ctx, desc_set);
-      stride = layout->binding[binding].size;
-   }
-
-   if (nir_src_is_const(instr->src[0])) {
-      index =
-         bld.copy(bld.def(s1), Operand::c32((offset + nir_src_as_uint(instr->src[0]) * stride)));
-   } else if (index.type() == RegType::vgpr) {
-      if (stride != 1) {
-         bool index24bit = layout->binding[binding].array_size <= 0x1000000;
-         index = bld.v_mul_imm(bld.def(v1), index, stride, index24bit);
-      }
-      if (offset)
-         index = bld.vadd32(bld.def(v1), Operand::c32(offset), index);
-   } else {
-      if (stride != 1)
-         index = bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), Operand::c32(stride), index);
-      if (offset)
-         index = bld.sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc),
-                          Operand::c32(offset), index);
-   }
-
-   Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
-   std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
-   elems[0] = desc_ptr;
-   elems[1] = index;
-   ctx->allocated_vec.emplace(dst.id(), elems);
-   bld.pseudo(aco_opcode::p_create_vector, Definition(dst), desc_ptr, index, Operand::zero());
-}
-
 void
 load_buffer(isel_context* ctx, unsigned num_components, unsigned component_size, Temp dst,
             Temp rsrc, Temp offset, unsigned align_mul, unsigned align_offset, bool glc = false,
@@ -5494,17 +5441,6 @@ load_buffer_rsrc(isel_context* ctx, Temp rsrc)
    return bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), set_ptr, binding);
 }
 
-bool
-is_inline_ubo(isel_context* ctx, nir_src rsrc)
-{
-   nir_binding binding = nir_chase_binding(rsrc);
-   if (!binding.success)
-      return false;
-
-   radv_descriptor_set_layout* layout = ctx->options->layout->set[binding.desc_set].layout;
-   return layout->binding[binding.binding].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT;
-}
-
 void
 visit_load_ubo(isel_context* ctx, nir_intrinsic_instr* instr)
 {
@@ -5513,28 +5449,11 @@ visit_load_ubo(isel_context* ctx, nir_intrinsic_instr* instr)
 
    Builder bld(ctx->program, ctx->block);
 
-   if (is_inline_ubo(ctx, instr->src[0])) {
-      Temp set_ptr = bld.as_uniform(emit_extract_vector(ctx, rsrc, 0, RegClass(rsrc.type(), 1)));
-      Temp binding_off =
-         bld.as_uniform(emit_extract_vector(ctx, rsrc, 1, RegClass(rsrc.type(), 1)));
-      rsrc =
-         bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), set_ptr, binding_off);
-
-      uint32_t desc_type =
-         S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
-         S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
-      if (ctx->options->chip_class >= GFX10) {
-         desc_type |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
-                      S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
-      } else {
-         desc_type |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
-                      S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
-      }
-      rsrc = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), rsrc,
-                        Operand::c32(S_008F04_BASE_ADDRESS_HI(ctx->options->address32_hi)),
-                        Operand::c32(0xFFFFFFFFu), Operand::c32(desc_type));
-   } else {
+   if (rsrc.bytes() == 16)
+      rsrc = bld.as_uniform(rsrc); /* for VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT */
+   else
       rsrc = load_buffer_rsrc(ctx, rsrc);
-   }
+
    unsigned size = instr->dest.ssa.bit_size / 8;
    load_buffer(ctx, instr->num_components, size, dst, rsrc, get_ssa_temp(ctx, instr->src[1].ssa),
                nir_intrinsic_align_mul(instr), nir_intrinsic_align_offset(instr));
@@ -8110,7 +8029,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
    case nir_intrinsic_load_ubo: visit_load_ubo(ctx, instr); break;
    case nir_intrinsic_load_push_constant: visit_load_push_constant(ctx, instr); break;
    case nir_intrinsic_load_constant: visit_load_constant(ctx, instr); break;
-   case nir_intrinsic_vulkan_resource_index: visit_load_resource(ctx, instr); break;
    case nir_intrinsic_load_shared: visit_load_shared(ctx, instr); break;
    case nir_intrinsic_store_shared: visit_store_shared(ctx, instr); break;
    case nir_intrinsic_shared_atomic_add:
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index 3ea6c19a2f9..da5412696e2 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -749,7 +749,6 @@ init_context(isel_context* ctx, nir_shader* shader)
             case nir_intrinsic_load_ssbo:
             case nir_intrinsic_load_global:
             case nir_intrinsic_load_global_constant:
-            case nir_intrinsic_vulkan_resource_index:
             case nir_intrinsic_get_ssbo_size:
                type = nir_dest_is_divergent(intrinsic->dest) ? RegType::vgpr : RegType::sgpr;
                break;
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index ffb255abc6e..4e92396ebf3 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -3722,14 +3722,6 @@ static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
    case nir_intrinsic_load_push_constant:
       result = visit_load_push_constant(ctx, instr);
       break;
-   case nir_intrinsic_vulkan_resource_index: {
-      LLVMValueRef index = get_src(ctx, instr->src[0]);
-      unsigned desc_set = nir_intrinsic_desc_set(instr);
-      unsigned binding = nir_intrinsic_binding(instr);
-
-      result = ctx->abi->load_resource(ctx->abi, index, desc_set, binding);
-      break;
-   }
    case nir_intrinsic_store_ssbo:
       visit_store_ssbo(ctx, instr);
       break;
diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h
index dca3d4a7c6e..205dfc04df5 100644
--- a/src/amd/llvm/ac_shader_abi.h
+++ b/src/amd/llvm/ac_shader_abi.h
@@ -129,16 +129,6 @@ struct ac_shader_abi {
                                     LLVMValueRef index, enum ac_descriptor_type desc_type,
                                     bool image, bool write, bool bindless);
 
-   /**
-    * Load a Vulkan-specific resource.
-    *
-    * \param index resource index
-    * \param desc_set descriptor set
-    * \param binding descriptor set binding
-    */
-   LLVMValueRef (*load_resource)(struct ac_shader_abi *abi, LLVMValueRef index, unsigned desc_set,
-                                 unsigned binding);
-
    LLVMValueRef (*load_sample_position)(struct ac_shader_abi *abi, LLVMValueRef sample_id);
 
    LLVMValueRef (*load_local_group_size)(struct ac_shader_abi *abi);
diff --git a/src/amd/vulkan/meson.build b/src/amd/vulkan/meson.build
index 7837f622a9a..a1a29a31154 100644
--- a/src/amd/vulkan/meson.build
+++ b/src/amd/vulkan/meson.build
@@ -69,6 +69,7 @@ libradv_files = files(
   'radv_meta_resolve.c',
   'radv_meta_resolve_cs.c',
   'radv_meta_resolve_fs.c',
+  'radv_nir_apply_pipeline_layout.c',
   'radv_nir_lower_ray_queries.c',
   'radv_nir_lower_ycbcr_textures.c',
   'radv_pass.c',
diff --git a/src/amd/vulkan/radv_nir_apply_pipeline_layout.c b/src/amd/vulkan/radv_nir_apply_pipeline_layout.c
new file mode 100644
index 00000000000..ba8263c7fb4
--- /dev/null
+++ b/src/amd/vulkan/radv_nir_apply_pipeline_layout.c
@@ -0,0 +1,252 @@
+/*
+ * Copyright © 2020 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+#include "nir.h"
+#include "nir_builder.h"
+#include "radv_private.h"
+#include "radv_shader.h"
+#include "radv_shader_args.h"
+
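+/* Pass-wide state: the chip generation, the high 32 bits of descriptor
+ * addresses, and the pipeline layout / shader ABI used to turn
+ * (set, binding) pairs into descriptor pointers.
+ */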
+typedef struct {
+   enum chip_class chip_class;
+   uint32_t address32_hi;
+
+   const struct radv_shader_args *args;
+   const struct radv_shader_info *info;
+   const struct radv_pipeline_layout *pipeline_layout;
+} apply_layout_state;
+
+static nir_ssa_def *
+get_scalar_arg(nir_builder *b, unsigned size, struct ac_arg arg)
+{
+   return nir_load_scalar_arg_amd(b, size, .base = arg.arg_index);
+}
+
+static nir_ssa_def *
+convert_pointer_to_64_bit(nir_builder *b, apply_layout_state *state, nir_ssa_def *ptr)
+{
+   return nir_pack_64_2x32_split(b, ptr, nir_imm_int(b, state->address32_hi));
+}
+
+static nir_ssa_def *
+load_desc_ptr(nir_builder *b, apply_layout_state *state, unsigned set)
+{
+   const struct radv_userdata_locations *user_sgprs_locs = &state->info->user_sgprs_locs;
+   if (user_sgprs_locs->shader_data[AC_UD_INDIRECT_DESCRIPTOR_SETS].sgpr_idx != -1) {
+      nir_ssa_def *addr = get_scalar_arg(b, 1, state->args->descriptor_sets[0]);
+      addr = convert_pointer_to_64_bit(b, state, addr);
+      return nir_load_smem_amd(b, 1, addr, nir_imm_int(b, set * 4));
+   }
+
+   assert(state->args->descriptor_sets[set].used);
+   return get_scalar_arg(b, 1, state->args->descriptor_sets[set]);
+}
+
+static void
+visit_vulkan_resource_index(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
+{
+   unsigned desc_set = nir_intrinsic_desc_set(intrin);
+   unsigned binding = nir_intrinsic_binding(intrin);
+   struct radv_descriptor_set_layout *layout = state->pipeline_layout->set[desc_set].layout;
+   unsigned offset = layout->binding[binding].offset;
+   unsigned stride;
+
+   nir_ssa_def *set_ptr;
+   if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
+       layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
+      unsigned idx = state->pipeline_layout->set[desc_set].dynamic_offset_start +
+                     layout->binding[binding].dynamic_offset_offset;
+      set_ptr = get_scalar_arg(b, 1, state->args->ac.push_constants);
+      offset = state->pipeline_layout->push_constant_size + idx * 16;
+      stride = 16;
+   } else {
+      set_ptr = load_desc_ptr(b, state, desc_set);
+      stride = layout->binding[binding].size;
+   }
+
+   nir_ssa_def *binding_ptr = nir_imul_imm(b, intrin->src[0].ssa, stride);
+   nir_instr_as_alu(binding_ptr->parent_instr)->no_unsigned_wrap = true;
+
+   binding_ptr = nir_iadd_imm(b, binding_ptr, offset);
+   nir_instr_as_alu(binding_ptr->parent_instr)->no_unsigned_wrap = true;
+
+   if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) {
+      assert(stride == 16);
+      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_pack_64_2x32_split(b, set_ptr, binding_ptr));
+   } else {
+      nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                               nir_vec3(b, set_ptr, binding_ptr, nir_imm_int(b, stride)));
+   }
+   nir_instr_remove(&intrin->instr);
+}
+
+static void
+visit_vulkan_resource_reindex(nir_builder *b, apply_layout_state *state,
+                              nir_intrinsic_instr *intrin)
+{
+   VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
+   if (desc_type == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) {
+      nir_ssa_def *set_ptr = nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa);
+      nir_ssa_def *binding_ptr = nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa);
+
+      nir_ssa_def *index = nir_imul_imm(b, intrin->src[1].ssa, 16);
+      nir_instr_as_alu(index->parent_instr)->no_unsigned_wrap = true;
+
+      binding_ptr = nir_iadd_nuw(b, binding_ptr, index);
+
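+      /* Keep acceleration structure descriptors packed as the same 64-bit
+       * (set_ptr, binding_ptr) pair produced by visit_vulkan_resource_index. */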
+      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_pack_64_2x32_split(b, set_ptr, binding_ptr));
+   } else {
+      assert(desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
+             desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
+
+      nir_ssa_def *binding_ptr = nir_channel(b, intrin->src[0].ssa, 1);
+      nir_ssa_def *stride = nir_channel(b, intrin->src[0].ssa, 2);
+
+      nir_ssa_def *index = nir_imul(b, intrin->src[1].ssa, stride);
+      nir_instr_as_alu(index->parent_instr)->no_unsigned_wrap = true;
+
+      binding_ptr = nir_iadd_nuw(b, binding_ptr, index);
+
+      nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                               nir_vector_insert_imm(b, intrin->src[0].ssa, binding_ptr, 1));
+   }
+   nir_instr_remove(&intrin->instr);
+}
+
+static void
+visit_load_vulkan_descriptor(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
+{
+   if (nir_intrinsic_desc_type(intrin) == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) {
+      nir_ssa_def *addr = convert_pointer_to_64_bit(
+         b, state,
+         nir_iadd(b, nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa),
+                  nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa)));
+      nir_ssa_def *desc = nir_build_load_global(b, 1, 64, addr, .access = ACCESS_NON_WRITEABLE);
+
+      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc);
+   } else {
+      nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                               nir_vector_insert_imm(b, intrin->src[0].ssa, nir_imm_int(b, 0), 2));
+   }
+   nir_instr_remove(&intrin->instr);
+}
+
+static nir_ssa_def *
+load_inline_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_ssa_def *rsrc)
+{
+   uint32_t desc_type =
+      S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+      S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+   if (state->chip_class >= GFX10) {
+      desc_type |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
+                   S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
+   } else {
+      desc_type |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+                   S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+   }
+
+   return nir_vec4(b, rsrc, nir_imm_int(b, S_008F04_BASE_ADDRESS_HI(state->address32_hi)),
+                   nir_imm_int(b, 0xffffffff), nir_imm_int(b, desc_type));
+}
+
+static nir_ssa_def *
+load_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_ssa_def *rsrc)
+{
+   nir_binding binding = nir_chase_binding(nir_src_for_ssa(rsrc));
+
+   /* If binding.success=false, then this is a variable pointer, which we don't support with
+    * VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT.
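+    * A variable pointer therefore can never reference an inline uniform block,
+    * so the descriptor can be returned unchanged in that case.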
+ */ + if (binding.success) { + struct radv_descriptor_set_layout *layout = + state->pipeline_layout->set[binding.desc_set].layout; + if (layout->binding[binding.binding].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) { + rsrc = nir_iadd(b, nir_channel(b, rsrc, 0), nir_channel(b, rsrc, 1)); + return load_inline_buffer_descriptor(b, state, rsrc); + } + } + + return rsrc; +} + +static void +apply_layout_to_intrin(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin) +{ + b->cursor = nir_before_instr(&intrin->instr); + + nir_ssa_def *rsrc; + switch (intrin->intrinsic) { + case nir_intrinsic_vulkan_resource_index: + visit_vulkan_resource_index(b, state, intrin); + break; + case nir_intrinsic_vulkan_resource_reindex: + visit_vulkan_resource_reindex(b, state, intrin); + break; + case nir_intrinsic_load_vulkan_descriptor: + visit_load_vulkan_descriptor(b, state, intrin); + break; + case nir_intrinsic_load_ubo: + rsrc = load_buffer_descriptor(b, state, intrin->src[0].ssa); + nir_instr_rewrite_src_ssa(&intrin->instr, &intrin->src[0], rsrc); + break; + default: + break; + } +} + +void +radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device, + const struct radv_pipeline_layout *layout, + const struct radv_shader_info *info, + const struct radv_shader_args *args) +{ + apply_layout_state state = { + .chip_class = device->physical_device->rad_info.chip_class, + .address32_hi = device->physical_device->rad_info.address32_hi, + .args = args, + .info = info, + .pipeline_layout = layout, + }; + + nir_builder b; + + nir_foreach_function (function, shader) { + if (!function->impl) + continue; + + nir_builder_init(&b, function->impl); + + /* Iterate in reverse so load_ubo lowering can look at + * the vulkan_resource_index to tell if it's an inline + * ubo. 
+ */ + nir_foreach_block_reverse (block, function->impl) { + nir_foreach_instr_reverse_safe (instr, block) { + if (instr->type == nir_instr_type_intrinsic) + apply_layout_to_intrin(&b, &state, nir_instr_as_intrinsic(instr)); + } + } + + nir_metadata_preserve(function->impl, nir_metadata_block_index | nir_metadata_dominance); + } +} diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 2bb4a9d77c1..91f1f25ae05 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -198,39 +198,6 @@ create_function(struct radv_shader_context *ctx, gl_shader_stage stage, bool has } } -static LLVMValueRef -radv_load_resource(struct ac_shader_abi *abi, LLVMValueRef index, unsigned desc_set, - unsigned binding) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set]; - struct radv_pipeline_layout *pipeline_layout = ctx->options->layout; - struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout; - unsigned base_offset = layout->binding[binding].offset; - LLVMValueRef offset, stride; - - if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC || - layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) { - unsigned idx = pipeline_layout->set[desc_set].dynamic_offset_start + - layout->binding[binding].dynamic_offset_offset; - desc_ptr = ac_get_arg(&ctx->ac, ctx->args->ac.push_constants); - base_offset = pipeline_layout->push_constant_size + 16 * idx; - stride = LLVMConstInt(ctx->ac.i32, 16, false); - } else - stride = LLVMConstInt(ctx->ac.i32, layout->binding[binding].size, false); - - offset = LLVMConstInt(ctx->ac.i32, base_offset, false); - - if (layout->binding[binding].type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) { - offset = ac_build_imad(&ctx->ac, index, stride, offset); - } - - desc_ptr = LLVMBuildPtrToInt(ctx->ac.builder, desc_ptr, ctx->ac.i32, ""); - - LLVMValueRef res[] = {desc_ptr, offset, ctx->ac.i32_0}; - return ac_build_gather_values(&ctx->ac, res, 3); -} - static uint32_t radv_get_sample_pos_offset(uint32_t num_samples) { @@ -2303,7 +2270,6 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, ctx.abi.load_ubo = radv_load_ubo; ctx.abi.load_ssbo = radv_load_ssbo; ctx.abi.load_sampler_desc = radv_get_sampler_desc; - ctx.abi.load_resource = radv_load_resource; ctx.abi.load_ring_tess_factors = load_ring_tess_factors; ctx.abi.load_ring_tess_offchip = load_ring_tess_offchip; ctx.abi.load_ring_esgs = load_ring_esgs; diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 2bc5e66de11..14ef5e160e5 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -4067,8 +4067,6 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout } NIR_PASS_V(nir[i], nir_lower_memory_model); - bool lower_to_scalar = false; - nir_load_store_vectorize_options vectorize_opts = { .modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_push_const | nir_var_mem_shared | nir_var_mem_global, @@ -4084,16 +4082,26 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout if (nir_opt_load_store_vectorize(nir[i], &vectorize_opts)) { NIR_PASS_V(nir[i], nir_copy_prop); nir_opt_shrink_stores(nir[i], !device->instance->disable_shrink_image_store); - lower_to_scalar = true; /* Gather info again, to update whether 8/16-bit are used. 
+      struct radv_shader_info *info = &infos[i];
+      if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
+         if (i == MESA_SHADER_VERTEX && nir[MESA_SHADER_TESS_CTRL])
+            info = &infos[MESA_SHADER_TESS_CTRL];
+         else if (i == MESA_SHADER_VERTEX && nir[MESA_SHADER_GEOMETRY])
+            info = &infos[MESA_SHADER_GEOMETRY];
+         else if (i == MESA_SHADER_TESS_EVAL && nir[MESA_SHADER_GEOMETRY])
+            info = &infos[MESA_SHADER_GEOMETRY];
+      }
+      NIR_PASS_V(nir[i], radv_nir_apply_pipeline_layout, device, pipeline_layout, info,
+                 &args[i]);
+
       nir_opt_shrink_vectors(nir[i]);
 
-      if (lower_to_scalar)
-         nir_lower_alu_to_scalar(nir[i], NULL, NULL);
+      /* The layout lowering and the load/store vectorizer can both create vector
+       * ALU, so scalarize unconditionally now. */
+      nir_lower_alu_to_scalar(nir[i], NULL, NULL);
 
       nir_lower_int64(nir[i]);
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index eab1485cf87..d3532aec404 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -290,13 +290,6 @@ radv_compiler_debug(void *private_data, enum radv_compiler_debug_level level, co
                                 &debug_data->module->base, 0, 0, "radv", message);
 }
 
-static nir_ssa_def *
-convert_pointer_to_64(nir_builder *b, const struct radv_physical_device *pdev, nir_ssa_def *ptr)
-{
-   nir_ssa_def *comp[] = {ptr, nir_imm_int(b, pdev->rad_info.address32_hi)};
-   return nir_pack_64_2x32(b, nir_vec(b, comp, 2));
-}
-
 static bool
 lower_intrinsics(nir_shader *nir, const struct radv_pipeline_key *key,
                  const struct radv_pipeline_layout *layout, const struct radv_physical_device *pdev)
@@ -317,44 +310,6 @@ lower_intrinsics(nir_shader *nir, const struct radv_pipeline_key *key,
 
          nir_ssa_def *def = NULL;
          switch (intrin->intrinsic) {
-         case nir_intrinsic_load_vulkan_descriptor:
-            if (nir_intrinsic_desc_type(intrin) == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) {
-               nir_ssa_def *addr =
-                  convert_pointer_to_64(&b, pdev,
-                                        nir_iadd(&b, nir_channel(&b, intrin->src[0].ssa, 0),
-                                                 nir_channel(&b, intrin->src[0].ssa, 1)));
-
-               def = nir_build_load_global(&b, 1, 64, addr, .access = ACCESS_NON_WRITEABLE);
-            } else {
-               def = nir_vector_insert_imm(&b, intrin->src[0].ssa, nir_imm_int(&b, 0), 2);
-            }
-            break;
-         case nir_intrinsic_vulkan_resource_index: {
-            unsigned desc_set = nir_intrinsic_desc_set(intrin);
-            unsigned binding = nir_intrinsic_binding(intrin);
-            struct radv_descriptor_set_layout *desc_layout = layout->set[desc_set].layout;
-
-            nir_ssa_def *new_res = nir_vulkan_resource_index(
-               &b, 3, 32, intrin->src[0].ssa, .desc_set = desc_set, .binding = binding,
-               .desc_type = nir_intrinsic_desc_type(intrin));
-
-            nir_ssa_def *stride;
-            if (desc_layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
-                desc_layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
-               stride = nir_imm_int(&b, 16);
-            } else {
-               stride = nir_imm_int(&b, desc_layout->binding[binding].size);
-            }
-            def = nir_vector_insert_imm(&b, new_res, stride, 2);
-            break;
-         }
-         case nir_intrinsic_vulkan_resource_reindex: {
-            nir_ssa_def *binding_ptr = nir_channel(&b, intrin->src[0].ssa, 1);
-            nir_ssa_def *stride = nir_channel(&b, intrin->src[0].ssa, 2);
-            binding_ptr = nir_iadd(&b, binding_ptr, nir_imul(&b, intrin->src[1].ssa, stride));
-            def = nir_vector_insert_imm(&b, intrin->src[0].ssa, binding_ptr, 1);
-            break;
-         }
          case nir_intrinsic_is_sparse_texels_resident:
            def = nir_ieq_imm(&b, intrin->src[0].ssa, 0);
            break;
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index afb487980af..bb6e1332728 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -510,6 +510,11 @@ bool radv_nir_lower_ycbcr_textures(nir_shader *shader, const struct radv_pipelin
 
 bool radv_nir_lower_ray_queries(nir_shader *shader, struct radv_device *device);
 
+void radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device,
+                                    const struct radv_pipeline_layout *layout,
+                                    const struct radv_shader_info *info,
+                                    const struct radv_shader_args *args);
+
 nir_shader *radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *module,
                                        const char *entrypoint_name, gl_shader_stage stage,
                                        const VkSpecializationInfo *spec_info,