diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 6de9a9eac3a..cc3cd859de0 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -5577,43 +5577,7 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr) Temp dst = get_ssa_temp(ctx, &instr->dest.ssa); nir_src offset = *nir_get_io_offset_src(instr); - if (ctx->shader->info.stage == MESA_SHADER_VERTEX && ctx->program->info.vs.dynamic_inputs) { - if (!nir_src_is_const(offset) || nir_src_as_uint(offset)) - isel_err(offset.ssa->parent_instr, - "Unimplemented non-zero nir_intrinsic_load_input offset"); - - unsigned location = nir_intrinsic_base(instr) - VERT_ATTRIB_GENERIC0; - unsigned bitsize = instr->dest.ssa.bit_size; - unsigned component = nir_intrinsic_component(instr) >> (bitsize == 64 ? 1 : 0); - unsigned num_components = instr->dest.ssa.num_components; - - aco_ptr vec{create_instruction( - aco_opcode::p_create_vector, Format::PSEUDO, num_components, 1)}; - std::array elems; - for (unsigned i = 0; i < num_components; i++) { - if (bitsize == 64) { - Temp input = get_arg(ctx, ctx->args->vs_inputs[location + (component + i) / 2]); - elems[i] = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), - emit_extract_vector(ctx, input, (component + i) * 2 % 4, v1), - emit_extract_vector(ctx, input, (component + i) * 2 % 4 + 1, v1)); - } else { - Temp input = get_arg(ctx, ctx->args->vs_inputs[location]); - elems[i] = emit_extract_vector(ctx, input, component + i, v1); - } - if (bitsize == 16) { - if (nir_alu_type_get_base_type(nir_intrinsic_dest_type(instr)) == nir_type_float) - elems[i] = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), elems[i]); - else - elems[i] = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), elems[i], - Operand::c32(0u)); - } - vec->operands[i] = Operand(elems[i]); - } - vec->definitions[0] = Definition(dst); - ctx->block->instructions.emplace_back(std::move(vec)); - ctx->allocated_vec.emplace(dst.id(), elems); - } else if (ctx->shader->info.stage == MESA_SHADER_VERTEX) { - + if (ctx->shader->info.stage == MESA_SHADER_VERTEX) { if (!nir_src_is_const(offset) || nir_src_as_uint(offset)) isel_err(offset.ssa->parent_instr, "Unimplemented non-zero nir_intrinsic_load_input offset"); diff --git a/src/amd/vulkan/radv_nir_lower_abi.c b/src/amd/vulkan/radv_nir_lower_abi.c index f8737e483e0..dcded40c85b 100644 --- a/src/amd/vulkan/radv_nir_lower_abi.c +++ b/src/amd/vulkan/radv_nir_lower_abi.c @@ -79,7 +79,7 @@ lower_load_vs_input_from_prolog(nir_builder *b, const unsigned driver_location = base + base_offset - VERT_ATTRIB_GENERIC0; const unsigned component = nir_intrinsic_component(intrin); const unsigned bit_size = intrin->dest.ssa.bit_size; - const unsigned num_components = intrin->dest.ssa.num_components + const unsigned num_components = intrin->dest.ssa.num_components; /* 64-bit inputs: they occupy twice as many 32-bit components. * 16-bit inputs: they occupy a 32-bit component (not packed). @@ -95,7 +95,7 @@ lower_load_vs_input_from_prolog(nir_builder *b, input_args[1] = ac_nir_load_arg(b, &s->args->ac, s->args->vs_inputs[driver_location + 1]); } - nir_ssa_def *extracted = nir_extract_bits(b, &input_arg, num_input_args, component * 32, + nir_ssa_def *extracted = nir_extract_bits(b, input_args, num_input_args, component * 32, num_components, arg_bit_size); if (bit_size < arg_bit_size) {