diff --git a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp index 4785422e563..df3a3336361 100644 --- a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp +++ b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp @@ -1865,13 +1865,12 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr) bool is_sparse = instr->intrinsic == nir_intrinsic_bindless_image_sparse_load; Temp dst = get_ssa_temp(ctx, &instr->def); + assert(dim != GLSL_SAMPLER_DIM_BUF); memory_sync_info sync = get_memory_sync_info(instr, storage_image, 0); unsigned result_size = instr->def.num_components - is_sparse; unsigned expand_mask = nir_def_components_read(&instr->def) & u_bit_consecutive(0, result_size); expand_mask = MAX2(expand_mask, 1); /* this can be zero in the case of sparse image loads */ - if (dim == GLSL_SAMPLER_DIM_BUF) - expand_mask = (1u << util_last_bit(expand_mask)) - 1u; unsigned dmask = expand_mask; if (instr->def.bit_size == 64) { expand_mask &= 0x9; @@ -1897,72 +1896,35 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr) enum gl_access_qualifier access = nir_intrinsic_access(instr); bool disable_wqm = access & ACCESS_SKIP_HELPERS; - if (dim == GLSL_SAMPLER_DIM_BUF) { - Temp vindex = emit_extract_vector(ctx, get_ssa_temp(ctx, instr->src[1].ssa), 0, v1); + std::vector coords = get_image_coords(ctx, instr); - aco_opcode opcode; - if (!d16) { - switch (util_bitcount(dmask)) { - case 1: opcode = aco_opcode::buffer_load_format_x; break; - case 2: opcode = aco_opcode::buffer_load_format_xy; break; - case 3: opcode = aco_opcode::buffer_load_format_xyz; break; - case 4: opcode = aco_opcode::buffer_load_format_xyzw; break; - default: UNREACHABLE(">4 channel buffer image load"); - } - } else { - switch (util_bitcount(dmask)) { - case 1: opcode = aco_opcode::buffer_load_format_d16_x; break; - case 2: opcode = aco_opcode::buffer_load_format_d16_xy; break; - case 3: opcode = aco_opcode::buffer_load_format_d16_xyz; break; - case 4: opcode = aco_opcode::buffer_load_format_d16_xyzw; break; - default: UNREACHABLE(">4 channel buffer image load"); - } - } - aco_ptr load{ - create_instruction(opcode, Format::MUBUF, 3 + is_sparse + 2 * disable_wqm, 1)}; - load->operands[0] = Operand(resource); - load->operands[1] = Operand(vindex); - load->operands[2] = Operand::c32(0); - load->definitions[0] = Definition(tmp); - load->mubuf().idxen = true; - load->mubuf().cache = get_cache_flags(ctx, access, ac_access_type_load); - load->mubuf().sync = sync; - load->mubuf().tfe = is_sparse; - if (load->mubuf().tfe) - load->operands[3] = emit_tfe_init(bld, tmp); - init_disable_wqm(bld, load->mubuf(), disable_wqm); - ctx->block->instructions.emplace_back(std::move(load)); + aco_opcode opcode; + if (instr->intrinsic == nir_intrinsic_bindless_image_fragment_mask_load_amd) { + opcode = aco_opcode::image_load; } else { - std::vector coords = get_image_coords(ctx, instr); + bool level_zero = nir_src_is_const(instr->src[3]) && nir_src_as_uint(instr->src[3]) == 0; + opcode = level_zero ? aco_opcode::image_load : aco_opcode::image_load_mip; + } - aco_opcode opcode; - if (instr->intrinsic == nir_intrinsic_bindless_image_fragment_mask_load_amd) { - opcode = aco_opcode::image_load; - } else { - bool level_zero = nir_src_is_const(instr->src[3]) && nir_src_as_uint(instr->src[3]) == 0; - opcode = level_zero ? aco_opcode::image_load : aco_opcode::image_load_mip; - } + Operand vdata = is_sparse ? emit_tfe_init(bld, tmp) : Operand(v1); + MIMG_instruction* load = + emit_mimg(bld, opcode, {tmp}, resource, Operand(s4), coords, disable_wqm, vdata); + load->cache = get_cache_flags(ctx, access, ac_access_type_load); + load->a16 = instr->src[1].ssa->bit_size == 16; + load->d16 = d16; + load->dmask = dmask; + load->unrm = true; + load->tfe = is_sparse; - Operand vdata = is_sparse ? emit_tfe_init(bld, tmp) : Operand(v1); - MIMG_instruction* load = - emit_mimg(bld, opcode, {tmp}, resource, Operand(s4), coords, disable_wqm, vdata); - load->cache = get_cache_flags(ctx, access, ac_access_type_load); - load->a16 = instr->src[1].ssa->bit_size == 16; - load->d16 = d16; - load->dmask = dmask; - load->unrm = true; - load->tfe = is_sparse; - - if (instr->intrinsic == nir_intrinsic_bindless_image_fragment_mask_load_amd) { - load->dim = is_array ? ac_image_2darray : ac_image_2d; - load->da = is_array; - load->sync = memory_sync_info(); - } else { - ac_image_dim sdim = ac_get_image_dim(ctx->options->gfx_level, dim, is_array); - load->dim = sdim; - load->da = should_declare_array(sdim); - load->sync = sync; - } + if (instr->intrinsic == nir_intrinsic_bindless_image_fragment_mask_load_amd) { + load->dim = is_array ? ac_image_2darray : ac_image_2d; + load->da = is_array; + load->sync = memory_sync_info(); + } else { + ac_image_dim sdim = ac_get_image_dim(ctx->options->gfx_level, dim, is_array); + load->dim = sdim; + load->da = should_declare_array(sdim); + load->sync = sync; } if (is_sparse && instr->def.bit_size == 64) { diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index d701de77f98..5337dc06348 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -2263,26 +2263,9 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, const nir_intri args.access = nir_intrinsic_access(instr); args.tfe = instr->intrinsic == nir_intrinsic_bindless_image_sparse_load; - if (dim == GLSL_SAMPLER_DIM_BUF) { - unsigned num_channels = util_last_bit(nir_def_components_read(&instr->def)); - if (instr->def.bit_size == 64) - num_channels = num_channels < 4 ? 2 : 4; - LLVMValueRef rsrc, vindex; + assert(dim != GLSL_SAMPLER_DIM_BUF); - rsrc = ctx->abi->load_sampler_desc(ctx->abi, dynamic_index, AC_DESC_BUFFER); - vindex = - LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, ""); - - bool can_speculate = access & ACCESS_CAN_REORDER; - res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex, ctx->ac.i32_0, num_channels, - args.access, can_speculate, - instr->def.bit_size == 16, - args.tfe); - res = ac_build_expand(&ctx->ac, res, num_channels, args.tfe ? 5 : 4); - - res = ac_trim_vector(&ctx->ac, res, instr->def.num_components); - res = ac_to_integer(&ctx->ac, res); - } else if (instr->intrinsic == nir_intrinsic_bindless_image_fragment_mask_load_amd) { + if (instr->intrinsic == nir_intrinsic_bindless_image_fragment_mask_load_amd) { assert(ctx->ac.gfx_level < GFX11); args.opcode = ac_image_load;