diff --git a/src/amd/compiler/instruction_selection/aco_isel_setup.cpp b/src/amd/compiler/instruction_selection/aco_isel_setup.cpp
index a0b3a2a7f1d..24052cd4c3b 100644
--- a/src/amd/compiler/instruction_selection/aco_isel_setup.cpp
+++ b/src/amd/compiler/instruction_selection/aco_isel_setup.cpp
@@ -340,6 +340,18 @@ skip_uniformize_merge_phi(nir_def* ssa, unsigned depth)
    return true;
 }
 
+bool
+intrinsic_try_skip_helpers(nir_intrinsic_instr* intr, UNUSED void* data)
+{
+   switch (intr->intrinsic) {
+   case nir_intrinsic_bindless_image_load:
+   case nir_intrinsic_bindless_image_fragment_mask_load_amd:
+   case nir_intrinsic_bindless_image_sparse_load:
+      return !(nir_intrinsic_access(intr) & ACCESS_SMEM_AMD);
+   default: return false;
+   }
+}
+
 } /* end namespace */
 
 void
@@ -370,6 +382,7 @@ init_context(isel_context* ctx, nir_shader* shader)
    if (shader->info.stage == MESA_SHADER_FRAGMENT) {
       nir_opt_load_skip_helpers_options skip_helper_options = {};
       skip_helper_options.no_add_divergence = true;
+      skip_helper_options.intrinsic_cb = intrinsic_try_skip_helpers;
       nir_opt_load_skip_helpers(shader, &skip_helper_options);
    }
 
diff --git a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp
index 73f8bd4b4bd..12d686fca67 100644
--- a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp
+++ b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp
@@ -1821,6 +1821,9 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr)
 
    Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
 
+   enum gl_access_qualifier access = nir_intrinsic_access(instr);
+   bool disable_wqm = access & ACCESS_SKIP_HELPERS;
+
    if (dim == GLSL_SAMPLER_DIM_BUF) {
       Temp vindex = emit_extract_vector(ctx, get_ssa_temp(ctx, instr->src[1].ssa), 0, v1);
 
@@ -1842,17 +1845,19 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr)
         default: UNREACHABLE(">4 channel buffer image load");
         }
      }
-      aco_ptr<Instruction> load{create_instruction(opcode, Format::MUBUF, 3 + is_sparse, 1)};
+      aco_ptr<Instruction> load{
+         create_instruction(opcode, Format::MUBUF, 3 + is_sparse + 2 * disable_wqm, 1)};
       load->operands[0] = Operand(resource);
       load->operands[1] = Operand(vindex);
       load->operands[2] = Operand::c32(0);
       load->definitions[0] = Definition(tmp);
       load->mubuf().idxen = true;
-      load->mubuf().cache = get_cache_flags(ctx, nir_intrinsic_access(instr), ac_access_type_load);
+      load->mubuf().cache = get_cache_flags(ctx, access, ac_access_type_load);
       load->mubuf().sync = sync;
       load->mubuf().tfe = is_sparse;
       if (load->mubuf().tfe)
          load->operands[3] = emit_tfe_init(bld, tmp);
+      init_disable_wqm(bld, load->mubuf(), disable_wqm);
       ctx->block->instructions.emplace_back(std::move(load));
    } else {
       std::vector<Temp> coords = get_image_coords(ctx, instr);
@@ -1867,8 +1872,8 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr)
 
       Operand vdata = is_sparse ? emit_tfe_init(bld, tmp) : Operand(v1);
       MIMG_instruction* load =
-         emit_mimg(bld, opcode, {tmp}, resource, Operand(s4), coords, false, vdata);
-      load->cache = get_cache_flags(ctx, nir_intrinsic_access(instr), ac_access_type_load);
+         emit_mimg(bld, opcode, {tmp}, resource, Operand(s4), coords, disable_wqm, vdata);
+      load->cache = get_cache_flags(ctx, access, ac_access_type_load);
       load->a16 = instr->src[1].ssa->bit_size == 16;
       load->d16 = d16;
       load->dmask = dmask;