From 2f0bb39e1621ab610cd6ca788635c64320917404 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 22 Apr 2022 16:46:21 +0100 Subject: [PATCH] aco: ensure that definitions fixed to operands have matching regclasses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the operand is not killed, the definition needs to be large enough so that the new location for the operand does not intersect with the old location. Fixes with zink: KHR-GL45.shader_image_load_store.basic-allTargets-atomicCS KHR-GL45.shader_image_load_store.basic-allTargets-atomicGS KHR-GL45.shader_image_load_store.basic-allTargets-atomicVS Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Gitlab: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6276 Part-of: --- .../compiler/aco_instruction_selection.cpp | 23 +++++++++++++++---- src/amd/compiler/aco_register_allocation.cpp | 12 +++++++++- .../drivers/zink/ci/zink-radv-fails.txt | 3 --- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 1fe28c82657..540886ba2eb 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -6184,10 +6184,11 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr) Builder bld(ctx->program, ctx->block); Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[3].ssa)); + bool cmpswap = instr->intrinsic == nir_intrinsic_bindless_image_atomic_comp_swap; bool is_64bit = data.bytes() == 8; assert((data.bytes() == 4 || data.bytes() == 8) && "only 32/64-bit image atomics implemented."); - if (instr->intrinsic == nir_intrinsic_bindless_image_atomic_comp_swap) + if (cmpswap) data = bld.pseudo(aco_opcode::p_create_vector, bld.def(is_64bit ? v4 : v2), get_ssa_temp(ctx, instr->src[4].ssa), data); @@ -6272,8 +6273,10 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr) mubuf->operands[1] = Operand(vindex); mubuf->operands[2] = Operand::c32(0); mubuf->operands[3] = Operand(data); + Definition def = + return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition(); if (return_previous) - mubuf->definitions[0] = Definition(dst); + mubuf->definitions[0] = def; mubuf->offset = 0; mubuf->idxen = true; mubuf->glc = return_previous; @@ -6282,12 +6285,15 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr) mubuf->sync = sync; ctx->program->needs_exact = true; ctx->block->instructions.emplace_back(std::move(mubuf)); + if (return_previous && cmpswap) + bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), def.getTemp(), Operand::zero()); return; } std::vector coords = get_image_coords(ctx, instr); Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); - Definition def = return_previous ? Definition(dst) : Definition(); + Definition def = + return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition(); MIMG_instruction* mimg = emit_mimg(bld, image_op, def, resource, Operand(s4), coords, 0, Operand(data)); mimg->glc = return_previous; @@ -6299,6 +6305,8 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr) mimg->disable_wqm = true; mimg->sync = sync; ctx->program->needs_exact = true; + if (return_previous && cmpswap) + bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), def.getTemp(), Operand::zero()); return; } @@ -6481,8 +6489,9 @@ visit_atomic_ssbo(isel_context* ctx, nir_intrinsic_instr* instr) Builder bld(ctx->program, ctx->block); bool return_previous = !nir_ssa_def_is_unused(&instr->dest.ssa); Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[2].ssa)); + bool cmpswap = instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap; - if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) + if (cmpswap) data = bld.pseudo(aco_opcode::p_create_vector, bld.def(RegType::vgpr, data.size() * 2), get_ssa_temp(ctx, instr->src[3].ssa), data); @@ -6552,8 +6561,10 @@ visit_atomic_ssbo(isel_context* ctx, nir_intrinsic_instr* instr) mubuf->operands[1] = offset.type() == RegType::vgpr ? Operand(offset) : Operand(v1); mubuf->operands[2] = offset.type() == RegType::sgpr ? Operand(offset) : Operand::c32(0); mubuf->operands[3] = Operand(data); + Definition def = + return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition(); if (return_previous) - mubuf->definitions[0] = Definition(dst); + mubuf->definitions[0] = def; mubuf->offset = 0; mubuf->offen = (offset.type() == RegType::vgpr); mubuf->glc = return_previous; @@ -6562,6 +6573,8 @@ visit_atomic_ssbo(isel_context* ctx, nir_intrinsic_instr* instr) mubuf->sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw); ctx->program->needs_exact = true; ctx->block->instructions.emplace_back(std::move(mubuf)); + if (return_previous && cmpswap) + bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), def.getTemp(), Operand::zero()); } void diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 61e384dbdaa..108dfd04b8c 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -2711,7 +2711,12 @@ register_allocation(Program* program, std::vector& live_out_per_block, ra } } - /* handle definitions which must have the same register as an operand */ + /* Handle definitions which must have the same register as an operand. + * We expect that the definition has the same size as the operand, otherwise the new + * location for the operand (if it's not killed) might intersect with the old one. + * We can't read from the old location because it's corrupted, and we can't write the new + * location because that's used by a live-through operand. + */ if (instr->opcode == aco_opcode::v_interp_p2_f32 || instr->opcode == aco_opcode::v_mac_f32 || instr->opcode == aco_opcode::v_fmac_f32 || instr->opcode == aco_opcode::v_mac_f16 || instr->opcode == aco_opcode::v_fmac_f16 || @@ -2721,15 +2726,20 @@ register_allocation(Program* program, std::vector& live_out_per_block, ra instr->opcode == aco_opcode::v_writelane_b32 || instr->opcode == aco_opcode::v_writelane_b32_e64 || instr->opcode == aco_opcode::v_dot4c_i32_i8) { + assert(instr->definitions[0].bytes() == instr->operands[2].bytes() || + instr->operands[2].regClass() == v1); instr->definitions[0].setFixed(instr->operands[2].physReg()); } else if (instr->opcode == aco_opcode::s_addk_i32 || instr->opcode == aco_opcode::s_mulk_i32) { + assert(instr->definitions[0].bytes() == instr->operands[0].bytes()); instr->definitions[0].setFixed(instr->operands[0].physReg()); } else if (instr->isMUBUF() && instr->definitions.size() == 1 && instr->operands.size() == 4) { + assert(instr->definitions[0].bytes() == instr->operands[3].bytes()); instr->definitions[0].setFixed(instr->operands[3].physReg()); } else if (instr->isMIMG() && instr->definitions.size() == 1 && !instr->operands[2].isUndefined()) { + assert(instr->definitions[0].bytes() == instr->operands[2].bytes()); instr->definitions[0].setFixed(instr->operands[2].physReg()); } diff --git a/src/gallium/drivers/zink/ci/zink-radv-fails.txt b/src/gallium/drivers/zink/ci/zink-radv-fails.txt index 33c55926244..fe90384874d 100644 --- a/src/gallium/drivers/zink/ci/zink-radv-fails.txt +++ b/src/gallium/drivers/zink/ci/zink-radv-fails.txt @@ -1,7 +1,4 @@ # probable ACO bug: #6276 -KHR-GL46.shader_image_load_store.basic-allTargets-atomicCS,Fail -KHR-GL46.shader_image_load_store.basic-allTargets-atomicGS,Fail -KHR-GL46.shader_image_load_store.basic-allTargets-atomicVS,Fail KHR-GL46.shader_storage_buffer_object.basic-operations-case1-cs,Fail KHR-GL46.shader_storage_buffer_object.basic-operations-case1-vs,Fail