diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 5258236fd81..d818cabb285 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -219,12 +219,17 @@ emit_bpermute(isel_context* ctx, Builder& bld, Temp index, Temp data) input_data.setLateKill(true); same_half.setLateKill(true); - /* We need one pair of shared VGPRs: - * Note, that these have twice the allocation granularity of normal VGPRs */ - ctx->program->config->num_shared_vgprs = 2 * ctx->program->dev.vgpr_alloc_granule; + if (ctx->options->gfx_level <= GFX10_3) { + /* We need one pair of shared VGPRs: + * Note, that these have twice the allocation granularity of normal VGPRs + */ + ctx->program->config->num_shared_vgprs = 2 * ctx->program->dev.vgpr_alloc_granule; - return bld.pseudo(aco_opcode::p_bpermute_gfx10w64, bld.def(v1), bld.def(s2), bld.def(s1, scc), - index_x4, input_data, same_half); + return bld.pseudo(aco_opcode::p_bpermute_gfx10w64, bld.def(v1), bld.def(s2), + bld.def(s1, scc), index_x4, input_data, same_half); + } else { + unreachable("emit_bpermute does not yet support GFX11+"); + } } else { /* GFX8-9 or GFX10 wave32: bpermute works normally */ Temp index_x4 = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), index); diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index 3d7f1f31efe..b4fadd88ca7 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -848,7 +848,7 @@ emit_gfx10_wave64_bpermute(Program* program, aco_ptr& instr, Builde * manually swap the data between the two halves using two shared VGPRs. */ - assert(program->gfx_level >= GFX10); + assert(program->gfx_level >= GFX10 && program->gfx_level <= GFX10_3); assert(program->wave_size == 64); unsigned shared_vgpr_reg_0 = align(program->config->num_vgprs, 4) + 256; @@ -907,8 +907,9 @@ emit_gfx10_wave64_bpermute(Program* program, aco_ptr& instr, Builde /* Restore saved EXEC */ bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand(tmp_exec.physReg(), s2)); - /* RA assumes that the result is always in the low part of the register, so we have to shift, if - * it's not there already */ + /* RA assumes that the result is always in the low part of the register, so we have to shift, + * if it's not there already. + */ if (input_data.physReg().byte()) { unsigned right_shift = input_data.physReg().byte() * 8; bld.vop2(aco_opcode::v_lshrrev_b32, dst, Operand::c32(right_shift),