mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
aco: Split opcodes for GFX6 and GFX10 emulated bpermute.
Different sequences are emitted for these, so it makes sense to have different opcodes too.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20293>
This commit is contained in:
parent
614348f28b
commit
640e801651
4 changed files with 20 additions and 13 deletions
|
|
@ -199,8 +199,8 @@ emit_bpermute(isel_context* ctx, Builder& bld, Temp index, Temp data)
|
|||
index_op.setLateKill(true);
|
||||
input_data.setLateKill(true);
|
||||
|
||||
return bld.pseudo(aco_opcode::p_bpermute, bld.def(v1), bld.def(bld.lm), bld.def(bld.lm, vcc),
|
||||
index_op, input_data);
|
||||
return bld.pseudo(aco_opcode::p_bpermute_gfx6, bld.def(v1), bld.def(bld.lm),
|
||||
bld.def(bld.lm, vcc), index_op, input_data);
|
||||
} else if (ctx->options->gfx_level >= GFX10 && ctx->program->wave_size == 64) {
|
||||
|
||||
/* GFX10 wave64 mode: emulate full-wave bpermute */
|
||||
|
|
@ -223,7 +223,7 @@ emit_bpermute(isel_context* ctx, Builder& bld, Temp index, Temp data)
|
|||
* Note, that these have twice the allocation granularity of normal VGPRs */
|
||||
ctx->program->config->num_shared_vgprs = 2 * ctx->program->dev.vgpr_alloc_granule;
|
||||
|
||||
return bld.pseudo(aco_opcode::p_bpermute, bld.def(v1), bld.def(s2), bld.def(s1, scc),
|
||||
return bld.pseudo(aco_opcode::p_bpermute_gfx10w64, bld.def(v1), bld.def(s2), bld.def(s1, scc),
|
||||
index_x4, input_data, same_half);
|
||||
} else {
|
||||
/* GFX8-9 or GFX10 wave32: bpermute works normally */
|
||||
|
|
|
|||
|
|
@ -2193,13 +2193,12 @@ lower_to_hw_instr(Program* program)
|
|||
}
|
||||
break;
|
||||
}
|
||||
case aco_opcode::p_bpermute: {
|
||||
if (ctx.program->gfx_level <= GFX7)
|
||||
emit_gfx6_bpermute(program, instr, bld);
|
||||
else if (ctx.program->gfx_level >= GFX10 && ctx.program->wave_size == 64)
|
||||
emit_gfx10_wave64_bpermute(program, instr, bld);
|
||||
else
|
||||
unreachable("Current hardware supports ds_bpermute, don't emit p_bpermute.");
|
||||
case aco_opcode::p_bpermute_gfx6: {
|
||||
emit_gfx6_bpermute(program, instr, bld);
|
||||
break;
|
||||
}
|
||||
case aco_opcode::p_bpermute_gfx10w64: {
|
||||
emit_gfx10_wave64_bpermute(program, instr, bld);
|
||||
break;
|
||||
}
|
||||
case aco_opcode::p_constaddr: {
|
||||
|
|
|
|||
|
|
@ -315,8 +315,15 @@ opcode("p_demote_to_helper")
|
|||
opcode("p_is_helper")
|
||||
opcode("p_exit_early_if")
|
||||
|
||||
# simulates proper bpermute behavior when it's unsupported, eg. GFX10 wave64
|
||||
opcode("p_bpermute")
|
||||
# simulates proper bpermute behavior on GFX6
|
||||
# definitions: result VGPR, temp EXEC, clobbered VCC
|
||||
# operands: index, input data
|
||||
opcode("p_bpermute_gfx6")
|
||||
|
||||
# simulates proper bpermute behavior on GFX10 in wave64 mode
|
||||
# definitions: result VGPR, temp EXEC, clobbered SCC
|
||||
# operands: index * 4, input data, same half (bool)
|
||||
opcode("p_bpermute_gfx10w64")
|
||||
|
||||
# creates a lane mask where only the first active lane is selected
|
||||
opcode("p_elect")
|
||||
|
|
|
|||
|
|
@ -673,7 +673,8 @@ alu_can_accept_constant(aco_opcode opcode, unsigned operand)
|
|||
case aco_opcode::v_readfirstlane_b32:
|
||||
case aco_opcode::p_extract:
|
||||
case aco_opcode::p_insert: return operand != 0;
|
||||
case aco_opcode::p_bpermute:
|
||||
case aco_opcode::p_bpermute_gfx6:
|
||||
case aco_opcode::p_bpermute_gfx10w64:
|
||||
case aco_opcode::p_interp_gfx11:
|
||||
case aco_opcode::p_dual_src_export_gfx11: return false;
|
||||
default: return true;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue