mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 07:28:11 +02:00
aco: don't DCE atomics with return values
We don't create atomics with definitions if they are not used in NIR, but
our own DCE can remove the uses if an export turns out to be null.
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Fixes: 93c8ebfa78 ('aco: Initial commit of independent AMD compiler')
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3081>
This commit is contained in:
parent
8f291dc146
commit
69bed1c918
5 changed files with 26 additions and 19 deletions
|
|
@ -57,11 +57,7 @@ void process_block(dce_ctx& ctx, Block& block)
|
|||
continue;
|
||||
|
||||
aco_ptr<Instruction>& instr = block.instructions[idx];
|
||||
const bool is_live = instr->definitions.empty() ||
|
||||
std::any_of(instr->definitions.begin(), instr->definitions.end(),
|
||||
[&ctx] (const Definition& def) { return !def.isTemp() || ctx.uses[def.tempId()];});
|
||||
|
||||
if (is_live) {
|
||||
if (!is_dead(ctx.uses, instr.get())) {
|
||||
for (const Operand& op : instr->operands) {
|
||||
if (op.isTemp()) {
|
||||
if (ctx.uses[op.tempId()] == 0)
|
||||
|
|
@ -81,6 +77,16 @@ void process_block(dce_ctx& ctx, Block& block)
|
|||
|
||||
} /* end namespace */
|
||||
|
||||
bool is_dead(const std::vector<uint16_t>& uses, Instruction *instr)
|
||||
{
|
||||
if (instr->definitions.empty())
|
||||
return false;
|
||||
if (std::any_of(instr->definitions.begin(), instr->definitions.end(),
|
||||
[&uses] (const Definition& def) { return uses[def.tempId()];}))
|
||||
return false;
|
||||
return instr_info.is_atomic[(int)instr->opcode];
|
||||
}
|
||||
|
||||
std::vector<uint16_t> dead_code_analysis(Program *program) {
|
||||
|
||||
dce_ctx ctx(program);
|
||||
|
|
|
|||
|
|
@ -981,6 +981,8 @@ constexpr barrier_interaction get_barrier_interaction(Instruction* instr)
|
|||
}
|
||||
}
|
||||
|
||||
/* Returns true if instr can be removed as dead code. uses holds the use
 * count of each temporary, indexed by temporary id. */
bool is_dead(const std::vector<uint16_t>& uses, Instruction *instr);
|
||||
|
||||
enum block_kind {
|
||||
/* uniform indicates that leaving this block,
|
||||
* all actives lanes stay active */
|
||||
|
|
@ -1267,6 +1269,7 @@ typedef struct {
|
|||
const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)];
|
||||
const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers;
|
||||
const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_output_modifiers;
|
||||
const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> is_atomic;
|
||||
const char *name[static_cast<int>(aco_opcode::num_opcodes)];
|
||||
const aco::Format format[static_cast<int>(aco_opcode::num_opcodes)];
|
||||
} Info;
|
||||
|
|
|
|||
|
|
@ -155,7 +155,7 @@ class Opcode(object):
|
|||
"""Class that represents all the information we have about the opcode
|
||||
NOTE: this must be kept in sync with aco_op_info
|
||||
"""
|
||||
def __init__(self, name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod):
|
||||
def __init__(self, name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod, is_atomic):
|
||||
"""Parameters:
|
||||
|
||||
- name is the name of the opcode (prepend nir_op_ for the enum name)
|
||||
|
|
@ -180,15 +180,16 @@ class Opcode(object):
|
|||
self.opcode_gfx10 = opcode_gfx10
|
||||
self.input_mod = "1" if input_mod else "0"
|
||||
self.output_mod = "1" if output_mod else "0"
|
||||
self.is_atomic = "1" if is_atomic else "0"
|
||||
self.format = format
|
||||
|
||||
|
||||
# global dictionary of opcodes
|
||||
opcodes = {}
|
||||
|
||||
def opcode(name, opcode_gfx7 = -1, opcode_gfx9 = -1, opcode_gfx10 = -1, format = Format.PSEUDO, input_mod = False, output_mod = False):
|
||||
def opcode(name, opcode_gfx7 = -1, opcode_gfx9 = -1, opcode_gfx10 = -1, format = Format.PSEUDO, input_mod = False, output_mod = False, is_atomic = True):
|
||||
assert name not in opcodes
|
||||
opcodes[name] = Opcode(name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod)
|
||||
opcodes[name] = Opcode(name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod, is_atomic)
|
||||
|
||||
opcode("exp", 0, 0, 0, format = Format.EXP)
|
||||
opcode("p_parallelcopy")
|
||||
|
|
@ -584,7 +585,7 @@ SMEM = {
|
|||
( -1, -1, -1, 0xac, 0xac, "s_atomic_dec_x2"),
|
||||
}
|
||||
for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SMEM:
|
||||
opcode(name, gfx7, gfx9, gfx10, Format.SMEM)
|
||||
opcode(name, gfx7, gfx9, gfx10, Format.SMEM, is_atomic = "atomic" not in name)
|
||||
|
||||
|
||||
# VOP2 instructions: 2 inputs, 1 output (+ optional vcc)
|
||||
|
|
@ -1263,7 +1264,7 @@ MUBUF = {
|
|||
( -1, -1, -1, -1, 0x72, "buffer_gl1_inv"),
|
||||
}
|
||||
for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in MUBUF:
|
||||
opcode(name, gfx7, gfx9, gfx10, Format.MUBUF)
|
||||
opcode(name, gfx7, gfx9, gfx10, Format.MUBUF, is_atomic = "atomic" not in name)
|
||||
|
||||
MTBUF = {
|
||||
(0x00, 0x00, 0x00, 0x00, 0x00, "tbuffer_load_format_x"),
|
||||
|
|
@ -1327,7 +1328,7 @@ IMAGE_ATOMIC = {
|
|||
# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (gfx6, gfx7, gfx89, gfx89, ???, name)
|
||||
# gfx7 and gfx10 opcodes are the same here
|
||||
for (gfx6, gfx7, gfx89, name) in IMAGE_ATOMIC:
|
||||
opcode(name, gfx7, gfx89, gfx7, Format.MIMG)
|
||||
opcode(name, gfx7, gfx89, gfx7, Format.MIMG, is_atomic = False)
|
||||
|
||||
IMAGE_SAMPLE = {
|
||||
(0x20, "image_sample"),
|
||||
|
|
@ -1467,7 +1468,7 @@ FLAT = {
|
|||
(0x60, -1, 0x60, "flat_atomic_fmax_x2"),
|
||||
}
|
||||
for (gfx7, gfx8, gfx10, name) in FLAT:
|
||||
opcode(name, gfx7, gfx8, gfx10, Format.FLAT)
|
||||
opcode(name, gfx7, gfx8, gfx10, Format.FLAT, is_atomic = "atomic" not in name)
|
||||
|
||||
GLOBAL = {
|
||||
#GFX8_9, GFX10
|
||||
|
|
@ -1527,7 +1528,7 @@ GLOBAL = {
|
|||
( -1, 0x60, "global_atomic_fmax_x2"),
|
||||
}
|
||||
for (gfx8, gfx10, name) in GLOBAL:
|
||||
opcode(name, -1, gfx8, gfx10, Format.GLOBAL)
|
||||
opcode(name, -1, gfx8, gfx10, Format.GLOBAL, is_atomic = "atomic" not in name)
|
||||
|
||||
SCRATCH = {
|
||||
#GFX8_9, GFX10
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ namespace aco {
|
|||
opcode_names = sorted(opcodes.keys())
|
||||
can_use_input_modifiers = "".join([opcodes[name].input_mod for name in reversed(opcode_names)])
|
||||
can_use_output_modifiers = "".join([opcodes[name].output_mod for name in reversed(opcode_names)])
|
||||
is_atomic = "".join([opcodes[name].is_atomic for name in reversed(opcode_names)])
|
||||
%>
|
||||
|
||||
extern const aco::Info instr_info = {
|
||||
|
|
@ -53,6 +54,7 @@ extern const aco::Info instr_info = {
|
|||
},
|
||||
.can_use_input_modifiers = std::bitset<${len(opcode_names)}>("${can_use_input_modifiers}"),
|
||||
.can_use_output_modifiers = std::bitset<${len(opcode_names)}>("${can_use_output_modifiers}"),
|
||||
.is_atomic = std::bitset<${len(opcode_names)}>("${is_atomic}"),
|
||||
.name = {
|
||||
% for name in opcode_names:
|
||||
"${name}",
|
||||
|
|
|
|||
|
|
@ -2265,12 +2265,7 @@ void select_instruction(opt_ctx &ctx, aco_ptr<Instruction>& instr)
|
|||
{
|
||||
const uint32_t threshold = 4;
|
||||
|
||||
/* Dead Code Elimination:
|
||||
* We remove instructions if they define temporaries which all are unused */
|
||||
const bool is_used = instr->definitions.empty() ||
|
||||
std::any_of(instr->definitions.begin(), instr->definitions.end(),
|
||||
[&ctx](const Definition& def) { return ctx.uses[def.tempId()]; });
|
||||
if (!is_used) {
|
||||
if (is_dead(ctx.uses, instr.get())) {
|
||||
instr.reset();
|
||||
return;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue