mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 13:00:09 +01:00
aco: add a subdword lowering pass
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28836>
This commit is contained in:
parent
6b35de971c
commit
47566d0df3
5 changed files with 330 additions and 3 deletions
|
|
@ -17,8 +17,6 @@ dEQP-VK.pipeline.pipeline_library.input_attribute_offset.vec4.offset_12.packed.n
|
|||
dEQP-VK.pipeline.pipeline_library.input_attribute_offset.vec4.offset_12.packed.with_memory_offset.static,Fail
|
||||
dEQP-VK.pipeline.pipeline_library.input_attribute_offset.vec4.offset_4.packed.no_memory_offset.static,Fail
|
||||
dEQP-VK.pipeline.pipeline_library.input_attribute_offset.vec4.offset_4.packed.with_memory_offset.static,Fail
|
||||
dEQP-VK.spirv_assembly.type.vec4.i8.mod_geom,Fail
|
||||
dEQP-VK.spirv_assembly.type.vec4.i8.rem_geom,Crash
|
||||
|
||||
# New CTS failures in 1.3.8.0
|
||||
dEQP-VK.api.get_device_proc_addr.non_enabled,Fail
|
||||
|
|
|
|||
|
|
@ -104,6 +104,10 @@ aco_postprocess_shader(const struct aco_compiler_options* options,
|
|||
if (!info->is_trap_handler_shader) {
|
||||
dominator_tree(program.get());
|
||||
lower_phis(program.get());
|
||||
|
||||
if (program->gfx_level <= GFX7)
|
||||
lower_subdword(program.get());
|
||||
|
||||
validate(program.get());
|
||||
|
||||
/* Optimization */
|
||||
|
|
|
|||
|
|
@ -659,7 +659,8 @@ public:
|
|||
constexpr void setTemp(Temp t) noexcept
|
||||
{
|
||||
assert(!isConstant_);
|
||||
isTemp_ = true;
|
||||
if (t.id() != 0)
|
||||
isTemp_ = true;
|
||||
data_.temp = t;
|
||||
}
|
||||
|
||||
|
|
@ -2113,6 +2114,7 @@ void select_ps_prolog(Program* program, void* pinfo, ac_shader_config* config,
|
|||
const struct aco_shader_info* info, const struct ac_shader_args* args);
|
||||
|
||||
void lower_phis(Program* program);
|
||||
void lower_subdword(Program* program);
|
||||
void calc_min_waves(Program* program);
|
||||
void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand);
|
||||
live live_var_analysis(Program* program);
|
||||
|
|
|
|||
322
src/amd/compiler/aco_lower_subdword.cpp
Normal file
322
src/amd/compiler/aco_lower_subdword.cpp
Normal file
|
|
@ -0,0 +1,322 @@
|
|||
/*
|
||||
* Copyright © 2024 Valve Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "aco_builder.h"
|
||||
#include "aco_ir.h"
|
||||
|
||||
namespace aco {
|
||||
|
||||
namespace {
|
||||
|
||||
Temp
|
||||
dword_temp(Temp tmp)
|
||||
{
|
||||
if (!tmp.regClass().is_subdword())
|
||||
return tmp;
|
||||
|
||||
RegClass rc = RegClass(tmp.type(), tmp.size());
|
||||
if (tmp.regClass().is_linear())
|
||||
rc = rc.as_linear();
|
||||
return Temp(tmp.id(), rc);
|
||||
}
|
||||
|
||||
Definition
|
||||
dword_def(Program* program, Definition def)
|
||||
{
|
||||
def.setTemp(dword_temp(def.getTemp()));
|
||||
|
||||
if (def.isTemp())
|
||||
program->temp_rc[def.tempId()] = def.regClass();
|
||||
|
||||
return def;
|
||||
}
|
||||
|
||||
Operand
|
||||
dword_op(Operand op, bool convert_const)
|
||||
{
|
||||
if (op.isTemp() || op.isUndefined())
|
||||
op.setTemp(dword_temp(op.getTemp()));
|
||||
else if (convert_const && op.isConstant() && op.bytes() < 4)
|
||||
op = Operand::c32(op.constantValue());
|
||||
return op;
|
||||
}
|
||||
|
||||
struct op_info {
|
||||
Operand op;
|
||||
unsigned offset; /* byte offset into op. */
|
||||
unsigned bytes; /* how many bytes to use after offset. */
|
||||
};
|
||||
|
||||
void
|
||||
emit_pack(Builder& bld, Definition def, std::vector<op_info> operands)
|
||||
{
|
||||
assert(def.regClass().type() == RegType::vgpr);
|
||||
|
||||
/* split definition into dwords. */
|
||||
if (def.size() > 1) {
|
||||
aco_ptr<Instruction> vec{
|
||||
create_instruction(aco_opcode::p_create_vector, Format::PSEUDO, def.size(), 1)};
|
||||
vec->definitions[0] = def;
|
||||
|
||||
unsigned op_idx = 0;
|
||||
for (unsigned i = 0; i < def.size(); i++) {
|
||||
std::vector<op_info> sub_operands;
|
||||
Definition sub_def = bld.def(v1);
|
||||
vec->operands[i] = Operand(sub_def.getTemp());
|
||||
unsigned sub_bytes = 0;
|
||||
while (sub_bytes < 4) {
|
||||
unsigned new_bytes = MIN2(operands[op_idx].bytes, 4 - sub_bytes);
|
||||
sub_bytes += new_bytes;
|
||||
|
||||
sub_operands.push_back({operands[op_idx].op, operands[op_idx].offset, new_bytes});
|
||||
|
||||
if (new_bytes == operands[op_idx].bytes) {
|
||||
op_idx++;
|
||||
if (op_idx >= operands.size())
|
||||
break;
|
||||
} else {
|
||||
operands[op_idx].offset += new_bytes;
|
||||
operands[op_idx].bytes -= new_bytes;
|
||||
}
|
||||
}
|
||||
|
||||
emit_pack(bld, sub_def, std::move(sub_operands));
|
||||
}
|
||||
|
||||
bld.insert(std::move(vec));
|
||||
return;
|
||||
}
|
||||
|
||||
/* split operands into dwords. */
|
||||
for (unsigned i = 0; i < operands.size(); i++) {
|
||||
Operand op = operands[i].op;
|
||||
unsigned offset = operands[i].offset;
|
||||
unsigned bytes = operands[i].bytes;
|
||||
|
||||
if (op.isUndefined() || op.isConstant()) {
|
||||
if (op.isConstant())
|
||||
operands[i].op = Operand::c32(op.constantValue64() >> (offset * 8));
|
||||
else
|
||||
operands[i].op = Operand(v1);
|
||||
operands[i].offset = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (op.size() == 1)
|
||||
continue;
|
||||
|
||||
assert(!op.isFixed());
|
||||
|
||||
RegClass rc = op.isOfType(RegType::vgpr) ? v1 : s1;
|
||||
|
||||
aco_ptr<Instruction> split{
|
||||
create_instruction(aco_opcode::p_split_vector, Format::PSEUDO, 1, op.size())};
|
||||
split->operands[0] = op;
|
||||
for (unsigned j = 0; j < op.size(); j++)
|
||||
split->definitions[j] = bld.def(rc);
|
||||
|
||||
unsigned dword_off = offset / 4;
|
||||
unsigned new_bytes = MIN2(4 - (offset % 4), bytes);
|
||||
operands[i].op = Operand(split->definitions[dword_off++].getTemp());
|
||||
operands[i].offset = offset % 4;
|
||||
operands[i].bytes = new_bytes;
|
||||
if (new_bytes != bytes) {
|
||||
i++;
|
||||
operands.insert(
|
||||
std::next(operands.begin(), i),
|
||||
{Operand(split->definitions[dword_off++].getTemp()), 0, bytes - new_bytes});
|
||||
}
|
||||
|
||||
bld.insert(std::move(split));
|
||||
}
|
||||
|
||||
/* remove undef operands */
|
||||
for (unsigned i = 0; i < operands.size(); i++) {
|
||||
Operand op = operands[i].op;
|
||||
unsigned bytes = operands[i].bytes;
|
||||
if (!op.isUndefined())
|
||||
continue;
|
||||
|
||||
if (i != operands.size() - 1) {
|
||||
unsigned offset = operands[i + 1].offset;
|
||||
operands[i + 1].offset -= MIN2(offset, bytes);
|
||||
bytes -= MIN2(offset, bytes);
|
||||
}
|
||||
|
||||
if (i != 0) {
|
||||
unsigned rem = 4 - (operands[i - 1].bytes + operands[i - 1].offset);
|
||||
operands[i - 1].bytes += MIN2(rem, bytes);
|
||||
bytes -= MIN2(rem, bytes);
|
||||
}
|
||||
|
||||
if (bytes == 0) {
|
||||
operands.erase(std::next(operands.begin(), i));
|
||||
i--;
|
||||
} else {
|
||||
operands[i].op = Operand::c32(0);
|
||||
operands[i].bytes = bytes;
|
||||
}
|
||||
}
|
||||
|
||||
/* combine constant operands */
|
||||
for (unsigned i = 1; i < operands.size(); i++) {
|
||||
if (!operands[i].op.isConstant())
|
||||
continue;
|
||||
assert(operands[i].offset == 0);
|
||||
|
||||
if (!operands[i - 1].op.isConstant())
|
||||
continue;
|
||||
|
||||
unsigned bytes = operands[i - 1].bytes;
|
||||
uint32_t prev = operands[i - 1].op.constantValue() & BITFIELD_MASK(bytes * 8);
|
||||
uint32_t current = operands[i].op.constantValue() << (bytes * 8);
|
||||
|
||||
operands[i - 1].op = Operand::c32(prev | current);
|
||||
operands[i - 1].bytes += operands[i].bytes;
|
||||
operands.erase(std::next(operands.begin(), i));
|
||||
i--;
|
||||
}
|
||||
|
||||
if (operands.size() == 1) {
|
||||
Operand op = operands[0].op;
|
||||
unsigned offset = operands[0].offset;
|
||||
if (offset != 0) {
|
||||
if (op.isOfType(RegType::vgpr))
|
||||
bld.vop2(aco_opcode::v_lshrrev_b32, def, Operand::c32(offset * 8), op);
|
||||
else
|
||||
bld.vop2_e64(aco_opcode::v_lshrrev_b32, def, Operand::c32(offset * 8), op);
|
||||
} else {
|
||||
bld.copy(def, op);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
Operand curr = operands[0].op;
|
||||
unsigned shift = (4 - (operands[0].bytes + operands[0].offset)) * 8;
|
||||
if (shift != 0) {
|
||||
if (curr.isConstant())
|
||||
curr = Operand::c32(curr.constantValue() << shift);
|
||||
else if (curr.isOfType(RegType::vgpr))
|
||||
curr = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(shift), curr);
|
||||
else
|
||||
curr = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), curr,
|
||||
Operand::c32(shift));
|
||||
}
|
||||
|
||||
if (curr.isLiteral())
|
||||
curr = bld.copy(bld.def(s1), curr);
|
||||
|
||||
unsigned packed_bytes = operands[0].bytes;
|
||||
for (unsigned i = 1; i < operands.size(); i++) {
|
||||
Operand op = operands[i].op;
|
||||
unsigned offset = operands[i].offset;
|
||||
|
||||
if (offset) {
|
||||
if (op.isOfType(RegType::vgpr))
|
||||
op = bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), Operand::c32(offset * 8), op);
|
||||
else
|
||||
op = bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc), op,
|
||||
Operand::c32(offset * 8));
|
||||
}
|
||||
|
||||
if (curr.isOfType(RegType::sgpr) && (op.isOfType(RegType::sgpr) || op.isLiteral()))
|
||||
op = bld.copy(bld.def(v1), op);
|
||||
else if (op.isLiteral())
|
||||
op = bld.copy(bld.def(s1), op);
|
||||
|
||||
Definition next = i + 1 == operands.size() ? def : bld.def(v1);
|
||||
unsigned bytes = i + 1 == operands.size() ? 4 - packed_bytes : operands[i].bytes;
|
||||
curr = bld.vop3(aco_opcode::v_alignbyte_b32, next, op, curr, Operand::c32(bytes));
|
||||
packed_bytes += bytes;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
emit_split_vector(Builder& bld, aco_ptr<Instruction>& instr)
|
||||
{
|
||||
bool needs_lowering = false;
|
||||
for (Definition& def : instr->definitions)
|
||||
needs_lowering |= def.regClass().is_subdword();
|
||||
|
||||
if (!needs_lowering) {
|
||||
bld.insert(std::move(instr));
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<op_info> operands = {{dword_op(instr->operands[0], true), 0, 0}};
|
||||
for (Definition& def : instr->definitions) {
|
||||
operands[0].bytes = def.bytes();
|
||||
emit_pack(bld, dword_def(bld.program, def), operands);
|
||||
operands[0].offset += def.bytes();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
emit_create_vector(Builder& bld, aco_ptr<Instruction>& instr)
|
||||
{
|
||||
instr->definitions[0] = dword_def(bld.program, instr->definitions[0]);
|
||||
bool needs_lowering = false;
|
||||
for (Operand& op : instr->operands)
|
||||
needs_lowering |= (op.hasRegClass() && op.regClass().is_subdword()) || op.bytes() < 4;
|
||||
|
||||
if (!needs_lowering) {
|
||||
bld.insert(std::move(instr));
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<op_info> operands;
|
||||
operands.reserve(instr->operands.size());
|
||||
for (const Operand& op : instr->operands)
|
||||
operands.push_back({dword_op(op, true), 0, op.bytes()});
|
||||
|
||||
emit_pack(bld, instr->definitions[0], std::move(operands));
|
||||
}
|
||||
|
||||
void
|
||||
process_block(Program* program, Block* block)
|
||||
{
|
||||
std::vector<aco_ptr<Instruction>> instructions;
|
||||
instructions.reserve(block->instructions.size());
|
||||
|
||||
Builder bld(program, &instructions);
|
||||
for (unsigned idx = 0; idx < block->instructions.size(); idx++) {
|
||||
aco_ptr<Instruction> instr = std::move(block->instructions[idx]);
|
||||
|
||||
if (instr->opcode == aco_opcode::p_split_vector) {
|
||||
emit_split_vector(bld, instr);
|
||||
} else if (instr->opcode == aco_opcode::p_create_vector) {
|
||||
emit_create_vector(bld, instr);
|
||||
} else if (instr->opcode == aco_opcode::p_extract_vector &&
|
||||
instr->definitions[0].regClass().is_subdword()) {
|
||||
const Definition& def = instr->definitions[0];
|
||||
unsigned offset = def.bytes() * instr->operands[1].constantValue();
|
||||
std::vector<op_info> operands = {
|
||||
{dword_op(instr->operands[0], true), offset, def.bytes()}};
|
||||
emit_pack(bld, dword_def(program, def), std::move(operands));
|
||||
} else {
|
||||
for (Definition& def : instr->definitions)
|
||||
def = dword_def(program, def);
|
||||
|
||||
for (Operand& op : instr->operands)
|
||||
op = dword_op(op, instr->isPseudo());
|
||||
|
||||
bld.insert(std::move(instr));
|
||||
}
|
||||
}
|
||||
|
||||
block->instructions = std::move(instructions);
|
||||
}
|
||||
|
||||
} /* end namespace */
|
||||
|
||||
void
|
||||
lower_subdword(Program* program)
|
||||
{
|
||||
for (Block& block : program->blocks)
|
||||
process_block(program, &block);
|
||||
}
|
||||
|
||||
} /* end namespace aco */
|
||||
|
|
@ -50,6 +50,7 @@ libaco_files = files(
|
|||
'aco_register_allocation.cpp',
|
||||
'aco_live_var_analysis.cpp',
|
||||
'aco_lower_phis.cpp',
|
||||
'aco_lower_subdword.cpp',
|
||||
'aco_lower_to_cssa.cpp',
|
||||
'aco_lower_to_hw_instr.cpp',
|
||||
'aco_optimizer.cpp',
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue