mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 11:30:11 +01:00
aco: lower p_constaddr into separate instructions earlier
This allows them to be scheduled properly and simplifies the assembler a little.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8994>
This commit is contained in:
parent
ab957bb899
commit
0af7ff49fd
5 changed files with 48 additions and 45 deletions
|
|
@ -1,5 +1,6 @@
|
|||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
|
||||
#include "aco_ir.h"
|
||||
#include "aco_builder.h"
|
||||
|
|
@ -10,11 +11,16 @@
|
|||
|
||||
namespace aco {
|
||||
|
||||
struct constaddr_info {
|
||||
unsigned getpc_end;
|
||||
unsigned add_literal;
|
||||
};
|
||||
|
||||
struct asm_context {
|
||||
Program *program;
|
||||
enum chip_class chip_class;
|
||||
std::vector<std::pair<int, SOPP_instruction*>> branches;
|
||||
std::vector<unsigned> constaddrs;
|
||||
std::map<unsigned, constaddr_info> constaddrs;
|
||||
const int16_t* opcode;
|
||||
// TODO: keep track of branch instructions referring blocks
|
||||
// and, when emitting the block, correct the offset in instr
|
||||
|
|
@ -45,39 +51,17 @@ static uint32_t get_sdwa_sel(unsigned sel, PhysReg reg)
|
|||
void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* instr)
|
||||
{
|
||||
/* lower remaining pseudo-instructions */
|
||||
if (instr->opcode == aco_opcode::p_constaddr) {
|
||||
unsigned dest = instr->definitions[0].physReg();
|
||||
unsigned offset = instr->operands[0].constantValue();
|
||||
if (instr->opcode == aco_opcode::p_constaddr_getpc) {
|
||||
ctx.constaddrs[instr->operands[0].constantValue()].getpc_end = out.size() + 1;
|
||||
|
||||
/* s_getpc_b64 dest[0:1] */
|
||||
uint32_t encoding = (0b101111101 << 23);
|
||||
uint32_t opcode = ctx.opcode[(int)aco_opcode::s_getpc_b64];
|
||||
if (opcode >= 55 && ctx.chip_class <= GFX9) {
|
||||
assert(ctx.chip_class == GFX9 && opcode < 60);
|
||||
opcode = opcode - 4;
|
||||
}
|
||||
encoding |= dest << 16;
|
||||
encoding |= opcode << 8;
|
||||
out.push_back(encoding);
|
||||
instr->opcode = aco_opcode::s_getpc_b64;
|
||||
instr->operands.pop_back();
|
||||
} else if (instr->opcode == aco_opcode::p_constaddr_addlo) {
|
||||
ctx.constaddrs[instr->operands[1].constantValue()].add_literal = out.size() + 1;
|
||||
|
||||
/* s_add_u32 dest[0], dest[0], ... */
|
||||
encoding = (0b10 << 30);
|
||||
encoding |= ctx.opcode[(int)aco_opcode::s_add_u32] << 23;
|
||||
encoding |= dest << 16;
|
||||
encoding |= dest;
|
||||
encoding |= 255 << 8;
|
||||
out.push_back(encoding);
|
||||
ctx.constaddrs.push_back(out.size());
|
||||
out.push_back(offset);
|
||||
|
||||
/* s_addc_u32 dest[1], dest[1], 0 */
|
||||
encoding = (0b10 << 30);
|
||||
encoding |= ctx.opcode[(int)aco_opcode::s_addc_u32] << 23;
|
||||
encoding |= (dest + 1) << 16;
|
||||
encoding |= dest + 1;
|
||||
encoding |= 128 << 8;
|
||||
out.push_back(encoding);
|
||||
return;
|
||||
instr->opcode = aco_opcode::s_add_u32;
|
||||
instr->operands[1] = Operand(0u);
|
||||
instr->operands[1].setFixed(PhysReg(255));
|
||||
}
|
||||
|
||||
uint32_t opcode = ctx.opcode[(int)instr->opcode];
|
||||
|
|
@ -798,14 +782,14 @@ static void insert_code(asm_context& ctx, std::vector<uint32_t>& out, unsigned i
|
|||
for (; branch_it != ctx.branches.end(); ++branch_it)
|
||||
branch_it->first += insert_count;
|
||||
|
||||
/* Find first constant address after the inserted code */
|
||||
auto caddr_it = std::find_if(ctx.constaddrs.begin(), ctx.constaddrs.end(), [insert_before](const int &caddr_pos) -> bool {
|
||||
return (unsigned)caddr_pos >= insert_before;
|
||||
});
|
||||
|
||||
/* Update the locations of constant addresses */
|
||||
for (; caddr_it != ctx.constaddrs.end(); ++caddr_it)
|
||||
(*caddr_it) += insert_count;
|
||||
/* Update the locations of p_constaddr instructions */
|
||||
for (auto& constaddr : ctx.constaddrs) {
|
||||
constaddr_info& info = constaddr.second;
|
||||
if (info.getpc_end >= insert_before)
|
||||
info.getpc_end += insert_count;
|
||||
if (info.add_literal >= insert_before)
|
||||
info.add_literal += insert_count;
|
||||
}
|
||||
}
|
||||
|
||||
static void fix_branches_gfx10(asm_context& ctx, std::vector<uint32_t>& out)
|
||||
|
|
@ -928,8 +912,10 @@ void fix_branches(asm_context& ctx, std::vector<uint32_t>& out)
|
|||
|
||||
void fix_constaddrs(asm_context& ctx, std::vector<uint32_t>& out)
|
||||
{
|
||||
for (unsigned addr : ctx.constaddrs)
|
||||
out[addr] += (out.size() - addr + 1u) * 4u;
|
||||
for (auto& constaddr : ctx.constaddrs) {
|
||||
constaddr_info& info = constaddr.second;
|
||||
out[info.add_literal] += (out.size() - info.getpc_end) * 4u;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned emit_program(Program* program,
|
||||
|
|
|
|||
|
|
@ -5550,7 +5550,7 @@ void visit_load_constant(isel_context *ctx, nir_intrinsic_instr *instr)
|
|||
offset = bld.vadd32(bld.def(v1), Operand(base), offset);
|
||||
|
||||
Temp rsrc = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4),
|
||||
bld.sop1(aco_opcode::p_constaddr, bld.def(s2), bld.def(s1, scc), Operand(ctx->constant_data_offset)),
|
||||
bld.pseudo(aco_opcode::p_constaddr, bld.def(s2), bld.def(s1, scc), Operand(ctx->constant_data_offset)),
|
||||
Operand(MIN2(base + range, ctx->shader->constant_data_size)),
|
||||
Operand(desc_type));
|
||||
unsigned size = instr->dest.ssa.bit_size / 8;
|
||||
|
|
|
|||
|
|
@ -1979,6 +1979,17 @@ void lower_to_hw_instr(Program* program)
|
|||
unreachable("Current hardware supports ds_bpermute, don't emit p_bpermute.");
|
||||
break;
|
||||
}
|
||||
case aco_opcode::p_constaddr:
|
||||
{
|
||||
unsigned id = instr->definitions[0].tempId();
|
||||
PhysReg reg = instr->definitions[0].physReg();
|
||||
bld.sop1(aco_opcode::p_constaddr_getpc, instr->definitions[0], Operand(id));
|
||||
bld.sop2(aco_opcode::p_constaddr_addlo, Definition(reg, s1), bld.def(s1, scc),
|
||||
Operand(reg, s1), Operand(id));
|
||||
bld.sop2(aco_opcode::s_addc_u32, Definition(reg.advance(4), s1), bld.def(s1, scc),
|
||||
Operand(reg.advance(4), s1), Operand(0u), Operand(scc, s1));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -288,6 +288,8 @@ opcode("p_exit_early_if")
|
|||
# simulates proper bpermute behavior when it's unsupported, eg. GFX10 wave64
|
||||
opcode("p_bpermute")
|
||||
|
||||
opcode("p_constaddr")
|
||||
|
||||
# SOP2 instructions: 2 scalar inputs, 1 scalar output (+optional scc)
|
||||
SOP2 = {
|
||||
# GFX6, GFX7, GFX8, GFX9, GFX10, name
|
||||
|
|
@ -344,6 +346,8 @@ SOP2 = {
|
|||
( -1, -1, -1, 0x34, 0x34, "s_pack_hh_b32_b16"),
|
||||
( -1, -1, -1, 0x2c, 0x35, "s_mul_hi_u32"),
|
||||
( -1, -1, -1, 0x2d, 0x36, "s_mul_hi_i32"),
|
||||
# actually a pseudo-instruction. it's lowered to SALU during assembly though, so it's useful to identify it as a SOP2.
|
||||
( -1, -1, -1, -1, -1, "p_constaddr_addlo"),
|
||||
}
|
||||
for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SOP2:
|
||||
opcode(name, gfx7, gfx9, gfx10, Format.SOP2)
|
||||
|
|
@ -457,7 +461,7 @@ SOP1 = {
|
|||
( -1, -1, -1, -1, 0x47, "s_andn2_wrexec_b32"),
|
||||
( -1, -1, -1, -1, 0x49, "s_movrelsd_2_b32"),
|
||||
# actually a pseudo-instruction. it's lowered to SALU during assembly though, so it's useful to identify it as a SOP1.
|
||||
( -1, -1, -1, -1, -1, "p_constaddr"),
|
||||
( -1, -1, -1, -1, -1, "p_constaddr_getpc"),
|
||||
}
|
||||
for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SOP1:
|
||||
opcode(name, gfx7, gfx9, gfx10, Format.SOP1)
|
||||
|
|
|
|||
|
|
@ -220,7 +220,9 @@ BEGIN_TEST(assembler.long_jump.constaddr)
|
|||
|
||||
//>> s_getpc_b64 s[0:1] ; be801f00
|
||||
//! s_add_u32 s0, s0, 0xe0 ; 8000ff00 000000e0
|
||||
bld.sop1(aco_opcode::p_constaddr, Definition(PhysReg(0), s2), Operand(0u));
|
||||
bld.sop1(aco_opcode::p_constaddr_getpc, Definition(PhysReg(0), s2), Operand(0u));
|
||||
bld.sop2(aco_opcode::p_constaddr_addlo, Definition(PhysReg(0), s1), bld.def(s1, scc),
|
||||
Operand(PhysReg(0), s1), Operand(0u));
|
||||
|
||||
program->blocks[2].linear_preds.push_back(0u);
|
||||
program->blocks[2].linear_preds.push_back(1u);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue