mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-29 08:00:12 +01:00
aco: add MIMG_instruction::strict_wqm
This lets us use linear VGPRs for part of the texture sample's address.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22636>
This commit is contained in:
parent
1a6a57ac96
commit
35c133a77b
6 changed files with 113 additions and 24 deletions
|
|
@ -5930,14 +5930,7 @@ static MIMG_instruction*
|
|||
emit_mimg(Builder& bld, aco_opcode op, Temp dst, Temp rsrc, Operand samp, std::vector<Temp> coords,
|
||||
bool needs_wqm = false, Operand vdata = Operand(v1))
|
||||
{
|
||||
/* Limit NSA instructions to 3 dwords on GFX10 to avoid stability issues.
|
||||
* On GFX11 the first 4 vaddr are single registers and the last contains the remaining
|
||||
* vector.
|
||||
*/
|
||||
size_t nsa_size = bld.program->gfx_level == GFX10 ? 5
|
||||
: bld.program->gfx_level == GFX10_3 ? 13
|
||||
: bld.program->gfx_level >= GFX11 ? 4
|
||||
: 0;
|
||||
size_t nsa_size = bld.program->dev.max_nsa_vgprs;
|
||||
nsa_size = bld.program->gfx_level >= GFX11 || coords.size() <= nsa_size ? nsa_size : 0;
|
||||
|
||||
for (unsigned i = 0; i < std::min(coords.size(), nsa_size); i++) {
|
||||
|
|
|
|||
|
|
@ -188,6 +188,21 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
|
|||
program->dev.scratch_global_offset_max = 4095;
|
||||
}
|
||||
|
||||
if (program->gfx_level >= GFX11) {
|
||||
/* GFX11 can have only 1 NSA dword. The last VGPR isn't included here because it contains the
|
||||
* rest of the address.
|
||||
*/
|
||||
program->dev.max_nsa_vgprs = 4;
|
||||
} else if (program->gfx_level >= GFX10_3) {
|
||||
/* GFX10.3 can have up to 3 NSA dwords. */
|
||||
program->dev.max_nsa_vgprs = 13;
|
||||
} else if (program->gfx_level >= GFX10) {
|
||||
/* Limit NSA instructions to 1 NSA dword on GFX10 to avoid stability issues. */
|
||||
program->dev.max_nsa_vgprs = 5;
|
||||
} else {
|
||||
program->dev.max_nsa_vgprs = 0;
|
||||
}
|
||||
|
||||
program->wgp_mode = wgp_mode;
|
||||
|
||||
program->progress = CompilationProgress::after_isel;
|
||||
|
|
|
|||
|
|
@ -1619,7 +1619,8 @@ struct MIMG_instruction : public Instruction {
|
|||
bool a16 : 1; /* VEGA, NAVI: Address components are 16-bits */
|
||||
bool d16 : 1; /* Convert 32-bit data to 16-bit data */
|
||||
bool disable_wqm : 1; /* Require an exec mask without helper invocations */
|
||||
uint8_t padding0 : 2;
|
||||
bool strict_wqm : 1; /* VADDR is a linear VGPR and additional VGPRs may be copied into it */
|
||||
uint8_t padding0 : 1;
|
||||
uint8_t padding1;
|
||||
uint8_t padding2;
|
||||
};
|
||||
|
|
@ -2095,6 +2096,7 @@ struct DeviceInfo {
|
|||
|
||||
int16_t scratch_global_offset_min;
|
||||
int16_t scratch_global_offset_max;
|
||||
unsigned max_nsa_vgprs;
|
||||
};
|
||||
|
||||
enum class CompilationProgress {
|
||||
|
|
|
|||
|
|
@ -2138,6 +2138,66 @@ hw_init_scratch(Builder& bld, Definition def, Operand scratch_addr, Operand scra
|
|||
}
|
||||
}
|
||||
|
||||
/* Lowers a MIMG instruction whose strict_wqm flag is set into its final operand form.
 *
 * On entry, operands[3] is the linear VGPR that holds (part of) the address and
 * operands[4+] are additional VGPRs belonging to the address. On exit, strict_wqm is
 * cleared and the instruction has exactly 3 + num_vaddr operands, where the address
 * operands are either:
 *  - several separate operands (when nsa_size below is non-zero), or
 *  - the linear VGPR alone, after the extra operands have been scheduled to be copied
 *    into it.
 *
 * ctx:   lowering context; ctx->program supplies gfx_level and dev.max_nsa_vgprs.
 * instr: the instruction to rewrite. It may be replaced by a newly created instruction
 *        if more operand slots are needed than the original provides.
 */
void
|
||||
lower_image_sample(lower_context* ctx, aco_ptr<Instruction>& instr)
|
||||
{
|
||||
Operand linear_vgpr = instr->operands[3];
|
||||
|
||||
unsigned nsa_size = ctx->program->dev.max_nsa_vgprs;
|
||||
unsigned vaddr_size = linear_vgpr.size();
|
||||
unsigned num_copied_vgprs = instr->operands.size() - 4;
|
||||
/* Keep the device limit only if there is at least one extra operand and either the
 * target is GFX11+ or the whole address fits within the limit. Otherwise nsa_size
 * becomes 0, which selects the copy path in the else-branch below.
 */
nsa_size = num_copied_vgprs > 0 && (ctx->program->gfx_level >= GFX11 || vaddr_size <= nsa_size)
|
||||
? nsa_size
|
||||
: 0;
|
||||
|
||||
/* NOTE(review): fixed capacity of 16 with no bounds check in this function; assumes
 * num_vaddr can never exceed it -- confirm against the largest possible address.
 */
Operand vaddr[16];
|
||||
unsigned num_vaddr = 0;
|
||||
|
||||
if (nsa_size) {
|
||||
assert(num_copied_vgprs <= nsa_size);
|
||||
/* The extra operands are used directly as the leading address operands. */
for (unsigned i = 0; i < num_copied_vgprs; i++)
|
||||
vaddr[num_vaddr++] = instr->operands[4 + i];
|
||||
/* The following address operands, up to the limit, come from the linear VGPR one v1
 * at a time. The i * 4 offset is presumably a byte offset of one dword per register
 * -- TODO confirm against PhysReg::advance.
 */
for (unsigned i = num_copied_vgprs; i < std::min(vaddr_size, nsa_size); i++)
|
||||
vaddr[num_vaddr++] = Operand(linear_vgpr.physReg().advance(i * 4), v1);
|
||||
/* Whatever lies beyond the limit is passed as one final vector operand. */
if (vaddr_size > nsa_size) {
|
||||
RegClass rc = RegClass::get(RegType::vgpr, (vaddr_size - nsa_size) * 4);
|
||||
vaddr[num_vaddr++] = Operand(PhysReg(linear_vgpr.physReg().advance(nsa_size * 4)), rc);
|
||||
}
|
||||
} else {
|
||||
/* Single-operand path: build one copy per extra operand, placing them back to back
 * starting at the linear VGPR's register.
 */
PhysReg reg = linear_vgpr.physReg();
|
||||
std::map<PhysReg, copy_operation> copy_operations;
|
||||
for (unsigned i = 4; i < instr->operands.size(); i++) {
|
||||
Operand arg = instr->operands[i];
|
||||
Definition def(reg, RegClass::get(RegType::vgpr, arg.bytes()));
|
||||
copy_operations[def.physReg()] = {arg, def, def.bytes()};
|
||||
reg = reg.advance(arg.bytes());
|
||||
}
|
||||
vaddr[num_vaddr++] = linear_vgpr;
|
||||
|
||||
/* handle_operands presumably emits the collected copies; the value-initialized
 * pseudo instruction is only passed along to it -- verify against its definition.
 */
Pseudo_instruction pi = {};
|
||||
handle_operands(copy_operations, ctx, ctx->program->gfx_level, &pi);
|
||||
}
|
||||
|
||||
/* Clear the flag before the MIMG fields are (possibly) copied to a new instruction
 * below, so the rewritten instruction no longer carries strict_wqm.
 */
instr->mimg().strict_wqm = false;
|
||||
|
||||
/* Resize the operand list to 3 + num_vaddr entries. */
if ((3 + num_vaddr) > instr->operands.size()) {
|
||||
/* More operands are needed than the current instruction has: create a new
 * instruction and carry over definitions, the first three operands and the
 * MIMG-specific fields.
 */
MIMG_instruction *new_instr = create_instruction<MIMG_instruction>(
|
||||
instr->opcode, Format::MIMG, 3 + num_vaddr, instr->definitions.size());
|
||||
std::copy(instr->definitions.cbegin(), instr->definitions.cend(),
|
||||
new_instr->definitions.begin());
|
||||
new_instr->operands[0] = instr->operands[0];
|
||||
new_instr->operands[1] = instr->operands[1];
|
||||
new_instr->operands[2] = instr->operands[2];
|
||||
/* Copies only the bytes that follow the Instruction base inside MIMG_instruction.
 * NOTE(review): members of the Instruction base other than what create_instruction
 * sets are not carried over here -- confirm that is intended.
 */
memcpy((uint8_t*)new_instr + sizeof(Instruction), (uint8_t*)instr.get() + sizeof(Instruction),
|
||||
sizeof(MIMG_instruction) - sizeof(Instruction));
|
||||
instr.reset(new_instr);
|
||||
} else {
|
||||
/* Enough slots already: drop the surplus operands from the end. */
while (instr->operands.size() > (3 + num_vaddr))
|
||||
instr->operands.pop_back();
|
||||
}
|
||||
/* Write the final address operands starting at operand index 3. */
std::copy(vaddr, vaddr + num_vaddr, std::next(instr->operands.begin(), 3));
|
||||
}
|
||||
|
||||
void
|
||||
lower_to_hw_instr(Program* program)
|
||||
{
|
||||
|
|
@ -2802,6 +2862,9 @@ lower_to_hw_instr(Program* program)
|
|||
ctx.instructions.emplace_back(std::move(instr));
|
||||
|
||||
emit_set_mode(bld, block->fp_mode, set_round, false);
|
||||
} else if (instr->isMIMG() && instr->mimg().strict_wqm) {
|
||||
lower_image_sample(&ctx, instr);
|
||||
ctx.instructions.emplace_back(std::move(instr));
|
||||
} else {
|
||||
ctx.instructions.emplace_back(std::move(instr));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2445,7 +2445,8 @@ get_affinities(ra_ctx& ctx, std::vector<IDSet>& live_out_per_block)
|
|||
op.getTemp().type() == instr->definitions[0].getTemp().type())
|
||||
ctx.vectors[op.tempId()] = instr.get();
|
||||
}
|
||||
} else if (instr->format == Format::MIMG && instr->operands.size() > 4) {
|
||||
} else if (instr->format == Format::MIMG && instr->operands.size() > 4 &&
|
||||
!instr->mimg().strict_wqm) {
|
||||
for (unsigned i = 3; i < instr->operands.size(); i++)
|
||||
ctx.vectors[instr->operands[i].tempId()] = instr.get();
|
||||
} else if (instr->opcode == aco_opcode::p_split_vector &&
|
||||
|
|
|
|||
|
|
@ -696,21 +696,36 @@ validate_ir(Program* program)
|
|||
"TFE/LWE loads",
|
||||
instr.get());
|
||||
}
|
||||
check(instr->operands.size() == 4 || program->gfx_level >= GFX10,
|
||||
"NSA is only supported on GFX10+", instr.get());
|
||||
for (unsigned i = 3; i < instr->operands.size(); i++) {
|
||||
check(instr->operands[i].hasRegClass() &&
|
||||
instr->operands[i].regClass().type() == RegType::vgpr,
|
||||
"MIMG operands[3+] (VADDR) must be VGPR", instr.get());
|
||||
if (instr->operands.size() > 4) {
|
||||
if (program->gfx_level < GFX11) {
|
||||
check(instr->operands[i].regClass() == v1,
|
||||
"GFX10 MIMG VADDR must be v1 if NSA is used", instr.get());
|
||||
} else {
|
||||
if (instr->opcode != aco_opcode::image_bvh_intersect_ray &&
|
||||
instr->opcode != aco_opcode::image_bvh64_intersect_ray && i < 7) {
|
||||
|
||||
if (instr->mimg().strict_wqm) {
|
||||
check(instr->operands[3].isTemp() && instr->operands[3].regClass().is_linear_vgpr(),
|
||||
"MIMG operands[3] must be temp linear VGPR.", instr.get());
|
||||
|
||||
unsigned total_size = 0;
|
||||
for (unsigned i = 4; i < instr->operands.size(); i++) {
|
||||
check(instr->operands[i].isTemp() && instr->operands[i].regClass() == v1,
|
||||
"MIMG operands[4+] (VADDR) must be v1", instr.get());
|
||||
total_size += instr->operands[i].bytes();
|
||||
}
|
||||
check(total_size <= instr->operands[3].bytes(),
|
||||
"MIMG operands[4+] must fit within operands[3].", instr.get());
|
||||
} else {
|
||||
check(instr->operands.size() == 4 || program->gfx_level >= GFX10,
|
||||
"NSA is only supported on GFX10+", instr.get());
|
||||
for (unsigned i = 3; i < instr->operands.size(); i++) {
|
||||
check(instr->operands[i].hasRegClass() &&
|
||||
instr->operands[i].regClass().type() == RegType::vgpr,
|
||||
"MIMG operands[3+] (VADDR) must be VGPR", instr.get());
|
||||
if (instr->operands.size() > 4) {
|
||||
if (program->gfx_level < GFX11) {
|
||||
check(instr->operands[i].regClass() == v1,
|
||||
"first 4 GFX11 MIMG VADDR must be v1 if NSA is used", instr.get());
|
||||
"GFX10 MIMG VADDR must be v1 if NSA is used", instr.get());
|
||||
} else {
|
||||
if (instr->opcode != aco_opcode::image_bvh_intersect_ray &&
|
||||
instr->opcode != aco_opcode::image_bvh64_intersect_ray && i < 7) {
|
||||
check(instr->operands[i].regClass() == v1,
|
||||
"first 4 GFX11 MIMG VADDR must be v1 if NSA is used", instr.get());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue