mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 20:28:04 +02:00
aco: don't consider gfx1153 to have point sample acceleration
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34978>
This commit is contained in:
parent
f10b49781d
commit
86ccceb4de
7 changed files with 86 additions and 84 deletions
|
|
@ -51,7 +51,7 @@ get_type(Program* program, aco_ptr<Instruction>& instr)
|
|||
|
||||
if (program->gfx_level >= GFX11) {
|
||||
if (instr->isMIMG()) {
|
||||
uint8_t vmem_type = get_vmem_type(program->gfx_level, instr.get());
|
||||
uint8_t vmem_type = get_vmem_type(program->gfx_level, program->family, instr.get());
|
||||
switch (vmem_type) {
|
||||
case vmem_bvh: return clause_bvh;
|
||||
case vmem_sampler: return clause_mimg_sample;
|
||||
|
|
|
|||
|
|
@ -1628,9 +1628,10 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr<Instruction>&
|
|||
for (Operand& op : instr->operands)
|
||||
fill_vgpr_bitset(ctx.vgpr_used_by_vmem_store, op.physReg(), op.bytes());
|
||||
} else {
|
||||
uint8_t vmem_type = state.program->gfx_level >= GFX12
|
||||
? get_vmem_type(state.program->gfx_level, instr.get())
|
||||
: vmem_nosampler;
|
||||
uint8_t vmem_type =
|
||||
state.program->gfx_level >= GFX12
|
||||
? get_vmem_type(state.program->gfx_level, state.program->family, instr.get())
|
||||
: vmem_nosampler;
|
||||
std::bitset<256>* vgprs = &ctx.vgpr_used_by_vmem_load;
|
||||
if (vmem_type == vmem_sampler)
|
||||
vgprs = &ctx.vgpr_used_by_vmem_sample;
|
||||
|
|
|
|||
|
|
@ -366,7 +366,7 @@ check_instr(wait_ctx& ctx, wait_imm& wait, Instruction* instr)
|
|||
* We can do this for GFX12 and different types for GFX11 if we know that the two
|
||||
* VMEM loads do not write the same register half or the same lanes.
|
||||
*/
|
||||
uint8_t vmem_type = get_vmem_type(ctx.gfx_level, instr);
|
||||
uint8_t vmem_type = get_vmem_type(ctx.gfx_level, ctx.program->family, instr);
|
||||
if (vmem_type) {
|
||||
wait_event event = get_vmem_event(ctx, instr, vmem_type);
|
||||
wait_type type = (wait_type)(ffs(ctx.info->get_counters_for_event(event)) - 1);
|
||||
|
|
@ -740,7 +740,7 @@ gen(Instruction* instr, wait_ctx& ctx)
|
|||
case Format::MIMG:
|
||||
case Format::GLOBAL:
|
||||
case Format::SCRATCH: {
|
||||
uint8_t type = get_vmem_type(ctx.gfx_level, instr);
|
||||
uint8_t type = get_vmem_type(ctx.gfx_level, ctx.program->family, instr);
|
||||
wait_event ev = get_vmem_event(ctx, instr, type);
|
||||
uint32_t mask = ev == event_vmem ? get_vmem_mask(ctx, instr) : 0;
|
||||
|
||||
|
|
|
|||
|
|
@ -1452,7 +1452,7 @@ get_tied_defs(Instruction* instr)
|
|||
}
|
||||
|
||||
uint8_t
|
||||
get_vmem_type(enum amd_gfx_level gfx_level, Instruction* instr)
|
||||
get_vmem_type(amd_gfx_level gfx_level, radeon_family family, Instruction* instr)
|
||||
{
|
||||
if (instr->opcode == aco_opcode::image_bvh_intersect_ray ||
|
||||
instr->opcode == aco_opcode::image_bvh64_intersect_ray ||
|
||||
|
|
@ -1463,10 +1463,10 @@ get_vmem_type(enum amd_gfx_level gfx_level, Instruction* instr)
|
|||
return vmem_sampler;
|
||||
} else if (instr->isMIMG() && !instr->operands[1].isUndefined() &&
|
||||
instr->operands[1].regClass() == s4) {
|
||||
bool point_sample_accel =
|
||||
gfx_level == GFX11_5 && (instr->opcode == aco_opcode::image_sample ||
|
||||
instr->opcode == aco_opcode::image_sample_l ||
|
||||
instr->opcode == aco_opcode::image_sample_lz);
|
||||
bool point_sample_accel = gfx_level == GFX11_5 && family != CHIP_GFX1153 &&
|
||||
(instr->opcode == aco_opcode::image_sample ||
|
||||
instr->opcode == aco_opcode::image_sample_l ||
|
||||
instr->opcode == aco_opcode::image_sample_lz);
|
||||
return vmem_sampler | (point_sample_accel ? vmem_nosampler : 0);
|
||||
} else if (instr->isVMEM() || instr->isScratch() || instr->isGlobal()) {
|
||||
return vmem_nosampler;
|
||||
|
|
|
|||
|
|
@ -1916,7 +1916,7 @@ enum vmem_type : uint8_t {
|
|||
/* VMEM instructions of the same type return in-order. For GFX12+, this determines which counter
|
||||
* is used.
|
||||
*/
|
||||
uint8_t get_vmem_type(enum amd_gfx_level gfx_level, Instruction* instr);
|
||||
uint8_t get_vmem_type(amd_gfx_level gfx_level, radeon_family family, Instruction* instr);
|
||||
|
||||
/* For all of the counters, the maximum value means no wait.
|
||||
* Some of the counters are larger than their bit field,
|
||||
|
|
|
|||
|
|
@ -263,7 +263,7 @@ BlockCycleEstimator::cycles_until_res_available(aco_ptr<Instruction>& instr)
|
|||
}
|
||||
|
||||
static std::array<unsigned, wait_type_num>
|
||||
get_wait_counter_info(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr)
|
||||
get_wait_counter_info(Program* program, aco_ptr<Instruction>& instr)
|
||||
{
|
||||
/* These numbers are all a bit nonsense. LDS/VMEM/SMEM/EXP performance
|
||||
* depends a lot on the situation. */
|
||||
|
|
@ -276,12 +276,12 @@ get_wait_counter_info(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr)
|
|||
info[wait_type_exp] = 13;
|
||||
} else if (instr->isFlatLike()) {
|
||||
info[wait_type_lgkm] = instr->isFlat() ? 20 : 0;
|
||||
if (!instr->definitions.empty() || gfx_level < GFX10)
|
||||
if (!instr->definitions.empty() || program->gfx_level < GFX10)
|
||||
info[wait_type_vm] = 320;
|
||||
else
|
||||
info[wait_type_vs] = 320;
|
||||
} else if (instr->isSMEM()) {
|
||||
wait_type type = gfx_level >= GFX12 ? wait_type_km : wait_type_lgkm;
|
||||
wait_type type = program->gfx_level >= GFX12 ? wait_type_km : wait_type_lgkm;
|
||||
if (instr->definitions.empty()) {
|
||||
info[type] = 200;
|
||||
} else if (instr->operands.empty()) { /* s_memtime and s_memrealtime */
|
||||
|
|
@ -299,14 +299,14 @@ get_wait_counter_info(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr)
|
|||
}
|
||||
} else if (instr->isDS()) {
|
||||
info[wait_type_lgkm] = 20;
|
||||
} else if (instr->isVMEM() && instr->definitions.empty() && gfx_level >= GFX10) {
|
||||
} else if (instr->isVMEM() && instr->definitions.empty() && program->gfx_level >= GFX10) {
|
||||
info[wait_type_vs] = 320;
|
||||
} else if (instr->isVMEM()) {
|
||||
uint8_t vm_type = get_vmem_type(gfx_level, instr.get());
|
||||
uint8_t vm_type = get_vmem_type(program->gfx_level, program->family, instr.get());
|
||||
wait_type type = wait_type_vm;
|
||||
if (gfx_level >= GFX12 && vm_type == vmem_bvh)
|
||||
if (program->gfx_level >= GFX12 && vm_type == vmem_bvh)
|
||||
type = wait_type_bvh;
|
||||
else if (gfx_level >= GFX12 && vm_type == vmem_sampler)
|
||||
else if (program->gfx_level >= GFX12 && vm_type == vmem_sampler)
|
||||
type = wait_type_sample;
|
||||
info[type] = 320;
|
||||
}
|
||||
|
|
@ -328,8 +328,7 @@ get_wait_imm(Program* program, aco_ptr<Instruction>& instr)
|
|||
imm.exp = wait_imm::unset_counter;
|
||||
} else {
|
||||
/* If an instruction increases a counter, it waits for it to be below maximum first. */
|
||||
std::array<unsigned, wait_type_num> wait_info =
|
||||
get_wait_counter_info(program->gfx_level, instr);
|
||||
std::array<unsigned, wait_type_num> wait_info = get_wait_counter_info(program, instr);
|
||||
wait_imm max = wait_imm::max(program->gfx_level);
|
||||
for (unsigned i = 0; i < wait_type_num; i++) {
|
||||
if (wait_info[i])
|
||||
|
|
@ -418,7 +417,7 @@ BlockCycleEstimator::add(aco_ptr<Instruction>& instr)
|
|||
mem_ops[i].pop_front();
|
||||
}
|
||||
|
||||
std::array<unsigned, wait_type_num> wait_info = get_wait_counter_info(program->gfx_level, instr);
|
||||
std::array<unsigned, wait_type_num> wait_info = get_wait_counter_info(program, instr);
|
||||
for (unsigned i = 0; i < wait_type_num; i++) {
|
||||
if (wait_info[i])
|
||||
mem_ops[i].push_back(cur_cycle + wait_info[i]);
|
||||
|
|
|
|||
|
|
@ -341,78 +341,80 @@ BEGIN_TEST(insert_waitcnt.waw.vmem_types)
|
|||
END_TEST
|
||||
|
||||
BEGIN_TEST(insert_waitcnt.waw.point_sample_accel)
|
||||
if (!setup_cs(NULL, GFX11_5))
|
||||
return;
|
||||
for (radeon_family family : {CHIP_GFX1150, CHIP_GFX1153}) {
|
||||
if (!setup_cs(NULL, GFX11_5, family, family == CHIP_GFX1153 ? "_3" : "_0"))
|
||||
continue;
|
||||
|
||||
Definition def_v4(PhysReg(260), v1);
|
||||
Operand op_v0(PhysReg(256), v1);
|
||||
Operand desc_s4(PhysReg(0), s4);
|
||||
Operand desc_s8(PhysReg(8), s8);
|
||||
Definition def_v4(PhysReg(260), v1);
|
||||
Operand op_v0(PhysReg(256), v1);
|
||||
Operand desc_s4(PhysReg(0), s4);
|
||||
Operand desc_s8(PhysReg(8), s8);
|
||||
|
||||
/* image_sample has point sample acceleration, but image_sample_b does not. Both are VMEM sample
|
||||
* instructions. */
|
||||
/* image_sample has point sample acceleration, but image_sample_b does not. Both are VMEM
|
||||
* sample instructions. */
|
||||
|
||||
//>> p_unit_test 0
|
||||
//! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
//! s_waitcnt vmcnt(0)
|
||||
//! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0));
|
||||
bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
//>> p_unit_test 0
|
||||
//! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
//~gfx11_5_0! s_waitcnt vmcnt(0)
|
||||
//! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0));
|
||||
bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
|
||||
//>> p_unit_test 1
|
||||
//! v1: %0:v[4] = image_sample_b %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
//! s_waitcnt vmcnt(0)
|
||||
//! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
bld.reset(program->create_and_insert_block());
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1));
|
||||
bld.mimg(aco_opcode::image_sample_b, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
//>> p_unit_test 1
|
||||
//! v1: %0:v[4] = image_sample_b %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
//~gfx11_5_0! s_waitcnt vmcnt(0)
|
||||
//! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
bld.reset(program->create_and_insert_block());
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1));
|
||||
bld.mimg(aco_opcode::image_sample_b, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
|
||||
//>> p_unit_test 2
|
||||
//! v1: %0:v[4] = image_load %0:s[8-15], s4: undef, v1: undef, %0:v[0] 1d
|
||||
//! s_waitcnt vmcnt(0)
|
||||
//! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
bld.reset(program->create_and_insert_block());
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2));
|
||||
bld.mimg(aco_opcode::image_load, def_v4, desc_s8, Operand(s4), Operand(v1), op_v0);
|
||||
bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
//>> p_unit_test 2
|
||||
//! v1: %0:v[4] = image_load %0:s[8-15], s4: undef, v1: undef, %0:v[0] 1d
|
||||
//! s_waitcnt vmcnt(0)
|
||||
//! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
bld.reset(program->create_and_insert_block());
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2));
|
||||
bld.mimg(aco_opcode::image_load, def_v4, desc_s8, Operand(s4), Operand(v1), op_v0);
|
||||
bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
|
||||
//>> p_unit_test 3
|
||||
//! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
//! s_waitcnt vmcnt(0)
|
||||
//! v1: %0:v[4] = image_sample_b %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
bld.reset(program->create_and_insert_block());
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3));
|
||||
bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
bld.mimg(aco_opcode::image_sample_b, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
//>> p_unit_test 3
|
||||
//! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
//~gfx11_5_0! s_waitcnt vmcnt(0)
|
||||
//! v1: %0:v[4] = image_sample_b %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
bld.reset(program->create_and_insert_block());
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3));
|
||||
bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
bld.mimg(aco_opcode::image_sample_b, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
|
||||
//>> p_unit_test 4
|
||||
//! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
//! s_waitcnt vmcnt(0)
|
||||
//! v1: %0:v[4] = image_load %0:s[8-15], s4: undef, v1: undef, %0:v[0] 1d
|
||||
bld.reset(program->create_and_insert_block());
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4));
|
||||
bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
bld.mimg(aco_opcode::image_load, def_v4, desc_s8, Operand(s4), Operand(v1), op_v0);
|
||||
//>> p_unit_test 4
|
||||
//! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
//! s_waitcnt vmcnt(0)
|
||||
//! v1: %0:v[4] = image_load %0:s[8-15], s4: undef, v1: undef, %0:v[0] 1d
|
||||
bld.reset(program->create_and_insert_block());
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4));
|
||||
bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
bld.mimg(aco_opcode::image_load, def_v4, desc_s8, Operand(s4), Operand(v1), op_v0);
|
||||
|
||||
//>> p_unit_test 5
|
||||
//! v1: %0:v[4] = image_sample_b %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
//! v1: %0:v[4] = image_sample_b %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
bld.reset(program->create_and_insert_block());
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5));
|
||||
bld.mimg(aco_opcode::image_sample_b, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
bld.mimg(aco_opcode::image_sample_b, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
//>> p_unit_test 5
|
||||
//! v1: %0:v[4] = image_sample_b %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
//! v1: %0:v[4] = image_sample_b %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
bld.reset(program->create_and_insert_block());
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5));
|
||||
bld.mimg(aco_opcode::image_sample_b, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
bld.mimg(aco_opcode::image_sample_b, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
|
||||
//>> p_unit_test 5
|
||||
//! v1: %0:v[4] = image_load %0:s[8-15], s4: undef, v1: undef, %0:v[0] 1d
|
||||
//! v1: %0:v[4] = image_load %0:s[8-15], s4: undef, v1: undef, %0:v[0] 1d
|
||||
bld.reset(program->create_and_insert_block());
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5));
|
||||
bld.mimg(aco_opcode::image_load, def_v4, desc_s8, Operand(s4), Operand(v1), op_v0);
|
||||
bld.mimg(aco_opcode::image_load, def_v4, desc_s8, Operand(s4), Operand(v1), op_v0);
|
||||
//>> p_unit_test 5
|
||||
//! v1: %0:v[4] = image_load %0:s[8-15], s4: undef, v1: undef, %0:v[0] 1d
|
||||
//! v1: %0:v[4] = image_load %0:s[8-15], s4: undef, v1: undef, %0:v[0] 1d
|
||||
bld.reset(program->create_and_insert_block());
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5));
|
||||
bld.mimg(aco_opcode::image_load, def_v4, desc_s8, Operand(s4), Operand(v1), op_v0);
|
||||
bld.mimg(aco_opcode::image_load, def_v4, desc_s8, Operand(s4), Operand(v1), op_v0);
|
||||
|
||||
finish_waitcnt_test();
|
||||
finish_waitcnt_test();
|
||||
}
|
||||
END_TEST
|
||||
|
||||
BEGIN_TEST(insert_waitcnt.vmem)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue