mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 15:20:10 +01:00
aco/gfx115: consider point sample acceleration
Like 15428e0d786939a5c7629a9978947c8a9112ce96 in LLVM.

fossil-db (gfx1150):
Totals from 909 (1.14% of 79653) affected shaders:
Instrs: 5840489 -> 5840705 (+0.00%); split: -0.00%, +0.00%
CodeSize: 31133460 -> 31134296 (+0.00%); split: -0.00%, +0.00%
Latency: 52982280 -> 53438577 (+0.86%); split: -0.00%, +0.86%
InvThroughput: 10841454 -> 10942682 (+0.93%); split: -0.00%, +0.93%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Backport-to: 25.0
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34935>
This commit is contained in:
parent
cbd85acf9a
commit
171920ceed
3 changed files with 94 additions and 8 deletions
|
|
@@ -287,8 +287,13 @@ check_instr(wait_ctx& ctx, wait_imm& wait, Instruction* instr)
|
|||
if (vmem_type && ctx.gfx_level < GFX12) {
|
||||
wait_event event = get_vmem_event(ctx, instr, vmem_type);
|
||||
wait_type type = (wait_type)(ffs(ctx.info->get_counters_for_event(event)) - 1);
|
||||
if ((it->second.events & ctx.info->events[type]) == event &&
|
||||
(type != wait_type_vm || it->second.vmem_types == vmem_type))
|
||||
|
||||
bool event_matches = (it->second.events & ctx.info->events[type]) == event;
|
||||
/* wait_type_vm/counter_vm can have several different vmem_types */
|
||||
bool type_matches = type != wait_type_vm || (it->second.vmem_types == vmem_type &&
|
||||
util_bitcount(vmem_type) == 1);
|
||||
|
||||
if (event_matches && type_matches)
|
||||
reg_imm[type] = wait_imm::unset_counter;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@@ -75,6 +75,7 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
|
|||
case GFX10: program->family = CHIP_NAVI10; break;
|
||||
case GFX10_3: program->family = CHIP_NAVI21; break;
|
||||
case GFX11: program->family = CHIP_NAVI31; break;
|
||||
case GFX11_5: program->family = CHIP_GFX1150; break;
|
||||
case GFX12: program->family = CHIP_GFX1200; break;
|
||||
default: program->family = CHIP_UNKNOWN; break;
|
||||
}
|
||||
|
|
@@ -1460,15 +1461,20 @@ uint8_t
|
|||
get_vmem_type(enum amd_gfx_level gfx_level, Instruction* instr)
|
||||
{
|
||||
if (instr->opcode == aco_opcode::image_bvh64_intersect_ray ||
|
||||
instr->opcode == aco_opcode::image_bvh8_intersect_ray)
|
||||
instr->opcode == aco_opcode::image_bvh8_intersect_ray) {
|
||||
return vmem_bvh;
|
||||
else if (gfx_level >= GFX12 && instr->opcode == aco_opcode::image_msaa_load)
|
||||
} else if (gfx_level >= GFX12 && instr->opcode == aco_opcode::image_msaa_load) {
|
||||
return vmem_sampler;
|
||||
else if (instr->isMIMG() && !instr->operands[1].isUndefined() &&
|
||||
instr->operands[1].regClass() == s4)
|
||||
return vmem_sampler;
|
||||
else if (instr->isVMEM() || instr->isScratch() || instr->isGlobal())
|
||||
} else if (instr->isMIMG() && !instr->operands[1].isUndefined() &&
|
||||
instr->operands[1].regClass() == s4) {
|
||||
bool point_sample_accel =
|
||||
gfx_level == GFX11_5 && (instr->opcode == aco_opcode::image_sample ||
|
||||
instr->opcode == aco_opcode::image_sample_l ||
|
||||
instr->opcode == aco_opcode::image_sample_lz);
|
||||
return vmem_sampler | (point_sample_accel ? vmem_nosampler : 0);
|
||||
} else if (instr->isVMEM() || instr->isScratch() || instr->isGlobal()) {
|
||||
return vmem_nosampler;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@@ -340,6 +340,81 @@ BEGIN_TEST(insert_waitcnt.waw.vmem_types)
|
|||
}
|
||||
END_TEST
|
||||
|
||||
BEGIN_TEST(insert_waitcnt.waw.point_sample_accel)
|
||||
if (!setup_cs(NULL, GFX11_5))
|
||||
return;
|
||||
|
||||
Definition def_v4(PhysReg(260), v1);
|
||||
Operand op_v0(PhysReg(256), v1);
|
||||
Operand desc_s4(PhysReg(0), s4);
|
||||
Operand desc_s8(PhysReg(8), s8);
|
||||
|
||||
/* image_sample has point sample acceleration, but image_sample_b does not. Both are VMEM sample
|
||||
* instructions. */
|
||||
|
||||
//>> p_unit_test 0
|
||||
//! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
//! s_waitcnt vmcnt(0)
|
||||
//! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0));
|
||||
bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
|
||||
//>> p_unit_test 1
|
||||
//! v1: %0:v[4] = image_sample_b %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
//! s_waitcnt vmcnt(0)
|
||||
//! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
bld.reset(program->create_and_insert_block());
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1));
|
||||
bld.mimg(aco_opcode::image_sample_b, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
|
||||
//>> p_unit_test 2
|
||||
//! v1: %0:v[4] = image_load %0:s[8-15], s4: undef, v1: undef, %0:v[0] 1d
|
||||
//! s_waitcnt vmcnt(0)
|
||||
//! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
bld.reset(program->create_and_insert_block());
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2));
|
||||
bld.mimg(aco_opcode::image_load, def_v4, desc_s8, Operand(s4), Operand(v1), op_v0);
|
||||
bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
|
||||
//>> p_unit_test 3
|
||||
//! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
//! s_waitcnt vmcnt(0)
|
||||
//! v1: %0:v[4] = image_sample_b %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
bld.reset(program->create_and_insert_block());
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3));
|
||||
bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
bld.mimg(aco_opcode::image_sample_b, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
|
||||
//>> p_unit_test 4
|
||||
//! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
//! s_waitcnt vmcnt(0)
|
||||
//! v1: %0:v[4] = image_load %0:s[8-15], s4: undef, v1: undef, %0:v[0] 1d
|
||||
bld.reset(program->create_and_insert_block());
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4));
|
||||
bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
bld.mimg(aco_opcode::image_load, def_v4, desc_s8, Operand(s4), Operand(v1), op_v0);
|
||||
|
||||
//>> p_unit_test 5
|
||||
//! v1: %0:v[4] = image_sample_b %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
//! v1: %0:v[4] = image_sample_b %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d
|
||||
bld.reset(program->create_and_insert_block());
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5));
|
||||
bld.mimg(aco_opcode::image_sample_b, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
bld.mimg(aco_opcode::image_sample_b, def_v4, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
|
||||
//>> p_unit_test 5
|
||||
//! v1: %0:v[4] = image_load %0:s[8-15], s4: undef, v1: undef, %0:v[0] 1d
|
||||
//! v1: %0:v[4] = image_load %0:s[8-15], s4: undef, v1: undef, %0:v[0] 1d
|
||||
bld.reset(program->create_and_insert_block());
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5));
|
||||
bld.mimg(aco_opcode::image_load, def_v4, desc_s8, Operand(s4), Operand(v1), op_v0);
|
||||
bld.mimg(aco_opcode::image_load, def_v4, desc_s8, Operand(s4), Operand(v1), op_v0);
|
||||
|
||||
finish_waitcnt_test();
|
||||
END_TEST
|
||||
|
||||
BEGIN_TEST(insert_waitcnt.vmem)
|
||||
if (!setup_cs(NULL, GFX12))
|
||||
return;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue