aco/tests: add barrier-to-waitcnt tests
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36491>
This commit is contained in:
Rhys Perry 2025-07-24 14:44:29 +01:00 committed by Marge Bot
parent 0f32b573a4
commit e2181744c2
2 changed files with 406 additions and 1 deletions

View file

@ -569,7 +569,7 @@ formats = [("pseudo", [Format.PSEUDO], list(itertools.product(range(5), range(7)
("sopp", [Format.SOPP], [(0, 0), (0, 1)]),
("sopc", [Format.SOPC], [(1, 2)]),
("smem", [Format.SMEM], [(0, 4), (0, 3), (1, 0), (1, 3), (1, 2), (1, 1), (0, 0)]),
("ds", [Format.DS], [(1, 0), (1, 1), (1, 2), (1, 3), (0, 3), (0, 4), (2, 3)]),
("ds", [Format.DS], [(1, 0), (1, 1), (1, 2), (1, 3), (0, 2), (0, 3), (0, 4), (2, 3)]),
("ldsdir", [Format.LDSDIR], [(1, 1)]),
("mubuf", [Format.MUBUF], [(0, 4), (1, 3), (1, 4)]),
("mtbuf", [Format.MTBUF], [(0, 4), (1, 3)]),

View file

@ -1150,3 +1150,408 @@ BEGIN_TEST(insert_waitcnt.flat.barrier)
finish_waitcnt_test();
}
END_TEST
static void
barrier(unsigned storage, unsigned semantics, sync_scope scope,
sync_scope exec_scope = scope_invocation)
{
bld.barrier(aco_opcode::p_barrier, memory_sync_info(storage, semantics, scope), exec_scope);
}
static Definition&
load_global(unsigned semantics = 0, sync_scope scope = scope_invocation)
{
Definition dest0(PhysReg(260), v1);
Operand addr(PhysReg(256), v2);
return bld
.global(aco_opcode::global_load_dword, dest0, addr, Operand(s1), 0,
memory_sync_info(storage_buffer, semantics, scope))
.def(0);
}
static void
store_global(unsigned semantics = 0, sync_scope scope = scope_invocation)
{
Operand addr(PhysReg(256), v2);
Operand data(PhysReg(256), v1);
bld.global(aco_opcode::global_store_dword, addr, Operand(s1), data, 0,
memory_sync_info(storage_buffer, semantics, scope));
}
static Definition&
load_shared(unsigned semantics = 0, sync_scope scope = scope_invocation)
{
Definition dest0(PhysReg(260), v1);
Operand offset(PhysReg(256), v1);
Builder::Result res = bld.ds(aco_opcode::ds_read_b32, dest0, offset);
res.instr->ds().sync = memory_sync_info(storage_shared, semantics, scope);
return res.def(0);
}
static void
store_shared(unsigned semantics = 0, sync_scope scope = scope_invocation)
{
Operand offset(PhysReg(256), v1);
Operand data(PhysReg(256), v1);
bld.ds(aco_opcode::ds_write_b32, offset, data).instr->ds().sync =
memory_sync_info(storage_shared, semantics, scope);
}
struct barrier_test_variant {
unsigned workgroup_size;
bool wgp;
const char* name;
};
static const barrier_test_variant barrier_test_variants[] = {
{64, false, "_wg64cu"},
{128, false, "_wg128cu"},
{64, true, "_wg64wgp"},
{128, true, "_wg128wgp"},
};
BEGIN_TEST(insert_waitcnt.barrier.release)
for (barrier_test_variant var : barrier_test_variants) {
if (!setup_cs(NULL, GFX10, CHIP_UNKNOWN, var.name))
continue;
program->workgroup_size = var.workgroup_size;
program->wgp_mode = var.wgp;
Definition dest0(PhysReg(260), v1);
Definition dest1(PhysReg(261), v1);
Operand addr(PhysReg(256), v2);
Operand offset(PhysReg(256), v1);
Operand data(PhysReg(256), v1);
/* global->global, device scope */
//>> p_unit_test 0
//! global_store_dword %0:v[0-1], s1: undef, %0:v[0] storage:buffer
//! s_waitcnt_vscnt %0:null imm:0
//! global_store_dword %0:v[0-1], s1: undef, %0:v[0] storage:buffer semantics:atomic
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0));
store_global();
barrier(storage_buffer, semantic_release, scope_device);
store_global(semantic_atomic);
//>> p_unit_test 1
//! v1: %0:v[4] = global_load_dword %0:v[0-1], s1: undef storage:buffer
//! s_waitcnt vmcnt(0)
//! global_store_dword %0:v[0-1], s1: undef, %0:v[0] storage:buffer semantics:atomic
bld.reset(program->create_and_insert_block());
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1));
load_global();
barrier(storage_buffer, semantic_release, scope_device);
store_global(semantic_atomic);
/* global->global, workgroup scope */
//>> p_unit_test 2
//! global_store_dword %0:v[0-1], s1: undef, %0:v[0] storage:buffer
//~gfx10_wg128wgp! s_waitcnt_vscnt %0:null imm:0
//! global_store_dword %0:v[0-1], s1: undef, %0:v[0] storage:buffer semantics:atomic
bld.reset(program->create_and_insert_block());
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2));
store_global();
barrier(storage_buffer, semantic_release, scope_workgroup);
store_global(semantic_atomic);
//>> p_unit_test 3
//! v1: %0:v[4] = global_load_dword %0:v[0-1], s1: undef storage:buffer
//~gfx10_wg128wgp! s_waitcnt vmcnt(0)
//! global_store_dword %0:v[0-1], s1: undef, %0:v[0] storage:buffer semantics:atomic
bld.reset(program->create_and_insert_block());
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3));
load_global();
barrier(storage_buffer, semantic_release, scope_workgroup);
store_global(semantic_atomic);
/* shared->shared */
//>> p_unit_test 4
//! ds_write_b32 %0:v[0], %0:v[0] storage:shared
//! ds_write_b32 %0:v[0], %0:v[0] storage:shared semantics:atomic
bld.reset(program->create_and_insert_block());
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4));
store_shared();
barrier(storage_shared, semantic_release, scope_workgroup);
store_shared(semantic_atomic);
//>> p_unit_test 5
//! v1: %0:v[4] = ds_read_b32 %0:v[0] storage:shared
//! ds_write_b32 %0:v[0], %0:v[0] storage:shared semantics:atomic
bld.reset(program->create_and_insert_block());
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5));
load_shared();
barrier(storage_shared, semantic_release, scope_workgroup);
store_shared(semantic_atomic);
/* shared->global */
//>> p_unit_test 6
//! ds_write_b32 %0:v[0], %0:v[0] storage:shared
//~gfx10_wg128(cu|wgp)! s_waitcnt lgkmcnt(0)
//! global_store_dword %0:v[0-1], s1: undef, %0:v[0] storage:buffer semantics:atomic
bld.reset(program->create_and_insert_block());
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6));
store_shared();
barrier(storage_buffer | storage_shared, semantic_release, scope_workgroup);
store_global(semantic_atomic);
/* global->shared */
//>> p_unit_test 7
//! global_store_dword %0:v[0-1], s1: undef, %0:v[0] storage:buffer
//~gfx10_wg128(cu|wgp)! s_waitcnt_vscnt %0:null imm:0
//! ds_write_b32 %0:v[0], %0:v[0] storage:shared semantics:atomic
bld.reset(program->create_and_insert_block());
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7));
store_global();
barrier(storage_buffer | storage_shared, semantic_release, scope_workgroup);
store_shared(semantic_atomic);
/* global->global, device scope, release in the atomic */
//>> p_unit_test 8
//! global_store_dword %0:v[0-1], s1: undef, %0:v[0] storage:buffer
//! s_waitcnt_vscnt %0:null imm:0
//! global_store_dword %0:v[0-1], s1: undef, %0:v[0] storage:buffer semantics:release,atomic scope:device
bld.reset(program->create_and_insert_block());
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8));
store_global();
store_global(semantic_atomic | semantic_release, scope_device);
/* global->global, workgroup scope, control barrier */
//>> p_unit_test 9
//! global_store_dword %0:v[0-1], s1: undef, %0:v[0] storage:buffer
//~gfx10_wg128wgp! s_waitcnt_vscnt %0:null imm:0
//~gfx10_wg128cu! s_waitcnt_depctr vm_vsrc(0)
//! s_barrier
bld.reset(program->create_and_insert_block());
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9));
store_global();
barrier(storage_buffer, semantic_release, scope_workgroup);
barrier(0, 0, scope_invocation, scope_workgroup);
bld.sopp(aco_opcode::s_barrier);
/* shared->shared, workgroup scope, control barrier */
//>> p_unit_test 10
//! ds_write_b32 %0:v[0], %0:v[0] storage:shared
//~gfx10_wg128cu! s_waitcnt_depctr vm_vsrc(0)
//~gfx10_wg128wgp! s_waitcnt lgkmcnt(0)
//! s_barrier
bld.reset(program->create_and_insert_block());
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10));
store_shared();
barrier(storage_shared, semantic_release, scope_workgroup);
barrier(0, 0, scope_invocation, scope_workgroup);
bld.sopp(aco_opcode::s_barrier);
/* global->global, device scope, delayed waitcnt */
//>> p_unit_test 11
//! global_store_dword %0:v[0-1], s1: undef, %0:v[0] storage:buffer
//! v_nop
//! s_waitcnt_vscnt %0:null imm:0
//! global_store_dword %0:v[0-1], s1: undef, %0:v[0] storage:buffer semantics:atomic
bld.reset(program->create_and_insert_block());
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11));
store_global();
barrier(storage_buffer, semantic_release, scope_device);
bld.vop1(aco_opcode::v_nop);
store_global(semantic_atomic);
//>> p_unit_test 12
//! global_store_dword %0:v[0-1], s1: undef, %0:v[0] storage:buffer
//! v_nop
//! s_waitcnt_vscnt %0:null imm:0
//! v_nop
//! global_store_dword %0:v[0-1], s1: undef, %0:v[0] storage:buffer semantics:atomic
bld.reset(program->create_and_insert_block());
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12));
store_global();
barrier(storage_buffer, semantic_release, scope_device);
bld.vop1(aco_opcode::v_nop);
wait_imm vs_imm;
vs_imm.vs = 0;
vs_imm.build_waitcnt(bld);
bld.vop1(aco_opcode::v_nop);
store_global(semantic_atomic);
//>> p_unit_test 13
//! global_store_dword %0:v[0-1], s1: undef, %0:v[0] storage:buffer
//! global_store_dword %0:v[0-1], s1: undef, %0:v[0] storage:buffer semantics:private
//! v_nop
//! s_waitcnt_vscnt %0:null imm:1
//! global_store_dword %0:v[0-1], s1: undef, %0:v[0] storage:buffer semantics:atomic
bld.reset(program->create_and_insert_block());
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13));
store_global();
barrier(storage_buffer, semantic_release, scope_device);
store_global(semantic_private);
bld.vop1(aco_opcode::v_nop); /* break up clause */
store_global(semantic_atomic);
finish_waitcnt_test();
}
END_TEST
BEGIN_TEST(insert_waitcnt.barrier.acquire)
for (barrier_test_variant var : barrier_test_variants) {
if (!setup_cs(NULL, GFX10, CHIP_UNKNOWN, var.name))
continue;
program->workgroup_size = var.workgroup_size;
program->wgp_mode = var.wgp;
Definition dest0(PhysReg(260), v1);
Definition dest1(PhysReg(261), v1);
Definition dest2(PhysReg(262), v1);
Operand addr(PhysReg(256), v2);
Operand offset(PhysReg(256), v1);
Operand data(PhysReg(256), v1);
/* global->global, device scope */
//>> p_unit_test 0
//! v1: %0:v[4] = global_load_dword %0:v[0-1], s1: undef storage:buffer semantics:atomic
//! s_waitcnt vmcnt(0)
//! v1: %0:v[5] = global_load_dword %0:v[0-1], s1: undef storage:buffer
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0));
load_global(semantic_atomic);
barrier(storage_buffer, semantic_acquire, scope_device);
load_global() = dest1;
//>> p_unit_test 1
//! v1: %0:v[4] = global_load_dword %0:v[0-1], s1: undef storage:buffer semantics:atomic
//! s_waitcnt vmcnt(0)
//! global_store_dword %0:v[0-1], s1: undef, %0:v[0] storage:buffer
bld.reset(program->create_and_insert_block());
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1));
load_global(semantic_atomic);
barrier(storage_buffer, semantic_acquire, scope_device);
store_global();
/* global->global, workgroup scope */
//>> p_unit_test 2
//! v1: %0:v[4] = global_load_dword %0:v[0-1], s1: undef storage:buffer semantics:atomic
//~gfx10_wg128wgp! s_waitcnt vmcnt(0)
//! v1: %0:v[5] = global_load_dword %0:v[0-1], s1: undef storage:buffer
bld.reset(program->create_and_insert_block());
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2));
load_global(semantic_atomic);
barrier(storage_buffer, semantic_acquire, scope_workgroup);
load_global() = dest1;
//>> p_unit_test 3
//! v1: %0:v[4] = global_load_dword %0:v[0-1], s1: undef storage:buffer semantics:atomic
//~gfx10_wg128wgp! s_waitcnt vmcnt(0)
//! global_store_dword %0:v[0-1], s1: undef, %0:v[0] storage:buffer
bld.reset(program->create_and_insert_block());
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3));
load_global(semantic_atomic);
barrier(storage_buffer, semantic_acquire, scope_workgroup);
store_global();
/* shared->shared */
//>> p_unit_test 4
//! v1: %0:v[4] = ds_read_b32 %0:v[0] storage:shared semantics:atomic
//! v1: %0:v[5] = ds_read_b32 %0:v[0] storage:shared
bld.reset(program->create_and_insert_block());
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4));
load_shared(semantic_atomic);
barrier(storage_shared, semantic_acquire, scope_workgroup);
load_shared() = dest1;
//>> p_unit_test 5
//! v1: %0:v[4] = ds_read_b32 %0:v[0] storage:shared semantics:atomic
//! ds_write_b32 %0:v[0], %0:v[0] storage:shared
bld.reset(program->create_and_insert_block());
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5));
load_shared(semantic_atomic);
barrier(storage_shared, semantic_acquire, scope_workgroup);
store_shared();
/* shared->global */
//>> p_unit_test 6
//! v1: %0:v[4] = ds_read_b32 %0:v[0] storage:shared semantics:atomic
//~gfx10_wg128(cu|wgp)! s_waitcnt lgkmcnt(0)
//! v1: %0:v[5] = global_load_dword %0:v[0-1], s1: undef storage:buffer
bld.reset(program->create_and_insert_block());
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6));
load_shared(semantic_atomic);
barrier(storage_buffer | storage_shared, semantic_acquire, scope_workgroup);
load_global() = dest1;
/* global->shared */
//>> p_unit_test 7
//! v1: %0:v[4] = global_load_dword %0:v[0-1], s1: undef storage:buffer semantics:atomic
//~gfx10_wg128(cu|wgp)! s_waitcnt vmcnt(0)
//! v1: %0:v[5] = ds_read_b32 %0:v[0] storage:shared
bld.reset(program->create_and_insert_block());
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7));
load_global(semantic_atomic);
barrier(storage_buffer | storage_shared, semantic_acquire, scope_workgroup);
load_shared() = dest1;
/* global->global, device scope, acquire in the atomic */
//>> p_unit_test 8
//! v1: %0:v[4] = global_load_dword %0:v[0-1], s1: undef storage:buffer semantics:acquire,atomic scope:device
//! s_waitcnt vmcnt(0)
//! v1: %0:v[5] = global_load_dword %0:v[0-1], s1: undef storage:buffer
bld.reset(program->create_and_insert_block());
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8));
load_global(semantic_atomic | semantic_acquire, scope_device);
load_global() = dest1;
/* global->global, workgroup scope, control barrier */
//>> p_unit_test 9
//! s_barrier
//! v1: %0:v[4] = global_load_dword %0:v[0-1], s1: undef storage:buffer
bld.reset(program->create_and_insert_block());
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9));
barrier(0, 0, scope_invocation, scope_workgroup);
bld.sopp(aco_opcode::s_barrier);
barrier(storage_buffer, semantic_acquire, scope_workgroup);
load_global();
/* global->global, device scope, delayed waitcnt */
//>> p_unit_test 10
//! v1: %0:v[4] = global_load_dword %0:v[0-1], s1: undef storage:buffer semantics:atomic
//! v_nop
//! s_waitcnt vmcnt(0)
//! v1: %0:v[5] = global_load_dword %0:v[0-1], s1: undef storage:buffer
bld.reset(program->create_and_insert_block());
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10));
load_global(semantic_atomic);
barrier(storage_buffer, semantic_acquire, scope_device);
bld.vop1(aco_opcode::v_nop);
load_global() = dest1;
//>> p_unit_test 11
//! v1: %0:v[4] = global_load_dword %0:v[0-1], s1: undef storage:buffer semantics:atomic
//! v_nop
//! s_waitcnt vmcnt(0)
//! v_nop
//! v1: %0:v[5] = global_load_dword %0:v[0-1], s1: undef storage:buffer
bld.reset(program->create_and_insert_block());
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11));
load_global(semantic_atomic);
barrier(storage_buffer, semantic_acquire, scope_device);
bld.vop1(aco_opcode::v_nop);
wait_imm vm_imm;
vm_imm.vm = 0;
vm_imm.build_waitcnt(bld);
bld.vop1(aco_opcode::v_nop);
load_global() = dest1;
//>> p_unit_test 12
//! v1: %0:v[4] = global_load_dword %0:v[0-1], s1: undef storage:buffer semantics:atomic
//! v1: %0:v[5] = global_load_dword %0:v[0-1], s1: undef storage:buffer semantics:private
//! v_nop
//! s_waitcnt vmcnt(1)
//! v1: %0:v[6] = global_load_dword %0:v[0-1], s1: undef storage:buffer
bld.reset(program->create_and_insert_block());
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12));
load_global(semantic_atomic);
barrier(storage_buffer, semantic_acquire, scope_device);
load_global(semantic_private) = dest1;
bld.vop1(aco_opcode::v_nop); /* break up clause */
load_global() = dest2;
finish_waitcnt_test();
}
END_TEST