aco: reformat according to its .clang-format

Signed-off-by: Eric Engestrom <eric@igalia.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23253>
This commit is contained in:
Eric Engestrom 2023-05-26 12:55:35 +01:00 committed by Marge Bot
parent 8b319c6db8
commit 6b21653ab4
34 changed files with 1556 additions and 1430 deletions

View file

@ -1160,8 +1160,7 @@ emit_long_jump(asm_context& ctx, SOPP_instruction* branch, bool backwards,
emit_instruction(ctx, out, instr.get()); emit_instruction(ctx, out, instr.get());
/* create the s_setpc_b64 to jump */ /* create the s_setpc_b64 to jump */
instr.reset( instr.reset(bld.sop1(aco_opcode::s_setpc_b64, Operand(def.physReg(), s2)).instr);
bld.sop1(aco_opcode::s_setpc_b64, Operand(def.physReg(), s2)).instr);
emit_instruction(ctx, out, instr.get()); emit_instruction(ctx, out, instr.get());
} }
@ -1218,8 +1217,7 @@ fix_constaddrs(asm_context& ctx, std::vector<uint32_t>& out)
} }
unsigned unsigned
emit_program(Program* program, std::vector<uint32_t>& code, emit_program(Program* program, std::vector<uint32_t>& code, std::vector<struct aco_symbol>* symbols)
std::vector<struct aco_symbol>* symbols)
{ {
asm_context ctx(program, symbols); asm_context ctx(program, symbols);
@ -1252,8 +1250,8 @@ emit_program(Program* program, std::vector<uint32_t>& code,
code.insert(code.end(), (uint32_t*)program->constant_data.data(), code.insert(code.end(), (uint32_t*)program->constant_data.data(),
(uint32_t*)(program->constant_data.data() + program->constant_data.size())); (uint32_t*)(program->constant_data.data() + program->constant_data.size()));
program->config->scratch_bytes_per_wave = align( program->config->scratch_bytes_per_wave =
program->config->scratch_bytes_per_wave, program->dev.scratch_alloc_granule); align(program->config->scratch_bytes_per_wave, program->dev.scratch_alloc_granule);
return exec_size; return exec_size;
} }

View file

@ -254,8 +254,7 @@ public:
void join_min(const VGPRCounterMap& other) void join_min(const VGPRCounterMap& other)
{ {
unsigned i; unsigned i;
BITSET_FOREACH_SET(i, other.resident, 256) BITSET_FOREACH_SET (i, other.resident, 256) {
{
if (BITSET_TEST(resident, i)) if (BITSET_TEST(resident, i))
val[i] = MIN2(val[i] + base, other.val[i] + other.base) - base; val[i] = MIN2(val[i] + base, other.val[i] + other.base) - base;
else else
@ -270,8 +269,7 @@ public:
return false; return false;
unsigned i; unsigned i;
BITSET_FOREACH_SET(i, other.resident, 256) BITSET_FOREACH_SET (i, other.resident, 256) {
{
if (!BITSET_TEST(resident, i)) if (!BITSET_TEST(resident, i))
return false; return false;
if (val[i] + base != other.val[i] + other.base) if (val[i] + base != other.val[i] + other.base)
@ -365,11 +363,11 @@ search_backwards_internal(State& state, GlobalState& global_state, BlockState bl
return; return;
} }
PRAGMA_DIAGNOSTIC_PUSH PRAGMA_DIAGNOSTIC_PUSH
PRAGMA_DIAGNOSTIC_IGNORED(-Waddress) PRAGMA_DIAGNOSTIC_IGNORED(-Waddress)
if (block_cb != nullptr && !block_cb(global_state, block_state, block)) if (block_cb != nullptr && !block_cb(global_state, block_state, block))
return; return;
PRAGMA_DIAGNOSTIC_POP PRAGMA_DIAGNOSTIC_POP
for (unsigned lin_pred : block->linear_preds) { for (unsigned lin_pred : block->linear_preds) {
search_backwards_internal<GlobalState, BlockState, block_cb, instr_cb>( search_backwards_internal<GlobalState, BlockState, block_cb, instr_cb>(

View file

@ -52,8 +52,7 @@ struct wqm_ctx {
/* state for WQM propagation */ /* state for WQM propagation */
std::set<unsigned> worklist; std::set<unsigned> worklist;
std::vector<bool> branch_wqm; /* true if the branch condition in this block should be in wqm */ std::vector<bool> branch_wqm; /* true if the branch condition in this block should be in wqm */
wqm_ctx(Program* program_) wqm_ctx(Program* program_) : program(program_), branch_wqm(program->blocks.size())
: program(program_), branch_wqm(program->blocks.size())
{ {
for (unsigned i = 0; i < program->blocks.size(); i++) for (unsigned i = 0; i < program->blocks.size(); i++)
worklist.insert(i); worklist.insert(i);
@ -137,8 +136,7 @@ get_block_needs(wqm_ctx& ctx, exec_ctx& exec_ctx, Block* block)
propagate_wqm = true; propagate_wqm = true;
bool pred_by_exec = needs_exec_mask(instr.get()) || bool pred_by_exec = needs_exec_mask(instr.get()) ||
instr->opcode == aco_opcode::p_logical_end || instr->opcode == aco_opcode::p_logical_end || instr->isBranch();
instr->isBranch();
if (needs_exact(instr)) if (needs_exact(instr))
instr_needs[i] = Exact; instr_needs[i] = Exact;
@ -574,7 +572,8 @@ process_instructions(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instructio
* WQM again. * WQM again.
*/ */
ctx.info[block->index].exec.resize(1); ctx.info[block->index].exec.resize(1);
assert(ctx.info[block->index].exec[0].second == (mask_type_exact | mask_type_global)); assert(ctx.info[block->index].exec[0].second ==
(mask_type_exact | mask_type_global));
current_exec = get_exec_op(ctx.info[block->index].exec.back().first); current_exec = get_exec_op(ctx.info[block->index].exec.back().first);
ctx.info[block->index].exec[0].first = Operand(bld.lm); ctx.info[block->index].exec[0].first = Operand(bld.lm);
} }

View file

@ -91,9 +91,8 @@ enum vmem_type : uint8_t {
vmem_bvh = 1 << 2, vmem_bvh = 1 << 2,
}; };
static const uint16_t exp_events = static const uint16_t exp_events = event_exp_pos | event_exp_param | event_exp_mrt_null |
event_exp_pos | event_exp_param | event_exp_mrt_null | event_gds_gpr_lock | event_vmem_gpr_lock | event_gds_gpr_lock | event_vmem_gpr_lock | event_ldsdir;
event_ldsdir;
static const uint16_t lgkm_events = event_smem | event_lds | event_gds | event_flat | event_sendmsg; static const uint16_t lgkm_events = event_smem | event_lds | event_gds | event_flat | event_sendmsg;
static const uint16_t vm_events = event_vmem | event_flat; static const uint16_t vm_events = event_vmem | event_flat;
static const uint16_t vs_events = event_vmem_store; static const uint16_t vs_events = event_vmem_store;
@ -580,7 +579,8 @@ kill(wait_imm& imm, alu_delay_info& delay, Instruction* instr, wait_ctx& ctx,
} }
if (ctx.program->gfx_level >= GFX11) { if (ctx.program->gfx_level >= GFX11) {
update_alu(ctx, false, false, false, MAX3(delay.salu_cycles, delay.valu_cycles, delay.trans_cycles)); update_alu(ctx, false, false, false,
MAX3(delay.salu_cycles, delay.valu_cycles, delay.trans_cycles));
} }
/* remove all gprs with higher counter from map */ /* remove all gprs with higher counter from map */
@ -775,8 +775,7 @@ insert_wait_entry(wait_ctx& ctx, Definition def, wait_event event, uint8_t vmem_
*/ */
uint32_t ds_vmem_events = event_lds | event_gds | event_vmem | event_flat; uint32_t ds_vmem_events = event_lds | event_gds | event_vmem | event_flat;
uint32_t alu_events = event_trans | event_valu | event_salu; uint32_t alu_events = event_trans | event_valu | event_salu;
bool force_linear = bool force_linear = ctx.gfx_level >= GFX11 && (event & (ds_vmem_events | alu_events));
ctx.gfx_level >= GFX11 && (event & (ds_vmem_events | alu_events));
insert_wait_entry(ctx, def.physReg(), def.regClass(), event, true, vmem_types, cycles, insert_wait_entry(ctx, def.physReg(), def.regClass(), event, true, vmem_types, cycles,
force_linear); force_linear);

View file

@ -26,8 +26,8 @@
#include "aco_instruction_selection.h" #include "aco_instruction_selection.h"
#include "aco_builder.h" #include "aco_builder.h"
#include "aco_ir.h"
#include "aco_interface.h" #include "aco_interface.h"
#include "aco_ir.h"
#include "common/ac_nir.h" #include "common/ac_nir.h"
#include "common/sid.h" #include "common/sid.h"
@ -661,8 +661,8 @@ convert_int(isel_context* ctx, Builder& bld, Temp src, unsigned src_bits, unsign
Operand::c32(src_bits), Operand::c32((unsigned)sign_extend)); Operand::c32(src_bits), Operand::c32((unsigned)sign_extend));
} else { } else {
assert(src_bits < 32); assert(src_bits < 32);
bld.pseudo(aco_opcode::p_extract, Definition(tmp), src, Operand::zero(), Operand::c32(src_bits), bld.pseudo(aco_opcode::p_extract, Definition(tmp), src, Operand::zero(),
Operand::c32((unsigned)sign_extend)); Operand::c32(src_bits), Operand::c32((unsigned)sign_extend));
} }
if (dst_bits == 64) { if (dst_bits == 64) {
@ -1894,8 +1894,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
} }
case nir_op_uadd_sat: { case nir_op_uadd_sat: {
if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) { if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
Instruction* add_instr = Instruction* add_instr = emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_u16, dst);
emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_u16, dst);
add_instr->valu().clamp = 1; add_instr->valu().clamp = 1;
break; break;
} }
@ -1977,8 +1976,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
} }
case nir_op_iadd_sat: { case nir_op_iadd_sat: {
if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) { if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
Instruction* add_instr = Instruction* add_instr = emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_i16, dst);
emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_i16, dst);
add_instr->valu().clamp = 1; add_instr->valu().clamp = 1;
break; break;
} }
@ -3316,8 +3314,8 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
exponent_large); exponent_large);
Temp cond = Temp cond =
bld.sopc(aco_opcode::s_cmp_ge_i32, bld.def(s1, scc), Operand::c32(64u), exponent); bld.sopc(aco_opcode::s_cmp_ge_i32, bld.def(s1, scc), Operand::c32(64u), exponent);
mantissa = bld.sop2(aco_opcode::s_cselect_b64, bld.def(s2), mantissa, mantissa =
Operand::c64(~0llu), cond); bld.sop2(aco_opcode::s_cselect_b64, bld.def(s2), mantissa, Operand::c64(~0llu), cond);
Temp lower = bld.tmp(s1), upper = bld.tmp(s1); Temp lower = bld.tmp(s1), upper = bld.tmp(s1);
bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), mantissa); bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), mantissa);
Temp cond_small = Temp cond_small =
@ -3483,9 +3481,8 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
case nir_op_unpack_64_4x16: case nir_op_unpack_64_4x16:
case nir_op_unpack_32_4x8: case nir_op_unpack_32_4x8:
bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0])); bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0]));
emit_split_vector(ctx, dst, emit_split_vector(
instr->op == nir_op_unpack_32_4x8 || ctx, dst, instr->op == nir_op_unpack_32_4x8 || instr->op == nir_op_unpack_64_4x16 ? 4 : 2);
instr->op == nir_op_unpack_64_4x16 ? 4 : 2);
break; break;
case nir_op_pack_64_2x32_split: { case nir_op_pack_64_2x32_split: {
Temp src0 = get_alu_src(ctx, instr->src[0]); Temp src0 = get_alu_src(ctx, instr->src[0]);
@ -4176,9 +4173,10 @@ emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info,
aligned_offset = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), lo, hi); aligned_offset = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), lo, hi);
} }
} }
Temp aligned_offset_tmp = Temp aligned_offset_tmp = aligned_offset.isTemp() ? aligned_offset.getTemp()
aligned_offset.isTemp() ? aligned_offset.getTemp() : : aligned_offset.isConstant()
aligned_offset.isConstant() ? bld.copy(bld.def(s1), aligned_offset) : Temp(0, s1); ? bld.copy(bld.def(s1), aligned_offset)
: Temp(0, s1);
Temp val = params.callback(bld, info, aligned_offset_tmp, bytes_needed, align, Temp val = params.callback(bld, info, aligned_offset_tmp, bytes_needed, align,
reduced_const_offset, byte_align ? Temp() : info.dst); reduced_const_offset, byte_align ? Temp() : info.dst);
@ -4508,8 +4506,7 @@ mubuf_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigne
mubuf->offen = offen; mubuf->offen = offen;
mubuf->idxen = idxen; mubuf->idxen = idxen;
mubuf->glc = info.glc; mubuf->glc = info.glc;
mubuf->dlc = mubuf->dlc = info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
mubuf->slc = info.slc; mubuf->slc = info.slc;
mubuf->sync = info.sync; mubuf->sync = info.sync;
mubuf->offset = const_offset; mubuf->offset = const_offset;
@ -4552,40 +4549,20 @@ mubuf_load_format_callback(Builder& bld, const LoadEmitInfo& info, Temp offset,
aco_opcode op = aco_opcode::num_opcodes; aco_opcode op = aco_opcode::num_opcodes;
if (info.component_size == 2) { if (info.component_size == 2) {
switch (bytes_needed) { switch (bytes_needed) {
case 2: case 2: op = aco_opcode::buffer_load_format_d16_x; break;
op = aco_opcode::buffer_load_format_d16_x; case 4: op = aco_opcode::buffer_load_format_d16_xy; break;
break; case 6: op = aco_opcode::buffer_load_format_d16_xyz; break;
case 4: case 8: op = aco_opcode::buffer_load_format_d16_xyzw; break;
op = aco_opcode::buffer_load_format_d16_xy; default: unreachable("invalid buffer load format size"); break;
break;
case 6:
op = aco_opcode::buffer_load_format_d16_xyz;
break;
case 8:
op = aco_opcode::buffer_load_format_d16_xyzw;
break;
default:
unreachable("invalid buffer load format size");
break;
} }
} else { } else {
assert(info.component_size == 4); assert(info.component_size == 4);
switch (bytes_needed) { switch (bytes_needed) {
case 4: case 4: op = aco_opcode::buffer_load_format_x; break;
op = aco_opcode::buffer_load_format_x; case 8: op = aco_opcode::buffer_load_format_xy; break;
break; case 12: op = aco_opcode::buffer_load_format_xyz; break;
case 8: case 16: op = aco_opcode::buffer_load_format_xyzw; break;
op = aco_opcode::buffer_load_format_xy; default: unreachable("invalid buffer load format size"); break;
break;
case 12:
op = aco_opcode::buffer_load_format_xyz;
break;
case 16:
op = aco_opcode::buffer_load_format_xyzw;
break;
default:
unreachable("invalid buffer load format size");
break;
} }
} }
@ -4596,8 +4573,7 @@ mubuf_load_format_callback(Builder& bld, const LoadEmitInfo& info, Temp offset,
mubuf->offen = offen; mubuf->offen = offen;
mubuf->idxen = idxen; mubuf->idxen = idxen;
mubuf->glc = info.glc; mubuf->glc = info.glc;
mubuf->dlc = mubuf->dlc = info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
mubuf->slc = info.slc; mubuf->slc = info.slc;
mubuf->sync = info.sync; mubuf->sync = info.sync;
mubuf->offset = const_offset; mubuf->offset = const_offset;
@ -5229,9 +5205,9 @@ resolve_excess_vmem_const_offset(Builder& bld, Temp& voffset, unsigned const_off
} }
void void
emit_single_mubuf_store(isel_context* ctx, Temp descriptor, Temp voffset, Temp soffset, Temp idx, Temp vdata, emit_single_mubuf_store(isel_context* ctx, Temp descriptor, Temp voffset, Temp soffset, Temp idx,
unsigned const_offset, memory_sync_info sync, bool glc, bool slc, Temp vdata, unsigned const_offset, memory_sync_info sync, bool glc,
bool swizzled) bool slc, bool swizzled)
{ {
assert(vdata.id()); assert(vdata.id());
assert(vdata.size() != 3 || ctx->program->gfx_level != GFX6); assert(vdata.size() != 3 || ctx->program->gfx_level != GFX6);
@ -5256,8 +5232,8 @@ emit_single_mubuf_store(isel_context* ctx, Temp descriptor, Temp voffset, Temp s
vaddr_op = Operand(idx); vaddr_op = Operand(idx);
Builder::Result r = Builder::Result r =
bld.mubuf(op, Operand(descriptor), vaddr_op, soffset_op, Operand(vdata), const_offset, bld.mubuf(op, Operand(descriptor), vaddr_op, soffset_op, Operand(vdata), const_offset, offen,
offen, swizzled, idxen, /* addr64 */ false, /* disable_wqm */ false, glc, swizzled, idxen, /* addr64 */ false, /* disable_wqm */ false, glc,
/* dlc*/ false, slc); /* dlc*/ false, slc);
r->mubuf().sync = sync; r->mubuf().sync = sync;
@ -5269,7 +5245,8 @@ store_vmem_mubuf(isel_context* ctx, Temp src, Temp descriptor, Temp voffset, Tem
bool swizzled, memory_sync_info sync, bool glc, bool slc) bool swizzled, memory_sync_info sync, bool glc, bool slc)
{ {
Builder bld(ctx->program, ctx->block); Builder bld(ctx->program, ctx->block);
assert(elem_size_bytes == 1 || elem_size_bytes == 2 || elem_size_bytes == 4 || elem_size_bytes == 8); assert(elem_size_bytes == 1 || elem_size_bytes == 2 || elem_size_bytes == 4 ||
elem_size_bytes == 8);
assert(write_mask); assert(write_mask);
write_mask = util_widen_mask(write_mask, elem_size_bytes); write_mask = util_widen_mask(write_mask, elem_size_bytes);
@ -5282,8 +5259,8 @@ store_vmem_mubuf(isel_context* ctx, Temp src, Temp descriptor, Temp voffset, Tem
for (unsigned i = 0; i < write_count; i++) { for (unsigned i = 0; i < write_count; i++) {
unsigned const_offset = offsets[i] + base_const_offset; unsigned const_offset = offsets[i] + base_const_offset;
emit_single_mubuf_store(ctx, descriptor, voffset, soffset, idx, write_datas[i], const_offset, sync, emit_single_mubuf_store(ctx, descriptor, voffset, soffset, idx, write_datas[i], const_offset,
glc, slc, swizzled); sync, glc, slc, swizzled);
} }
} }
@ -6331,8 +6308,7 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr)
if (instr->intrinsic == nir_intrinsic_bindless_image_fragment_mask_load_amd) { if (instr->intrinsic == nir_intrinsic_bindless_image_fragment_mask_load_amd) {
opcode = aco_opcode::image_load; opcode = aco_opcode::image_load;
} else { } else {
bool level_zero = bool level_zero = nir_src_is_const(instr->src[3]) && nir_src_as_uint(instr->src[3]) == 0;
nir_src_is_const(instr->src[3]) && nir_src_as_uint(instr->src[3]) == 0;
opcode = level_zero ? aco_opcode::image_load : aco_opcode::image_load_mip; opcode = level_zero ? aco_opcode::image_load : aco_opcode::image_load_mip;
} }
@ -6391,8 +6367,7 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
memory_sync_info sync = get_memory_sync_info(instr, storage_image, 0); memory_sync_info sync = get_memory_sync_info(instr, storage_image, 0);
unsigned access = nir_intrinsic_access(instr); unsigned access = nir_intrinsic_access(instr);
bool glc = ctx->options->gfx_level == GFX6 || bool glc = ctx->options->gfx_level == GFX6 ||
((access & (ACCESS_VOLATILE | ACCESS_COHERENT)) && ((access & (ACCESS_VOLATILE | ACCESS_COHERENT)) && ctx->program->gfx_level < GFX11);
ctx->program->gfx_level < GFX11);
if (dim == GLSL_SAMPLER_DIM_BUF) { if (dim == GLSL_SAMPLER_DIM_BUF) {
Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
@ -6463,7 +6438,7 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>( aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
aco_opcode::p_create_vector, Format::PSEUDO, dmask_count, 1)}; aco_opcode::p_create_vector, Format::PSEUDO, dmask_count, 1)};
uint32_t index = 0; uint32_t index = 0;
u_foreach_bit(bit, dmask) { u_foreach_bit (bit, dmask) {
vec->operands[index++] = Operand(emit_extract_vector(ctx, data, bit, rc)); vec->operands[index++] = Operand(emit_extract_vector(ctx, data, bit, rc));
} }
data = bld.tmp(RegClass::get(RegType::vgpr, dmask_count * rc.bytes())); data = bld.tmp(RegClass::get(RegType::vgpr, dmask_count * rc.bytes()));
@ -6491,9 +6466,8 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
} }
void void
translate_buffer_image_atomic_op(const nir_atomic_op op, translate_buffer_image_atomic_op(const nir_atomic_op op, aco_opcode* buf_op, aco_opcode* buf_op64,
aco_opcode *buf_op, aco_opcode *buf_op64, aco_opcode* image_op)
aco_opcode *image_op)
{ {
switch (op) { switch (op) {
case nir_atomic_op_iadd: case nir_atomic_op_iadd:
@ -6571,8 +6545,7 @@ translate_buffer_image_atomic_op(const nir_atomic_op op,
*buf_op64 = aco_opcode::buffer_atomic_fmax_x2; *buf_op64 = aco_opcode::buffer_atomic_fmax_x2;
*image_op = aco_opcode::image_atomic_fmax; *image_op = aco_opcode::image_atomic_fmax;
break; break;
default: default: unreachable("unsupported atomic operation");
unreachable("unsupported atomic operation");
} }
} }
@ -6682,8 +6655,7 @@ visit_store_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa)); Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa));
memory_sync_info sync = get_memory_sync_info(instr, storage_buffer, 0); memory_sync_info sync = get_memory_sync_info(instr, storage_buffer, 0);
bool glc = bool glc = (nir_intrinsic_access(instr) & (ACCESS_VOLATILE | ACCESS_COHERENT)) &&
(nir_intrinsic_access(instr) & (ACCESS_VOLATILE | ACCESS_COHERENT)) &&
ctx->program->gfx_level < GFX11; ctx->program->gfx_level < GFX11;
unsigned write_count = 0; unsigned write_count = 0;
@ -6836,8 +6808,7 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr)
Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa)); Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
memory_sync_info sync = get_memory_sync_info(instr, storage_buffer, 0); memory_sync_info sync = get_memory_sync_info(instr, storage_buffer, 0);
bool glc = bool glc = (nir_intrinsic_access(instr) & (ACCESS_VOLATILE | ACCESS_COHERENT)) &&
(nir_intrinsic_access(instr) & (ACCESS_VOLATILE | ACCESS_COHERENT)) &&
ctx->program->gfx_level < GFX11; ctx->program->gfx_level < GFX11;
unsigned write_count = 0; unsigned write_count = 0;
@ -6999,8 +6970,7 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
op32 = global ? aco_opcode::global_atomic_fmax : aco_opcode::flat_atomic_fmax; op32 = global ? aco_opcode::global_atomic_fmax : aco_opcode::flat_atomic_fmax;
op64 = global ? aco_opcode::global_atomic_fmax_x2 : aco_opcode::flat_atomic_fmax_x2; op64 = global ? aco_opcode::global_atomic_fmax_x2 : aco_opcode::flat_atomic_fmax_x2;
break; break;
default: default: unreachable("unsupported atomic operation");
unreachable("unsupported atomic operation");
} }
aco_opcode op = instr->dest.ssa.bit_size == 32 ? op32 : op64; aco_opcode op = instr->dest.ssa.bit_size == 32 ? op32 : op64;
@ -7192,8 +7162,8 @@ visit_store_buffer(isel_context* ctx, nir_intrinsic_instr* intrin)
memory_sync_info sync(aco_storage_mode_from_nir_mem_mode(mem_mode), memory_sync_info sync(aco_storage_mode_from_nir_mem_mode(mem_mode),
written_once ? semantic_can_reorder : semantic_none); written_once ? semantic_can_reorder : semantic_none);
store_vmem_mubuf(ctx, store_src, descriptor, v_offset, s_offset, idx, const_offset, elem_size_bytes, store_vmem_mubuf(ctx, store_src, descriptor, v_offset, s_offset, idx, const_offset,
write_mask, swizzled, sync, glc, slc); elem_size_bytes, write_mask, swizzled, sync, glc, slc);
} }
void void
@ -7206,8 +7176,8 @@ visit_load_smem(isel_context* ctx, nir_intrinsic_instr* instr)
/* If base address is 32bit, convert to 64bit with the high 32bit part. */ /* If base address is 32bit, convert to 64bit with the high 32bit part. */
if (base.bytes() == 4) { if (base.bytes() == 4) {
base = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), base = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), base,
base, Operand::c32(ctx->options->address32_hi)); Operand::c32(ctx->options->address32_hi));
} }
aco_opcode opcode = aco_opcode::s_load_dword; aco_opcode opcode = aco_opcode::s_load_dword;
@ -7535,10 +7505,10 @@ get_scratch_resource(isel_context* ctx)
Builder bld(ctx->program, ctx->block); Builder bld(ctx->program, ctx->block);
Temp scratch_addr = ctx->program->private_segment_buffer; Temp scratch_addr = ctx->program->private_segment_buffer;
if (!scratch_addr.bytes()) { if (!scratch_addr.bytes()) {
Temp addr_lo = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Temp addr_lo =
Operand::c32(aco_symbol_scratch_addr_lo)); bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_lo));
Temp addr_hi = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Temp addr_hi =
Operand::c32(aco_symbol_scratch_addr_hi)); bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_hi));
scratch_addr = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi); scratch_addr = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
} else if (ctx->stage.hw != HWStage::CS) { } else if (ctx->stage.hw != HWStage::CS) {
scratch_addr = scratch_addr =
@ -8093,8 +8063,7 @@ Temp merged_wave_info_to_mask(isel_context* ctx, unsigned i);
Temp lanecount_to_mask(isel_context* ctx, Temp count); Temp lanecount_to_mask(isel_context* ctx, Temp count);
Temp Temp
get_interp_param(isel_context* ctx, nir_intrinsic_op intrin, get_interp_param(isel_context* ctx, nir_intrinsic_op intrin, enum glsl_interp_mode interp)
enum glsl_interp_mode interp)
{ {
bool linear = interp == INTERP_MODE_NOPERSPECTIVE; bool linear = interp == INTERP_MODE_NOPERSPECTIVE;
if (intrin == nir_intrinsic_load_barycentric_pixel || if (intrin == nir_intrinsic_load_barycentric_pixel ||
@ -8109,9 +8078,8 @@ get_interp_param(isel_context* ctx, nir_intrinsic_op intrin,
} }
void void
ds_ordered_count_offsets(isel_context *ctx, unsigned index_operand, ds_ordered_count_offsets(isel_context* ctx, unsigned index_operand, unsigned wave_release,
unsigned wave_release, unsigned wave_done, unsigned wave_done, unsigned* offset0, unsigned* offset1)
unsigned *offset0, unsigned *offset1)
{ {
unsigned ordered_count_index = index_operand & 0x3f; unsigned ordered_count_index = index_operand & 0x3f;
unsigned count_dword = (index_operand >> 24) & 0xf; unsigned count_dword = (index_operand >> 24) & 0xf;
@ -8189,7 +8157,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
RegClass rc = RegClass(offset.type(), 1); RegClass rc = RegClass(offset.type(), 1);
Temp pos1 = bld.tmp(rc), pos2 = bld.tmp(rc); Temp pos1 = bld.tmp(rc), pos2 = bld.tmp(rc);
bld.pseudo(aco_opcode::p_split_vector, Definition(pos1), Definition(pos2), offset); bld.pseudo(aco_opcode::p_split_vector, Definition(pos1), Definition(pos2), offset);
Temp bary = get_interp_param(ctx, instr->intrinsic, (glsl_interp_mode)nir_intrinsic_interp_mode(instr)); Temp bary = get_interp_param(ctx, instr->intrinsic,
(glsl_interp_mode)nir_intrinsic_interp_mode(instr));
emit_interp_center(ctx, get_ssa_temp(ctx, &instr->dest.ssa), bary, pos1, pos2); emit_interp_center(ctx, get_ssa_temp(ctx, &instr->dest.ssa), bary, pos1, pos2);
break; break;
} }
@ -8977,8 +8946,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa); Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
if (ctx->args->merged_wave_info.used) if (ctx->args->merged_wave_info.used)
bld.pseudo(aco_opcode::p_extract, Definition(dst), bld.def(s1, scc), bld.pseudo(aco_opcode::p_extract, Definition(dst), bld.def(s1, scc),
get_arg(ctx, ctx->args->merged_wave_info), Operand::c32(2u), get_arg(ctx, ctx->args->merged_wave_info), Operand::c32(2u), Operand::c32(8u),
Operand::c32(8u), Operand::zero()); Operand::zero());
else if (ctx->args->gs_wave_id.used) else if (ctx->args->gs_wave_id.used)
bld.copy(Definition(dst), get_arg(ctx, ctx->args->gs_wave_id)); bld.copy(Definition(dst), get_arg(ctx, ctx->args->gs_wave_id));
else else
@ -9025,8 +8994,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
case nir_intrinsic_overwrite_tes_arguments_amd: { case nir_intrinsic_overwrite_tes_arguments_amd: {
ctx->arg_temps[ctx->args->tes_u.arg_index] = get_ssa_temp(ctx, instr->src[0].ssa); ctx->arg_temps[ctx->args->tes_u.arg_index] = get_ssa_temp(ctx, instr->src[0].ssa);
ctx->arg_temps[ctx->args->tes_v.arg_index] = get_ssa_temp(ctx, instr->src[1].ssa); ctx->arg_temps[ctx->args->tes_v.arg_index] = get_ssa_temp(ctx, instr->src[1].ssa);
ctx->arg_temps[ctx->args->tes_rel_patch_id.arg_index] = ctx->arg_temps[ctx->args->tes_rel_patch_id.arg_index] = get_ssa_temp(ctx, instr->src[3].ssa);
get_ssa_temp(ctx, instr->src[3].ssa);
ctx->arg_temps[ctx->args->tes_patch_id.arg_index] = get_ssa_temp(ctx, instr->src[2].ssa); ctx->arg_temps[ctx->args->tes_patch_id.arg_index] = get_ssa_temp(ctx, instr->src[2].ssa);
break; break;
} }
@ -9036,7 +9004,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa); Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
Temp src = ctx->arg_temps[nir_intrinsic_base(instr)]; Temp src = ctx->arg_temps[nir_intrinsic_base(instr)];
assert(src.id()); assert(src.id());
assert(src.type() == (instr->intrinsic == nir_intrinsic_load_scalar_arg_amd ? RegType::sgpr : RegType::vgpr)); assert(src.type() == (instr->intrinsic == nir_intrinsic_load_scalar_arg_amd ? RegType::sgpr
: RegType::vgpr));
bld.copy(Definition(dst), src); bld.copy(Definition(dst), src);
emit_split_vector(ctx, dst, dst.size()); emit_split_vector(ctx, dst, dst.size());
break; break;
@ -9048,35 +9017,34 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
Temp gds_base = bld.copy(bld.def(v1), Operand::c32(0u)); Temp gds_base = bld.copy(bld.def(v1), Operand::c32(0u));
unsigned offset0, offset1; unsigned offset0, offset1;
Instruction *ds_instr; Instruction* ds_instr;
Operand m; Operand m;
/* Lock a GDS mutex. */ /* Lock a GDS mutex. */
ds_ordered_count_offsets(ctx, 1 << 24u, false, false, &offset0, &offset1); ds_ordered_count_offsets(ctx, 1 << 24u, false, false, &offset0, &offset1);
m = bld.m0(bld.as_uniform(ordered_id)); m = bld.m0(bld.as_uniform(ordered_id));
ds_instr = bld.ds(aco_opcode::ds_ordered_count, bld.def(v1), gds_base, m, ds_instr =
offset0, offset1, true); bld.ds(aco_opcode::ds_ordered_count, bld.def(v1), gds_base, m, offset0, offset1, true);
ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_volatile); ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_volatile);
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>( aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
aco_opcode::p_create_vector, Format::PSEUDO, instr->num_components, 1)}; aco_opcode::p_create_vector, Format::PSEUDO, instr->num_components, 1)};
unsigned write_mask = nir_intrinsic_write_mask(instr); unsigned write_mask = nir_intrinsic_write_mask(instr);
bool use_gds_registers = bool use_gds_registers = ctx->options->gfx_level >= GFX11 && ctx->options->is_opengl;
ctx->options->gfx_level >= GFX11 && ctx->options->is_opengl;
for (unsigned i = 0; i < instr->num_components; i++) { for (unsigned i = 0; i < instr->num_components; i++) {
if (write_mask & (1 << i)) { if (write_mask & (1 << i)) {
Temp chan_counter = emit_extract_vector(ctx, counter, i, v1); Temp chan_counter = emit_extract_vector(ctx, counter, i, v1);
if (use_gds_registers) { if (use_gds_registers) {
ds_instr = bld.ds(aco_opcode::ds_add_gs_reg_rtn, bld.def(v1), ds_instr = bld.ds(aco_opcode::ds_add_gs_reg_rtn, bld.def(v1), Operand(),
Operand(), chan_counter, i * 4, 0u, true); chan_counter, i * 4, 0u, true);
} else { } else {
m = bld.m0((Temp)bld.copy(bld.def(s1, m0), Operand::c32(0x100u))); m = bld.m0((Temp)bld.copy(bld.def(s1, m0), Operand::c32(0x100u)));
ds_instr = bld.ds(aco_opcode::ds_add_rtn_u32, bld.def(v1), ds_instr = bld.ds(aco_opcode::ds_add_rtn_u32, bld.def(v1), gds_base, chan_counter, m,
gds_base, chan_counter, m, i * 4, 0u, true); i * 4, 0u, true);
} }
ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_atomicrmw); ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_atomicrmw);
@ -9092,33 +9060,32 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
/* Unlock a GDS mutex. */ /* Unlock a GDS mutex. */
ds_ordered_count_offsets(ctx, 1 << 24u, true, true, &offset0, &offset1); ds_ordered_count_offsets(ctx, 1 << 24u, true, true, &offset0, &offset1);
m = bld.m0(bld.as_uniform(ordered_id)); m = bld.m0(bld.as_uniform(ordered_id));
ds_instr = bld.ds(aco_opcode::ds_ordered_count, bld.def(v1), gds_base, m, ds_instr =
offset0, offset1, true); bld.ds(aco_opcode::ds_ordered_count, bld.def(v1), gds_base, m, offset0, offset1, true);
ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_volatile); ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_volatile);
emit_split_vector(ctx, dst, instr->num_components); emit_split_vector(ctx, dst, instr->num_components);
break; break;
} }
case nir_intrinsic_xfb_counter_sub_amd: { case nir_intrinsic_xfb_counter_sub_amd: {
bool use_gds_registers = bool use_gds_registers = ctx->options->gfx_level >= GFX11 && ctx->options->is_opengl;
ctx->options->gfx_level >= GFX11 && ctx->options->is_opengl;
unsigned write_mask = nir_intrinsic_write_mask(instr); unsigned write_mask = nir_intrinsic_write_mask(instr);
Temp counter = get_ssa_temp(ctx, instr->src[0].ssa); Temp counter = get_ssa_temp(ctx, instr->src[0].ssa);
Temp gds_base = bld.copy(bld.def(v1), Operand::c32(0u)); Temp gds_base = bld.copy(bld.def(v1), Operand::c32(0u));
u_foreach_bit(i, write_mask) { u_foreach_bit (i, write_mask) {
Temp chan_counter = emit_extract_vector(ctx, counter, i, v1); Temp chan_counter = emit_extract_vector(ctx, counter, i, v1);
Instruction *ds_instr; Instruction* ds_instr;
if (use_gds_registers) { if (use_gds_registers) {
ds_instr = bld.ds(aco_opcode::ds_sub_gs_reg_rtn, bld.def(v1), ds_instr = bld.ds(aco_opcode::ds_sub_gs_reg_rtn, bld.def(v1), Operand(), chan_counter,
Operand(), chan_counter, i * 4, 0u, true); i * 4, 0u, true);
} else { } else {
Operand m = bld.m0((Temp)bld.copy(bld.def(s1, m0), Operand::c32(0x100u))); Operand m = bld.m0((Temp)bld.copy(bld.def(s1, m0), Operand::c32(0x100u)));
ds_instr = bld.ds(aco_opcode::ds_sub_rtn_u32, bld.def(v1), ds_instr = bld.ds(aco_opcode::ds_sub_rtn_u32, bld.def(v1), gds_base, chan_counter, m,
gds_base, chan_counter, m, i * 4, 0u, true); i * 4, 0u, true);
} }
ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_atomicrmw); ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_atomicrmw);
} }
@ -9162,15 +9129,14 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
exp->valid_mask = false; exp->valid_mask = false;
/* Compressed export uses two bits for a channel. */ /* Compressed export uses two bits for a channel. */
uint32_t channel_mask = exp->compressed ? uint32_t channel_mask =
(write_mask & 0x3 ? 1 : 0) | (write_mask & 0xc ? 2 : 0) : exp->compressed ? (write_mask & 0x3 ? 1 : 0) | (write_mask & 0xc ? 2 : 0) : write_mask;
write_mask;
Temp value = get_ssa_temp(ctx, instr->src[0].ssa); Temp value = get_ssa_temp(ctx, instr->src[0].ssa);
for (unsigned i = 0; i < 4; i++) { for (unsigned i = 0; i < 4; i++) {
exp->operands[i] = channel_mask & BITFIELD_BIT(i) ? exp->operands[i] = channel_mask & BITFIELD_BIT(i)
Operand(emit_extract_vector(ctx, value, i, v1)) : ? Operand(emit_extract_vector(ctx, value, i, v1))
Operand(v1); : Operand(v1);
} }
ctx->block->instructions.emplace_back(std::move(exp)); ctx->block->instructions.emplace_back(std::move(exp));
@ -9183,13 +9149,11 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
struct aco_export_mrt mrt0, mrt1; struct aco_export_mrt mrt0, mrt1;
for (unsigned i = 0; i < 4; i++) { for (unsigned i = 0; i < 4; i++) {
mrt0.out[i] = write_mask & BITFIELD_BIT(i) ? mrt0.out[i] = write_mask & BITFIELD_BIT(i) ? Operand(emit_extract_vector(ctx, val0, i, v1))
Operand(emit_extract_vector(ctx, val0, i, v1)) : : Operand(v1);
Operand(v1);
mrt1.out[i] = write_mask & BITFIELD_BIT(i) ? mrt1.out[i] = write_mask & BITFIELD_BIT(i) ? Operand(emit_extract_vector(ctx, val1, i, v1))
Operand(emit_extract_vector(ctx, val1, i, v1)) : : Operand(v1);
Operand(v1);
} }
mrt0.enabled_channels = mrt1.enabled_channels = write_mask; mrt0.enabled_channels = mrt1.enabled_channels = write_mask;
@ -9383,7 +9347,8 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
} }
if (has_wqm_coord) { if (has_wqm_coord) {
assert(instr->op == nir_texop_tex || instr->op == nir_texop_txb || instr->op == nir_texop_lod); assert(instr->op == nir_texop_tex || instr->op == nir_texop_txb ||
instr->op == nir_texop_lod);
assert(wqm_coord.regClass().is_linear_vgpr()); assert(wqm_coord.regClass().is_linear_vgpr());
assert(!a16 && !g16); assert(!a16 && !g16);
} }
@ -9701,9 +9666,8 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
if (dst.regClass() == s1) { if (dst.regClass() == s1) {
Temp is_not_null = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand::zero(), Temp is_not_null = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand::zero(),
emit_extract_vector(ctx, resource, 1, s1)); emit_extract_vector(ctx, resource, 1, s1));
bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), bld.as_uniform(tmp_dst),
bld.as_uniform(tmp_dst), Operand::c32(0x76543210), Operand::c32(0x76543210), bld.scc(is_not_null));
bld.scc(is_not_null));
} else { } else {
Temp is_not_null = bld.tmp(bld.lm); Temp is_not_null = bld.tmp(bld.lm);
bld.vopc_e64(aco_opcode::v_cmp_lg_u32, Definition(is_not_null), Operand::zero(), bld.vopc_e64(aco_opcode::v_cmp_lg_u32, Definition(is_not_null), Operand::zero(),
@ -10782,10 +10746,12 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out,
/* Replace NaN by zero (only 32-bit) to fix game bugs if requested. */ /* Replace NaN by zero (only 32-bit) to fix game bugs if requested. */
if (out->enable_mrt_output_nan_fixup && !is_16bit && if (out->enable_mrt_output_nan_fixup && !is_16bit &&
(out->col_format == V_028714_SPI_SHADER_32_R || out->col_format == V_028714_SPI_SHADER_32_GR || (out->col_format == V_028714_SPI_SHADER_32_R ||
out->col_format == V_028714_SPI_SHADER_32_AR || out->col_format == V_028714_SPI_SHADER_32_ABGR || out->col_format == V_028714_SPI_SHADER_32_GR ||
out->col_format == V_028714_SPI_SHADER_32_AR ||
out->col_format == V_028714_SPI_SHADER_32_ABGR ||
out->col_format == V_028714_SPI_SHADER_FP16_ABGR)) { out->col_format == V_028714_SPI_SHADER_FP16_ABGR)) {
u_foreach_bit(i, out->write_mask) { u_foreach_bit (i, out->write_mask) {
Temp is_not_nan = Temp is_not_nan =
bld.vopc(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm), values[i], values[i]); bld.vopc(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm), values[i], values[i]);
values[i] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), values[i], values[i] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), values[i],
@ -10847,7 +10813,6 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out,
} }
break; break;
case V_028714_SPI_SHADER_SNORM16_ABGR: case V_028714_SPI_SHADER_SNORM16_ABGR:
if (is_16bit && ctx->options->gfx_level >= GFX9) { if (is_16bit && ctx->options->gfx_level >= GFX9) {
compr_op = aco_opcode::v_cvt_pknorm_i16_f16; compr_op = aco_opcode::v_cvt_pknorm_i16_f16;
@ -10862,13 +10827,13 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out,
/* clamp */ /* clamp */
uint32_t max_rgb = out->is_int8 ? 255 : out->is_int10 ? 1023 : 0; uint32_t max_rgb = out->is_int8 ? 255 : out->is_int10 ? 1023 : 0;
u_foreach_bit(i, out->write_mask) { u_foreach_bit (i, out->write_mask) {
uint32_t max = i == 3 && out->is_int10 ? 3 : max_rgb; uint32_t max = i == 3 && out->is_int10 ? 3 : max_rgb;
values[i] = bld.vop2(aco_opcode::v_min_u32, bld.def(v1), Operand::c32(max), values[i]); values[i] = bld.vop2(aco_opcode::v_min_u32, bld.def(v1), Operand::c32(max), values[i]);
} }
} else if (is_16bit) { } else if (is_16bit) {
u_foreach_bit(i, out->write_mask) { u_foreach_bit (i, out->write_mask) {
Temp tmp = convert_int(ctx, bld, values[i].getTemp(), 16, 32, false); Temp tmp = convert_int(ctx, bld, values[i].getTemp(), 16, 32, false);
values[i] = Operand(tmp); values[i] = Operand(tmp);
} }
@ -10882,7 +10847,7 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out,
uint32_t max_rgb = out->is_int8 ? 127 : out->is_int10 ? 511 : 0; uint32_t max_rgb = out->is_int8 ? 127 : out->is_int10 ? 511 : 0;
uint32_t min_rgb = out->is_int8 ? -128 : out->is_int10 ? -512 : 0; uint32_t min_rgb = out->is_int8 ? -128 : out->is_int10 ? -512 : 0;
u_foreach_bit(i, out->write_mask) { u_foreach_bit (i, out->write_mask) {
uint32_t max = i == 3 && out->is_int10 ? 1 : max_rgb; uint32_t max = i == 3 && out->is_int10 ? 1 : max_rgb;
uint32_t min = i == 3 && out->is_int10 ? -2u : min_rgb; uint32_t min = i == 3 && out->is_int10 ? -2u : min_rgb;
@ -10890,7 +10855,7 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out,
values[i] = bld.vop2(aco_opcode::v_max_i32, bld.def(v1), Operand::c32(min), values[i]); values[i] = bld.vop2(aco_opcode::v_max_i32, bld.def(v1), Operand::c32(min), values[i]);
} }
} else if (is_16bit) { } else if (is_16bit) {
u_foreach_bit(i, out->write_mask) { u_foreach_bit (i, out->write_mask) {
Temp tmp = convert_int(ctx, bld, values[i].getTemp(), 16, 32, true); Temp tmp = convert_int(ctx, bld, values[i].getTemp(), 16, 32, true);
values[i] = Operand(tmp); values[i] = Operand(tmp);
} }
@ -10996,8 +10961,7 @@ create_fs_jump_to_epilog(isel_context* ctx)
} }
} }
Temp continue_pc = Temp continue_pc = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->program->info.ps.epilog_pc));
convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->program->info.ps.epilog_pc));
aco_ptr<Pseudo_instruction> jump{create_instruction<Pseudo_instruction>( aco_ptr<Pseudo_instruction> jump{create_instruction<Pseudo_instruction>(
aco_opcode::p_jump_to_epilog, Format::PSEUDO, 1 + color_exports.size(), 0)}; aco_opcode::p_jump_to_epilog, Format::PSEUDO, 1 + color_exports.size(), 0)};
@ -11068,12 +11032,13 @@ add_startpgm(struct isel_context* ctx)
Operand scratch_offset = Operand(get_arg(ctx, ctx->args->scratch_offset)); Operand scratch_offset = Operand(get_arg(ctx, ctx->args->scratch_offset));
scratch_offset.setLateKill(true); scratch_offset.setLateKill(true);
Operand scratch_addr = ctx->args->ring_offsets.used ? Operand scratch_addr = ctx->args->ring_offsets.used
Operand(get_arg(ctx, ctx->args->ring_offsets)) : Operand(s2); ? Operand(get_arg(ctx, ctx->args->ring_offsets))
: Operand(s2);
Builder bld(ctx->program, ctx->block); Builder bld(ctx->program, ctx->block);
bld.pseudo(aco_opcode::p_init_scratch, bld.def(s2), bld.def(s1, scc), bld.pseudo(aco_opcode::p_init_scratch, bld.def(s2), bld.def(s1, scc), scratch_addr,
scratch_addr, scratch_offset); scratch_offset);
} }
return startpgm; return startpgm;
@ -11085,9 +11050,9 @@ fix_ls_vgpr_init_bug(isel_context* ctx, Pseudo_instruction* startpgm)
assert(ctx->shader->info.stage == MESA_SHADER_VERTEX); assert(ctx->shader->info.stage == MESA_SHADER_VERTEX);
Builder bld(ctx->program, ctx->block); Builder bld(ctx->program, ctx->block);
constexpr unsigned hs_idx = 1u; constexpr unsigned hs_idx = 1u;
Builder::Result hs_thread_count = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), Builder::Result hs_thread_count =
get_arg(ctx, ctx->args->merged_wave_info), bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
Operand::c32((8u << 16) | (hs_idx * 8u))); get_arg(ctx, ctx->args->merged_wave_info), Operand::c32((8u << 16) | (hs_idx * 8u)));
Temp ls_has_nonzero_hs_threads = bool_to_vector_condition(ctx, hs_thread_count.def(1).getTemp()); Temp ls_has_nonzero_hs_threads = bool_to_vector_condition(ctx, hs_thread_count.def(1).getTemp());
/* If there are no HS threads, SPI mistakenly loads the LS VGPRs starting at VGPR 0. */ /* If there are no HS threads, SPI mistakenly loads the LS VGPRs starting at VGPR 0. */
@ -11218,8 +11183,7 @@ merged_wave_info_to_mask(isel_context* ctx, unsigned i)
Builder bld(ctx->program, ctx->block); Builder bld(ctx->program, ctx->block);
/* lanecount_to_mask() only cares about s0.u[6:0] so we don't need either s_bfe nor s_and here */ /* lanecount_to_mask() only cares about s0.u[6:0] so we don't need either s_bfe nor s_and here */
Temp count = i == 0 Temp count = i == 0 ? get_arg(ctx, ctx->args->merged_wave_info)
? get_arg(ctx, ctx->args->merged_wave_info)
: bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc), : bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc),
get_arg(ctx, ctx->args->merged_wave_info), Operand::c32(i * 8u)); get_arg(ctx, ctx->args->merged_wave_info), Operand::c32(i * 8u));
@ -11276,10 +11240,10 @@ select_program_rt(isel_context& ctx, unsigned shader_count, struct nir_shader* c
void void
select_program(Program* program, unsigned shader_count, struct nir_shader* const* shaders, select_program(Program* program, unsigned shader_count, struct nir_shader* const* shaders,
ac_shader_config* config, const struct aco_compiler_options* options, ac_shader_config* config, const struct aco_compiler_options* options,
const struct aco_shader_info* info, const struct aco_shader_info* info, const struct ac_shader_args* args)
const struct ac_shader_args* args)
{ {
isel_context ctx = setup_isel_context(program, shader_count, shaders, config, options, info, args, false); isel_context ctx =
setup_isel_context(program, shader_count, shaders, config, options, info, args, false);
if (ctx.stage == raytracing_cs) if (ctx.stage == raytracing_cs)
return select_program_rt(ctx, shader_count, shaders, args); return select_program_rt(ctx, shader_count, shaders, args);
@ -11391,8 +11355,7 @@ select_program(Program* program, unsigned shader_count, struct nir_shader* const
void void
select_trap_handler_shader(Program* program, struct nir_shader* shader, ac_shader_config* config, select_trap_handler_shader(Program* program, struct nir_shader* shader, ac_shader_config* config,
const struct aco_compiler_options* options, const struct aco_compiler_options* options,
const struct aco_shader_info* info, const struct aco_shader_info* info, const struct ac_shader_args* args)
const struct ac_shader_args* args)
{ {
assert(options->gfx_level == GFX8); assert(options->gfx_level == GFX8);

View file

@ -660,8 +660,8 @@ cleanup_context(isel_context* ctx)
isel_context isel_context
setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* const* shaders, setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* const* shaders,
ac_shader_config* config, const struct aco_compiler_options* options, ac_shader_config* config, const struct aco_compiler_options* options,
const struct aco_shader_info* info, const struct aco_shader_info* info, const struct ac_shader_args* args,
const struct ac_shader_args* args, bool is_ps_epilog) bool is_ps_epilog)
{ {
SWStage sw_stage = SWStage::None; SWStage sw_stage = SWStage::None;
for (unsigned i = 0; i < shader_count; i++) { for (unsigned i = 0; i < shader_count; i++) {

View file

@ -80,8 +80,7 @@ validate(aco::Program* program)
} }
static std::string static std::string
get_disasm_string(aco::Program* program, std::vector<uint32_t>& code, get_disasm_string(aco::Program* program, std::vector<uint32_t>& code, unsigned exec_size)
unsigned exec_size)
{ {
std::string disasm; std::string disasm;
@ -111,8 +110,7 @@ get_disasm_string(aco::Program* program, std::vector<uint32_t>& code,
static std::string static std::string
aco_postprocess_shader(const struct aco_compiler_options* options, aco_postprocess_shader(const struct aco_compiler_options* options,
const struct aco_shader_info *info, const struct aco_shader_info* info, std::unique_ptr<aco::Program>& program)
std::unique_ptr<aco::Program>& program)
{ {
std::string llvm_ir; std::string llvm_ir;
@ -211,12 +209,9 @@ aco_postprocess_shader(const struct aco_compiler_options* options,
} }
void void
aco_compile_shader(const struct aco_compiler_options* options, aco_compile_shader(const struct aco_compiler_options* options, const struct aco_shader_info* info,
const struct aco_shader_info* info,
unsigned shader_count, struct nir_shader* const* shaders, unsigned shader_count, struct nir_shader* const* shaders,
const struct ac_shader_args *args, const struct ac_shader_args* args, aco_callback* build_binary, void** binary)
aco_callback *build_binary,
void **binary)
{ {
aco::init(); aco::init();
@ -335,13 +330,8 @@ aco_compile_vs_prolog(const struct aco_compiler_options* options,
if (get_disasm) if (get_disasm)
disasm = get_disasm_string(program.get(), code, exec_size); disasm = get_disasm_string(program.get(), code, exec_size);
(*build_prolog)(binary, (*build_prolog)(binary, config.num_sgprs, config.num_vgprs, code.data(), code.size(),
config.num_sgprs, disasm.data(), disasm.size());
config.num_vgprs,
code.data(),
code.size(),
disasm.data(),
disasm.size());
} }
void void
@ -377,11 +367,6 @@ aco_compile_ps_epilog(const struct aco_compiler_options* options,
if (get_disasm) if (get_disasm)
disasm = get_disasm_string(program.get(), code, exec_size); disasm = get_disasm_string(program.get(), code, exec_size);
(*build_epilog)(binary, (*build_epilog)(binary, config.num_sgprs, config.num_vgprs, code.data(), code.size(),
config.num_sgprs, disasm.data(), disasm.size());
config.num_vgprs,
code.data(),
code.size(),
disasm.data(),
disasm.size());
} }

View file

@ -25,9 +25,9 @@
#ifndef ACO_INTERFACE_H #ifndef ACO_INTERFACE_H
#define ACO_INTERFACE_H #define ACO_INTERFACE_H
#include "amd_family.h"
#include "aco_shader_info.h" #include "aco_shader_info.h"
#include "amd_family.h"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
@ -47,24 +47,18 @@ typedef void(aco_callback)(void** priv_ptr, const struct ac_shader_config* confi
const char* llvm_ir_str, unsigned llvm_ir_size, const char* disasm_str, const char* llvm_ir_str, unsigned llvm_ir_size, const char* disasm_str,
unsigned disasm_size, uint32_t* statistics, uint32_t stats_size, unsigned disasm_size, uint32_t* statistics, uint32_t stats_size,
uint32_t exec_size, const uint32_t* code, uint32_t code_dw, uint32_t exec_size, const uint32_t* code, uint32_t code_dw,
const struct aco_symbol *symbols, unsigned num_symbols); const struct aco_symbol* symbols, unsigned num_symbols);
typedef void (aco_shader_part_callback)(void **priv_ptr, typedef void(aco_shader_part_callback)(void** priv_ptr, uint32_t num_sgprs, uint32_t num_vgprs,
uint32_t num_sgprs, const uint32_t* code, uint32_t code_size,
uint32_t num_vgprs, const char* disasm_str, uint32_t disasm_size);
const uint32_t *code,
uint32_t code_size,
const char *disasm_str,
uint32_t disasm_size);
extern const struct aco_compiler_statistic_info* aco_statistic_infos; extern const struct aco_compiler_statistic_info* aco_statistic_infos;
void aco_compile_shader(const struct aco_compiler_options* options, void aco_compile_shader(const struct aco_compiler_options* options,
const struct aco_shader_info* info, const struct aco_shader_info* info, unsigned shader_count,
unsigned shader_count, struct nir_shader* const* shaders, struct nir_shader* const* shaders, const struct ac_shader_args* args,
const struct ac_shader_args *args, aco_callback* build_binary, void** binary);
aco_callback *build_binary,
void **binary);
void aco_compile_rt_prolog(const struct aco_compiler_options* options, void aco_compile_rt_prolog(const struct aco_compiler_options* options,
const struct aco_shader_info* info, const struct ac_shader_args* in_args, const struct aco_shader_info* info, const struct ac_shader_args* in_args,

View file

@ -98,8 +98,9 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
program->wave_size = info->wave_size; program->wave_size = info->wave_size;
program->lane_mask = program->wave_size == 32 ? s1 : s2; program->lane_mask = program->wave_size == 32 ? s1 : s2;
program->dev.lds_encoding_granule = gfx_level >= GFX11 && stage == fragment_fs ? 1024 : program->dev.lds_encoding_granule = gfx_level >= GFX11 && stage == fragment_fs ? 1024
gfx_level >= GFX7 ? 512 : 256; : gfx_level >= GFX7 ? 512
: 256;
program->dev.lds_alloc_granule = gfx_level >= GFX10_3 ? 1024 : program->dev.lds_encoding_granule; program->dev.lds_alloc_granule = gfx_level >= GFX10_3 ? 1024 : program->dev.lds_encoding_granule;
/* GFX6: There is 64KB LDS per CU, but a single workgroup can only use 32KB. */ /* GFX6: There is 64KB LDS per CU, but a single workgroup can only use 32KB. */

View file

@ -142,7 +142,7 @@ enum storage_class : uint8_t {
storage_image = 0x4, storage_image = 0x4,
storage_shared = 0x8, /* or TCS output */ storage_shared = 0x8, /* or TCS output */
storage_vmem_output = 0x10, /* GS or TCS output stores using VMEM */ storage_vmem_output = 0x10, /* GS or TCS output stores using VMEM */
storage_task_payload = 0x20,/* Task-Mesh payload */ storage_task_payload = 0x20, /* Task-Mesh payload */
storage_scratch = 0x40, storage_scratch = 0x40,
storage_vgpr_spill = 0x80, storage_vgpr_spill = 0x80,
storage_count = 8, /* not counting storage_none */ storage_count = 8, /* not counting storage_none */
@ -823,7 +823,8 @@ public:
assert(bytes() == 2 || bytes() == 4); assert(bytes() == 2 || bytes() == 4);
if (opsel) { if (opsel) {
if (bytes() == 2 && int16_t(data_.i) >= -16 && int16_t(data_.i) <= 64 && !isLiteral()) if (bytes() == 2 && int16_t(data_.i) >= -16 && int16_t(data_.i) <= 64 && !isLiteral())
return int16_t(data_.i) >> 16; /* 16-bit inline integers are sign-extended, even with fp16 instrs */ return int16_t(data_.i) >>
16; /* 16-bit inline integers are sign-extended, even with fp16 instrs */
else else
return data_.i >> 16; return data_.i >> 16;
} }
@ -1418,7 +1419,8 @@ struct VINTERP_inreg_instruction : public VALU_instruction {
uint8_t padding5; uint8_t padding5;
uint8_t padding6; uint8_t padding6;
}; };
static_assert(sizeof(VINTERP_inreg_instruction) == sizeof(VALU_instruction) + 4, "Unexpected padding"); static_assert(sizeof(VINTERP_inreg_instruction) == sizeof(VALU_instruction) + 4,
"Unexpected padding");
/** /**
* Data Parallel Primitives Format: * Data Parallel Primitives Format:
@ -1809,8 +1811,7 @@ memory_sync_info get_sync_info(const Instruction* instr);
inline bool inline bool
is_dead(const std::vector<uint16_t>& uses, const Instruction* instr) is_dead(const std::vector<uint16_t>& uses, const Instruction* instr)
{ {
if (instr->definitions.empty() || instr->isBranch() || if (instr->definitions.empty() || instr->isBranch() || instr->opcode == aco_opcode::p_startpgm ||
instr->opcode == aco_opcode::p_startpgm ||
instr->opcode == aco_opcode::p_init_scratch || instr->opcode == aco_opcode::p_init_scratch ||
instr->opcode == aco_opcode::p_dual_src_export_gfx11) instr->opcode == aco_opcode::p_dual_src_export_gfx11)
return false; return false;
@ -2216,8 +2217,7 @@ void init_program(Program* program, Stage stage, const struct aco_shader_info* i
void select_program(Program* program, unsigned shader_count, struct nir_shader* const* shaders, void select_program(Program* program, unsigned shader_count, struct nir_shader* const* shaders,
ac_shader_config* config, const struct aco_compiler_options* options, ac_shader_config* config, const struct aco_compiler_options* options,
const struct aco_shader_info* info, const struct aco_shader_info* info, const struct ac_shader_args* args);
const struct ac_shader_args* args);
void select_trap_handler_shader(Program* program, struct nir_shader* shader, void select_trap_handler_shader(Program* program, struct nir_shader* shader,
ac_shader_config* config, ac_shader_config* config,
const struct aco_compiler_options* options, const struct aco_compiler_options* options,
@ -2258,7 +2258,7 @@ bool dealloc_vgprs(Program* program);
void insert_NOPs(Program* program); void insert_NOPs(Program* program);
void form_hard_clauses(Program* program); void form_hard_clauses(Program* program);
unsigned emit_program(Program* program, std::vector<uint32_t>& code, unsigned emit_program(Program* program, std::vector<uint32_t>& code,
std::vector<struct aco_symbol> *symbols); std::vector<struct aco_symbol>* symbols);
/** /**
* Returns true if print_asm can disassemble the given program for the current build/runtime * Returns true if print_asm can disassemble the given program for the current build/runtime
* configuration * configuration

View file

@ -2181,7 +2181,7 @@ lower_image_sample(lower_context* ctx, aco_ptr<Instruction>& instr)
instr->mimg().strict_wqm = false; instr->mimg().strict_wqm = false;
if ((3 + num_vaddr) > instr->operands.size()) { if ((3 + num_vaddr) > instr->operands.size()) {
MIMG_instruction *new_instr = create_instruction<MIMG_instruction>( MIMG_instruction* new_instr = create_instruction<MIMG_instruction>(
instr->opcode, Format::MIMG, 3 + num_vaddr, instr->definitions.size()); instr->opcode, Format::MIMG, 3 + num_vaddr, instr->definitions.size());
std::copy(instr->definitions.cbegin(), instr->definitions.cend(), std::copy(instr->definitions.cbegin(), instr->definitions.cend(),
new_instr->definitions.begin()); new_instr->definitions.begin());
@ -2346,8 +2346,8 @@ lower_to_hw_instr(Program* program)
target = target =
program->has_color_exports ? V_008DFC_SQ_EXP_MRT : V_008DFC_SQ_EXP_MRTZ; program->has_color_exports ? V_008DFC_SQ_EXP_MRT : V_008DFC_SQ_EXP_MRTZ;
if (program->stage == fragment_fs) if (program->stage == fragment_fs)
bld.exp(aco_opcode::exp, Operand(v1), Operand(v1), Operand(v1), Operand(v1), bld.exp(aco_opcode::exp, Operand(v1), Operand(v1), Operand(v1), Operand(v1), 0,
0, target, false, true, true); target, false, true, true);
if (should_dealloc_vgprs) if (should_dealloc_vgprs)
bld.sopp(aco_opcode::s_sendmsg, -1, sendmsg_dealloc_vgprs); bld.sopp(aco_opcode::s_sendmsg, -1, sendmsg_dealloc_vgprs);
bld.sopp(aco_opcode::s_endpgm); bld.sopp(aco_opcode::s_endpgm);
@ -2518,8 +2518,7 @@ lower_to_hw_instr(Program* program)
create_bperm(bld, ext_swiz, dst, Operand::zero()); create_bperm(bld, ext_swiz, dst, Operand::zero());
} }
} else { } else {
SDWA_instruction& sdwa = SDWA_instruction& sdwa = bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op)->sdwa();
bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op)->sdwa();
sdwa.sel[0] = SubdwordSel(bits / 8, offset / 8, signext); sdwa.sel[0] = SubdwordSel(bits / 8, offset / 8, signext);
} }
} }
@ -2574,7 +2573,8 @@ lower_to_hw_instr(Program* program)
} else { } else {
assert(dst.regClass() == v2b); assert(dst.regClass() == v2b);
bld.vop2_sdwa(aco_opcode::v_lshlrev_b32, dst, Operand::c32(offset), op) bld.vop2_sdwa(aco_opcode::v_lshlrev_b32, dst, Operand::c32(offset), op)
->sdwa().sel[1] = SubdwordSel::ubyte; ->sdwa()
.sel[1] = SubdwordSel::ubyte;
} }
break; break;
} }

View file

@ -2116,9 +2116,10 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
case aco_opcode::v_mbcnt_hi_u32_b32_e64: { case aco_opcode::v_mbcnt_hi_u32_b32_e64: {
if (instr->operands[0].constantEquals(-1) && instr->operands[1].isTemp() && if (instr->operands[0].constantEquals(-1) && instr->operands[1].isTemp() &&
ctx.info[instr->operands[1].tempId()].is_usedef()) { ctx.info[instr->operands[1].tempId()].is_usedef()) {
Instruction *usedef_instr = ctx.info[instr->operands[1].tempId()].instr; Instruction* usedef_instr = ctx.info[instr->operands[1].tempId()].instr;
if (usedef_instr->opcode == aco_opcode::v_mbcnt_lo_u32_b32 && if (usedef_instr->opcode == aco_opcode::v_mbcnt_lo_u32_b32 &&
usedef_instr->operands[0].constantEquals(-1) && usedef_instr->operands[1].constantEquals(0)) usedef_instr->operands[0].constantEquals(-1) &&
usedef_instr->operands[1].constantEquals(0))
ctx.info[instr->definitions[0].tempId()].set_subgroup_invocation(instr.get()); ctx.info[instr->definitions[0].tempId()].set_subgroup_invocation(instr.get());
} }
break; break;
@ -2370,7 +2371,9 @@ optimize_cmp_subgroup_invocation(opt_ctx& ctx, aco_ptr<Instruction>& instr)
return false; return false;
/* Find the constant operand or return early if there isn't one. */ /* Find the constant operand or return early if there isn't one. */
const int const_op_idx = instr->operands[0].isConstant() ? 0 : instr->operands[1].isConstant() ? 1 : -1; const int const_op_idx = instr->operands[0].isConstant() ? 0
: instr->operands[1].isConstant() ? 1
: -1;
if (const_op_idx == -1) if (const_op_idx == -1)
return false; return false;
@ -2413,11 +2416,10 @@ optimize_cmp_subgroup_invocation(opt_ctx& ctx, aco_ptr<Instruction>& instr)
first_bit = val + 1; first_bit = val + 1;
num_bits = val >= wave_size ? 0 : (wave_size - val - 1); num_bits = val >= wave_size ? 0 : (wave_size - val - 1);
break; break;
default: default: return false;
return false;
} }
Instruction *cpy = NULL; Instruction* cpy = NULL;
const uint64_t mask = BITFIELD64_RANGE(first_bit, num_bits); const uint64_t mask = BITFIELD64_RANGE(first_bit, num_bits);
if (wave_size == 64 && mask > 0x7fffffff && mask != -1ull) { if (wave_size == 64 && mask > 0x7fffffff && mask != -1ull) {
/* Mask can't be represented as a 64-bit constant or literal, use s_bfm_b64. */ /* Mask can't be represented as a 64-bit constant or literal, use s_bfm_b64. */
@ -2426,7 +2428,8 @@ optimize_cmp_subgroup_invocation(opt_ctx& ctx, aco_ptr<Instruction>& instr)
cpy->operands[1] = Operand::c32(first_bit); cpy->operands[1] = Operand::c32(first_bit);
} else { } else {
/* Copy mask as a literal constant. */ /* Copy mask as a literal constant. */
cpy = create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO, 1, 1); cpy =
create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO, 1, 1);
cpy->operands[0] = wave_size == 32 ? Operand::c32((uint32_t)mask) : Operand::c64(mask); cpy->operands[0] = wave_size == 32 ? Operand::c32((uint32_t)mask) : Operand::c64(mask);
} }
@ -4821,10 +4824,12 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
*/ */
if (instr->opcode == aco_opcode::s_and_b32 || instr->opcode == aco_opcode::s_and_b64) { if (instr->opcode == aco_opcode::s_and_b32 || instr->opcode == aco_opcode::s_and_b64) {
if (instr->operands[0].isTemp() && fixed_to_exec(instr->operands[1]) && if (instr->operands[0].isTemp() && fixed_to_exec(instr->operands[1]) &&
ctx.uses[instr->operands[0].tempId()] == 1 && ctx.uses[instr->definitions[1].tempId()] == 0 && ctx.uses[instr->operands[0].tempId()] == 1 &&
ctx.uses[instr->definitions[1].tempId()] == 0 &&
can_eliminate_and_exec(ctx, instr->operands[0].getTemp(), instr->pass_flags)) { can_eliminate_and_exec(ctx, instr->operands[0].getTemp(), instr->pass_flags)) {
ctx.uses[instr->operands[0].tempId()]--; ctx.uses[instr->operands[0].tempId()]--;
ctx.info[instr->operands[0].tempId()].instr->definitions[0].setTemp(instr->definitions[0].getTemp()); ctx.info[instr->operands[0].tempId()].instr->definitions[0].setTemp(
instr->definitions[0].getTemp());
instr.reset(); instr.reset();
return; return;
} }

View file

@ -460,8 +460,7 @@ print_regs(ra_ctx& ctx, bool vgprs, RegisterFile& reg_file)
printf("%u/%u used, %u/%u free\n", regs.size - free_regs, regs.size, free_regs, regs.size); printf("%u/%u used, %u/%u free\n", regs.size - free_regs, regs.size, free_regs, regs.size);
/* print assignments ordered by registers */ /* print assignments ordered by registers */
std::map<PhysReg, std::pair<unsigned, unsigned>> std::map<PhysReg, std::pair<unsigned, unsigned>> regs_to_vars; /* maps to byte size and temp id */
regs_to_vars; /* maps to byte size and temp id */
for (unsigned id : find_vars(ctx, reg_file, regs)) { for (unsigned id : find_vars(ctx, reg_file, regs)) {
const assignment& var = ctx.assignments[id]; const assignment& var = ctx.assignments[id];
PhysReg reg = var.reg; PhysReg reg = var.reg;
@ -1088,8 +1087,8 @@ get_reg_for_create_vector_copy(ra_ctx& ctx, RegisterFile& reg_file,
instr->operands[i].regClass() == info.rc) { instr->operands[i].regClass() == info.rc) {
assignment& op = ctx.assignments[instr->operands[i].tempId()]; assignment& op = ctx.assignments[instr->operands[i].tempId()];
/* if everything matches, create parallelcopy for the killed operand */ /* if everything matches, create parallelcopy for the killed operand */
if (!intersects(def_reg, PhysRegInterval{op.reg, op.rc.size()}) && if (!intersects(def_reg, PhysRegInterval{op.reg, op.rc.size()}) && op.reg != scc &&
op.reg != scc && reg_file.get_id(op.reg) == instr->operands[i].tempId()) { reg_file.get_id(op.reg) == instr->operands[i].tempId()) {
Definition pc_def = Definition(reg, info.rc); Definition pc_def = Definition(reg, info.rc);
parallelcopies.emplace_back(instr->operands[i], pc_def); parallelcopies.emplace_back(instr->operands[i], pc_def);
return op.reg; return op.reg;
@ -1655,8 +1654,7 @@ get_reg(ra_ctx& ctx, RegisterFile& reg_file, Temp temp,
return vcc; return vcc;
} }
if (ctx.assignments[temp.id()].m0) { if (ctx.assignments[temp.id()].m0) {
if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, m0) && if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, m0) && can_write_m0(instr))
can_write_m0(instr))
return m0; return m0;
} }

View file

@ -587,8 +587,10 @@ perform_hazard_query(hazard_query* query, Instruction* instr, bool upwards)
/* don't move non-reorderable instructions */ /* don't move non-reorderable instructions */
if (instr->opcode == aco_opcode::s_memtime || instr->opcode == aco_opcode::s_memrealtime || if (instr->opcode == aco_opcode::s_memtime || instr->opcode == aco_opcode::s_memrealtime ||
instr->opcode == aco_opcode::s_setprio || instr->opcode == aco_opcode::s_getreg_b32 || instr->opcode == aco_opcode::s_setprio || instr->opcode == aco_opcode::s_getreg_b32 ||
instr->opcode == aco_opcode::p_init_scratch || instr->opcode == aco_opcode::p_jump_to_epilog || instr->opcode == aco_opcode::p_init_scratch ||
instr->opcode == aco_opcode::s_sendmsg_rtn_b32 || instr->opcode == aco_opcode::s_sendmsg_rtn_b64) instr->opcode == aco_opcode::p_jump_to_epilog ||
instr->opcode == aco_opcode::s_sendmsg_rtn_b32 ||
instr->opcode == aco_opcode::s_sendmsg_rtn_b64)
return hazard_fail_unreorderable; return hazard_fail_unreorderable;
memory_event_set instr_set; memory_event_set instr_set;
@ -663,8 +665,7 @@ schedule_SMEM(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& registe
int16_t k = 0; int16_t k = 0;
/* don't move s_memtime/s_memrealtime */ /* don't move s_memtime/s_memrealtime */
if (current->opcode == aco_opcode::s_memtime || if (current->opcode == aco_opcode::s_memtime || current->opcode == aco_opcode::s_memrealtime ||
current->opcode == aco_opcode::s_memrealtime ||
current->opcode == aco_opcode::s_sendmsg_rtn_b32 || current->opcode == aco_opcode::s_sendmsg_rtn_b32 ||
current->opcode == aco_opcode::s_sendmsg_rtn_b64) current->opcode == aco_opcode::s_sendmsg_rtn_b64)
return; return;

View file

@ -133,8 +133,8 @@ struct aco_compiler_options {
enum amd_gfx_level gfx_level; enum amd_gfx_level gfx_level;
uint32_t address32_hi; uint32_t address32_hi;
struct { struct {
void (*func)(void *private_data, enum aco_compiler_debug_level level, const char *message); void (*func)(void* private_data, enum aco_compiler_debug_level level, const char* message);
void *private_data; void* private_data;
} debug; } debug;
}; };

View file

@ -94,7 +94,8 @@ struct spill_ctx {
spill_ctx(const RegisterDemand target_pressure_, Program* program_, spill_ctx(const RegisterDemand target_pressure_, Program* program_,
std::vector<std::vector<RegisterDemand>> register_demand_) std::vector<std::vector<RegisterDemand>> register_demand_)
: target_pressure(target_pressure_), program(program_), memory(), : target_pressure(target_pressure_), program(program_), memory(),
register_demand(std::move(register_demand_)), renames(program->blocks.size(), aco::map<Temp, Temp>(memory)), register_demand(std::move(register_demand_)),
renames(program->blocks.size(), aco::map<Temp, Temp>(memory)),
spills_entry(program->blocks.size(), aco::unordered_map<Temp, uint32_t>(memory)), spills_entry(program->blocks.size(), aco::unordered_map<Temp, uint32_t>(memory)),
spills_exit(program->blocks.size(), aco::unordered_map<Temp, uint32_t>(memory)), spills_exit(program->blocks.size(), aco::unordered_map<Temp, uint32_t>(memory)),
processed(program->blocks.size(), false), processed(program->blocks.size(), false),
@ -226,7 +227,8 @@ next_uses_per_block(spill_ctx& ctx, unsigned block_idx, uint32_t& worklist)
std::pair<uint32_t, uint32_t> distance{block_idx, 0}; std::pair<uint32_t, uint32_t> distance{block_idx, 0};
auto it = instr->definitions[0].isTemp() ? next_use_distances_start.find(instr->definitions[0].getTemp()) auto it = instr->definitions[0].isTemp()
? next_use_distances_start.find(instr->definitions[0].getTemp())
: next_use_distances_start.end(); : next_use_distances_start.end();
if (it != next_use_distances_start.end() && if (it != next_use_distances_start.end() &&
phi_defs.insert(instr->definitions[0].getTemp()).second) { phi_defs.insert(instr->definitions[0].getTemp()).second) {
@ -1407,7 +1409,8 @@ load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset, Block& block,
continue; continue;
/* find p_logical_end */ /* find p_logical_end */
std::vector<aco_ptr<Instruction>>& prev_instructions = ctx.program->blocks[block_idx].instructions; std::vector<aco_ptr<Instruction>>& prev_instructions =
ctx.program->blocks[block_idx].instructions;
unsigned idx = prev_instructions.size() - 1; unsigned idx = prev_instructions.size() - 1;
while (prev_instructions[idx]->opcode != aco_opcode::p_logical_end) while (prev_instructions[idx]->opcode != aco_opcode::p_logical_end)
idx--; idx--;
@ -1422,10 +1425,10 @@ load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset, Block& block,
Temp private_segment_buffer = ctx.program->private_segment_buffer; Temp private_segment_buffer = ctx.program->private_segment_buffer;
if (!private_segment_buffer.bytes()) { if (!private_segment_buffer.bytes()) {
Temp addr_lo = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Temp addr_lo =
Operand::c32(aco_symbol_scratch_addr_lo)); bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_lo));
Temp addr_hi = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Temp addr_hi =
Operand::c32(aco_symbol_scratch_addr_hi)); bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_hi));
private_segment_buffer = private_segment_buffer =
bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi); bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
} else if (ctx.program->stage.hw != HWStage::CS) { } else if (ctx.program->stage.hw != HWStage::CS) {
@ -1471,8 +1474,7 @@ setup_vgpr_spill_reload(spill_ctx& ctx, Block& block,
if (ctx.scratch_rsrc == Temp()) { if (ctx.scratch_rsrc == Temp()) {
int32_t saddr = ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size - int32_t saddr = ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size -
ctx.program->dev.scratch_global_offset_min; ctx.program->dev.scratch_global_offset_min;
ctx.scratch_rsrc = ctx.scratch_rsrc = load_scratch_resource(ctx, scratch_offset, block, instructions, saddr);
load_scratch_resource(ctx, scratch_offset, block, instructions, saddr);
} }
} else { } else {
bool add_offset_to_sgpr = bool add_offset_to_sgpr =

View file

@ -35,8 +35,8 @@
namespace aco { namespace aco {
static void static void
aco_log(Program* program, enum aco_compiler_debug_level level, const char* prefix, aco_log(Program* program, enum aco_compiler_debug_level level, const char* prefix, const char* file,
const char* file, unsigned line, const char* fmt, va_list args) unsigned line, const char* fmt, va_list args)
{ {
char* msg; char* msg;
@ -270,8 +270,7 @@ validate_ir(Program* program)
(instr->opcode == aco_opcode::p_bpermute_gfx11w64 && i == 0) || (instr->opcode == aco_opcode::p_bpermute_gfx11w64 && i == 0) ||
(flat && i == 1) || (instr->isMIMG() && (i == 1 || i == 2)) || (flat && i == 1) || (instr->isMIMG() && (i == 1 || i == 2)) ||
((instr->isMUBUF() || instr->isMTBUF()) && i == 1) || ((instr->isMUBUF() || instr->isMTBUF()) && i == 1) ||
(instr->isScratch() && i == 0) || (instr->isScratch() && i == 0) || (instr->isDS() && i == 0) ||
(instr->isDS() && i == 0) ||
(instr->opcode == aco_opcode::p_init_scratch && i == 0); (instr->opcode == aco_opcode::p_init_scratch && i == 0);
check(can_be_undef, "Undefs can only be used in certain operands", instr.get()); check(can_be_undef, "Undefs can only be used in certain operands", instr.get());
} else { } else {
@ -393,7 +392,7 @@ validate_ir(Program* program)
"OPSEL_LO set for unsupported instruction format", instr.get()); "OPSEL_LO set for unsupported instruction format", instr.get());
check(!instr->valu().opsel_hi || instr->isVOP3P(), check(!instr->valu().opsel_hi || instr->isVOP3P(),
"OPSEL_HI set for unsupported instruction format", instr.get()); "OPSEL_HI set for unsupported instruction format", instr.get());
check(!instr->valu().omod || instr->isVOP3() ||instr->isSDWA(), check(!instr->valu().omod || instr->isVOP3() || instr->isSDWA(),
"OMOD set for unsupported instruction format", instr.get()); "OMOD set for unsupported instruction format", instr.get());
check(!instr->valu().clamp || instr->isVOP3() || instr->isVOP3P() || check(!instr->valu().clamp || instr->isVOP3() || instr->isVOP3P() ||
instr->isSDWA() || instr->isVINTERP_INREG(), instr->isSDWA() || instr->isVINTERP_INREG(),
@ -562,7 +561,8 @@ validate_ir(Program* program)
instr->definitions[2].regClass().size() == 1, instr->definitions[2].regClass().size() == 1,
"Third definition of p_dual_src_export_gfx11 must be a v1", instr.get()); "Third definition of p_dual_src_export_gfx11 must be a v1", instr.get());
check(instr->definitions[3].regClass() == program->lane_mask, check(instr->definitions[3].regClass() == program->lane_mask,
"Fourth definition of p_dual_src_export_gfx11 must be a lane mask", instr.get()); "Fourth definition of p_dual_src_export_gfx11 must be a lane mask",
instr.get());
check(instr->definitions[4].physReg() == vcc, check(instr->definitions[4].physReg() == vcc,
"Fifth definition of p_dual_src_export_gfx11 must be vcc", instr.get()); "Fifth definition of p_dual_src_export_gfx11 must be vcc", instr.get());
check(instr->definitions[5].physReg() == scc, check(instr->definitions[5].physReg() == scc,
@ -627,7 +627,9 @@ validate_ir(Program* program)
check(instr->operands.size() < 4 || instr->operands[3].isOfType(RegType::vgpr), check(instr->operands.size() < 4 || instr->operands[3].isOfType(RegType::vgpr),
"VMEM write data must be vgpr", instr.get()); "VMEM write data must be vgpr", instr.get());
const bool d16 = instr->opcode == aco_opcode::buffer_load_dword || // FIXME: used to spill subdword variables const bool d16 =
instr->opcode ==
aco_opcode::buffer_load_dword || // FIXME: used to spill subdword variables
instr->opcode == aco_opcode::buffer_load_ubyte || instr->opcode == aco_opcode::buffer_load_ubyte ||
instr->opcode == aco_opcode::buffer_load_sbyte || instr->opcode == aco_opcode::buffer_load_sbyte ||
instr->opcode == aco_opcode::buffer_load_ushort || instr->opcode == aco_opcode::buffer_load_ushort ||
@ -763,11 +765,14 @@ validate_ir(Program* program)
break; break;
} }
case Format::LDSDIR: { case Format::LDSDIR: {
check(instr->definitions.size() == 1 && instr->definitions[0].regClass() == v1, "LDSDIR must have an v1 definition", instr.get()); check(instr->definitions.size() == 1 && instr->definitions[0].regClass() == v1,
"LDSDIR must have an v1 definition", instr.get());
check(instr->operands.size() == 1, "LDSDIR must have an operand", instr.get()); check(instr->operands.size() == 1, "LDSDIR must have an operand", instr.get());
if (!instr->operands.empty()) { if (!instr->operands.empty()) {
check(instr->operands[0].regClass() == s1, "LDSDIR must have an s1 operand", instr.get()); check(instr->operands[0].regClass() == s1, "LDSDIR must have an s1 operand",
check(instr->operands[0].isFixed() && instr->operands[0].physReg() == m0, "LDSDIR must have an operand fixed to m0", instr.get()); instr.get());
check(instr->operands[0].isFixed() && instr->operands[0].physReg() == m0,
"LDSDIR must have an operand fixed to m0", instr.get());
} }
break; break;
} }

View file

@ -35,19 +35,20 @@
#include <string> #include <string>
struct TestDef { struct TestDef {
const char *name; const char* name;
const char *source_file; const char* source_file;
void (*func)(); void (*func)();
}; };
extern std::map<std::string, TestDef> tests; extern std::map<std::string, TestDef> tests;
extern FILE *output; extern FILE* output;
bool set_variant(const char *name); bool set_variant(const char* name);
inline bool set_variant(amd_gfx_level cls, const char *rest="") inline bool
set_variant(amd_gfx_level cls, const char* rest = "")
{ {
char buf[8+strlen(rest)]; char buf[8 + strlen(rest)];
if (cls != GFX10_3) { if (cls != GFX10_3) {
snprintf(buf, sizeof(buf), "gfx%d%s", cls - GFX6 + 6 - (cls > GFX10_3), rest); snprintf(buf, sizeof(buf), "gfx%d%s", cls - GFX6 + 6 - (cls > GFX10_3), rest);
} else { } else {
@ -56,18 +57,21 @@ inline bool set_variant(amd_gfx_level cls, const char *rest="")
return set_variant(buf); return set_variant(buf);
} }
void fail_test(const char *fmt, ...); void fail_test(const char* fmt, ...);
void skip_test(const char *fmt, ...); void skip_test(const char* fmt, ...);
#define _BEGIN_TEST(name, struct_name) static void struct_name(); static __attribute__((constructor)) void CONCAT2(add_test_, __COUNTER__)() {\ #define _BEGIN_TEST(name, struct_name) \
tests[#name] = (TestDef){#name, ACO_TEST_BUILD_ROOT "/" __FILE__, &struct_name};\ static void struct_name(); \
}\ static __attribute__((constructor)) void CONCAT2(add_test_, __COUNTER__)() \
static void struct_name() {\ { \
tests[#name] = (TestDef){#name, ACO_TEST_BUILD_ROOT "/" __FILE__, &struct_name}; \
} \
static void struct_name() \
{
#define BEGIN_TEST(name) _BEGIN_TEST(name, CONCAT2(Test_, __COUNTER__)) #define BEGIN_TEST(name) _BEGIN_TEST(name, CONCAT2(Test_, __COUNTER__))
#define BEGIN_TEST_TODO(name) _BEGIN_TEST(name, CONCAT2(Test_, __COUNTER__)) #define BEGIN_TEST_TODO(name) _BEGIN_TEST(name, CONCAT2(Test_, __COUNTER__))
#define BEGIN_TEST_FAIL(name) _BEGIN_TEST(name, CONCAT2(Test_, __COUNTER__)) #define BEGIN_TEST_FAIL(name) _BEGIN_TEST(name, CONCAT2(Test_, __COUNTER__))
#define END_TEST \ #define END_TEST }
}
#endif /* ACO_TEST_COMMON_H */ #endif /* ACO_TEST_COMMON_H */

View file

@ -22,19 +22,20 @@
* *
*/ */
#include "helpers.h" #include "helpers.h"
#include "vulkan/vk_format.h"
#include "common/amd_family.h" #include "common/amd_family.h"
#include <stdio.h> #include "vulkan/vk_format.h"
#include <sstream>
#include <llvm-c/Target.h> #include <llvm-c/Target.h>
#include <mutex> #include <mutex>
#include <sstream>
#include <stdio.h>
using namespace aco; using namespace aco;
extern "C" { extern "C" {
PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr( PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(VkInstance instance, const char* pName);
VkInstance instance,
const char* pName);
} }
ac_shader_config config; ac_shader_config config;
@ -47,32 +48,34 @@ static VkInstance instance_cache[CHIP_LAST] = {VK_NULL_HANDLE};
static VkDevice device_cache[CHIP_LAST] = {VK_NULL_HANDLE}; static VkDevice device_cache[CHIP_LAST] = {VK_NULL_HANDLE};
static std::mutex create_device_mutex; static std::mutex create_device_mutex;
#define FUNCTION_LIST\ #define FUNCTION_LIST \
ITEM(CreateInstance)\ ITEM(CreateInstance) \
ITEM(DestroyInstance)\ ITEM(DestroyInstance) \
ITEM(EnumeratePhysicalDevices)\ ITEM(EnumeratePhysicalDevices) \
ITEM(GetPhysicalDeviceProperties2)\ ITEM(GetPhysicalDeviceProperties2) \
ITEM(CreateDevice)\ ITEM(CreateDevice) \
ITEM(DestroyDevice)\ ITEM(DestroyDevice) \
ITEM(CreateShaderModule)\ ITEM(CreateShaderModule) \
ITEM(DestroyShaderModule)\ ITEM(DestroyShaderModule) \
ITEM(CreateGraphicsPipelines)\ ITEM(CreateGraphicsPipelines) \
ITEM(CreateComputePipelines)\ ITEM(CreateComputePipelines) \
ITEM(DestroyPipeline)\ ITEM(DestroyPipeline) \
ITEM(CreateDescriptorSetLayout)\ ITEM(CreateDescriptorSetLayout) \
ITEM(DestroyDescriptorSetLayout)\ ITEM(DestroyDescriptorSetLayout) \
ITEM(CreatePipelineLayout)\ ITEM(CreatePipelineLayout) \
ITEM(DestroyPipelineLayout)\ ITEM(DestroyPipelineLayout) \
ITEM(CreateRenderPass)\ ITEM(CreateRenderPass) \
ITEM(DestroyRenderPass)\ ITEM(DestroyRenderPass) \
ITEM(GetPipelineExecutablePropertiesKHR)\ ITEM(GetPipelineExecutablePropertiesKHR) \
ITEM(GetPipelineExecutableInternalRepresentationsKHR) ITEM(GetPipelineExecutableInternalRepresentationsKHR)
#define ITEM(n) PFN_vk##n n; #define ITEM(n) PFN_vk##n n;
FUNCTION_LIST FUNCTION_LIST
#undef ITEM #undef ITEM
void create_program(enum amd_gfx_level gfx_level, Stage stage, unsigned wave_size, enum radeon_family family) void
create_program(enum amd_gfx_level gfx_level, Stage stage, unsigned wave_size,
enum radeon_family family)
{ {
memset(&config, 0, sizeof(config)); memset(&config, 0, sizeof(config));
info.wave_size = wave_size; info.wave_size = wave_size;
@ -90,7 +93,7 @@ void create_program(enum amd_gfx_level gfx_level, Stage stage, unsigned wave_siz
program->debug.func = nullptr; program->debug.func = nullptr;
program->debug.private_data = nullptr; program->debug.private_data = nullptr;
Block *block = program->create_and_insert_block(); Block* block = program->create_and_insert_block();
block->kind = block_kind_top_level; block->kind = block_kind_top_level;
bld = Builder(program.get(), &program->blocks[0]); bld = Builder(program.get(), &program->blocks[0]);
@ -98,9 +101,9 @@ void create_program(enum amd_gfx_level gfx_level, Stage stage, unsigned wave_siz
config.float_mode = program->blocks[0].fp_mode.val; config.float_mode = program->blocks[0].fp_mode.val;
} }
bool setup_cs(const char *input_spec, enum amd_gfx_level gfx_level, bool
enum radeon_family family, const char* subvariant, setup_cs(const char* input_spec, enum amd_gfx_level gfx_level, enum radeon_family family,
unsigned wave_size) const char* subvariant, unsigned wave_size)
{ {
if (!set_variant(gfx_level, subvariant)) if (!set_variant(gfx_level, subvariant))
return false; return false;
@ -117,7 +120,8 @@ bool setup_cs(const char *input_spec, enum amd_gfx_level gfx_level,
input_classes.push_back(RegClass::get(type, size * (in_bytes ? 1 : 4))); input_classes.push_back(RegClass::get(type, size * (in_bytes ? 1 : 4)));
input_spec += 2 + in_bytes; input_spec += 2 + in_bytes;
while (input_spec[0] == ' ') input_spec++; while (input_spec[0] == ' ')
input_spec++;
} }
aco_ptr<Instruction> startpgm{create_instruction<Pseudo_instruction>( aco_ptr<Instruction> startpgm{create_instruction<Pseudo_instruction>(
@ -132,7 +136,8 @@ bool setup_cs(const char *input_spec, enum amd_gfx_level gfx_level,
return true; return true;
} }
void finish_program(Program *prog) void
finish_program(Program* prog)
{ {
for (Block& BB : prog->blocks) { for (Block& BB : prog->blocks) {
for (unsigned idx : BB.linear_preds) for (unsigned idx : BB.linear_preds)
@ -149,7 +154,8 @@ void finish_program(Program *prog)
} }
} }
void finish_validator_test() void
finish_validator_test()
{ {
finish_program(program.get()); finish_program(program.get());
aco_print_program(program.get(), output); aco_print_program(program.get(), output);
@ -160,7 +166,8 @@ void finish_validator_test()
fprintf(output, "Validation failed\n"); fprintf(output, "Validation failed\n");
} }
void finish_opt_test() void
finish_opt_test()
{ {
finish_program(program.get()); finish_program(program.get());
if (!aco::validate_ir(program.get())) { if (!aco::validate_ir(program.get())) {
@ -175,7 +182,8 @@ void finish_opt_test()
aco_print_program(program.get(), output); aco_print_program(program.get(), output);
} }
void finish_setup_reduce_temp_test() void
finish_setup_reduce_temp_test()
{ {
finish_program(program.get()); finish_program(program.get());
if (!aco::validate_ir(program.get())) { if (!aco::validate_ir(program.get())) {
@ -190,7 +198,8 @@ void finish_setup_reduce_temp_test()
aco_print_program(program.get(), output); aco_print_program(program.get(), output);
} }
void finish_ra_test(ra_test_policy policy, bool lower) void
finish_ra_test(ra_test_policy policy, bool lower)
{ {
finish_program(program.get()); finish_program(program.get());
if (!aco::validate_ir(program.get())) { if (!aco::validate_ir(program.get())) {
@ -215,42 +224,48 @@ void finish_ra_test(ra_test_policy policy, bool lower)
aco_print_program(program.get(), output); aco_print_program(program.get(), output);
} }
void finish_optimizer_postRA_test() void
finish_optimizer_postRA_test()
{ {
finish_program(program.get()); finish_program(program.get());
aco::optimize_postRA(program.get()); aco::optimize_postRA(program.get());
aco_print_program(program.get(), output); aco_print_program(program.get(), output);
} }
void finish_to_hw_instr_test() void
finish_to_hw_instr_test()
{ {
finish_program(program.get()); finish_program(program.get());
aco::lower_to_hw_instr(program.get()); aco::lower_to_hw_instr(program.get());
aco_print_program(program.get(), output); aco_print_program(program.get(), output);
} }
void finish_waitcnt_test() void
finish_waitcnt_test()
{ {
finish_program(program.get()); finish_program(program.get());
aco::insert_wait_states(program.get()); aco::insert_wait_states(program.get());
aco_print_program(program.get(), output); aco_print_program(program.get(), output);
} }
void finish_insert_nops_test() void
finish_insert_nops_test()
{ {
finish_program(program.get()); finish_program(program.get());
aco::insert_NOPs(program.get()); aco::insert_NOPs(program.get());
aco_print_program(program.get(), output); aco_print_program(program.get(), output);
} }
void finish_form_hard_clause_test() void
finish_form_hard_clause_test()
{ {
finish_program(program.get()); finish_program(program.get());
aco::form_hard_clauses(program.get()); aco::form_hard_clauses(program.get());
aco_print_program(program.get(), output); aco_print_program(program.get(), output);
} }
void finish_assembler_test() void
finish_assembler_test()
{ {
finish_program(program.get()); finish_program(program.get());
std::vector<uint32_t> binary; std::vector<uint32_t> binary;
@ -261,13 +276,14 @@ void finish_assembler_test()
if (program->gfx_level >= GFX8) { if (program->gfx_level >= GFX8) {
print_asm(program.get(), binary, exec_size / 4u, output); print_asm(program.get(), binary, exec_size / 4u, output);
} else { } else {
//TODO: maybe we should use CLRX and skip this test if it's not available? // TODO: maybe we should use CLRX and skip this test if it's not available?
for (uint32_t dword : binary) for (uint32_t dword : binary)
fprintf(output, "%.8x\n", dword); fprintf(output, "%.8x\n", dword);
} }
} }
void writeout(unsigned i, Temp tmp) void
writeout(unsigned i, Temp tmp)
{ {
if (tmp.id()) if (tmp.id())
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), tmp); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), tmp);
@ -275,22 +291,26 @@ void writeout(unsigned i, Temp tmp)
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i));
} }
void writeout(unsigned i, aco::Builder::Result res) void
writeout(unsigned i, aco::Builder::Result res)
{ {
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), res); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), res);
} }
void writeout(unsigned i, Operand op) void
writeout(unsigned i, Operand op)
{ {
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op);
} }
void writeout(unsigned i, Operand op0, Operand op1) void
writeout(unsigned i, Operand op0, Operand op1)
{ {
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op0, op1); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op0, op1);
} }
Temp fneg(Temp src, Builder b) Temp
fneg(Temp src, Builder b)
{ {
if (src.bytes() == 2) if (src.bytes() == 2)
return b.vop2(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0xbc00u), src); return b.vop2(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0xbc00u), src);
@ -298,35 +318,42 @@ Temp fneg(Temp src, Builder b)
return b.vop2(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0xbf800000u), src); return b.vop2(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0xbf800000u), src);
} }
Temp fabs(Temp src, Builder b) Temp
fabs(Temp src, Builder b)
{ {
if (src.bytes() == 2) { if (src.bytes() == 2) {
Builder::Result res = b.vop2_e64(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0x3c00), src); Builder::Result res =
b.vop2_e64(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0x3c00), src);
res->valu().abs[1] = true; res->valu().abs[1] = true;
return res; return res;
} else { } else {
Builder::Result res = b.vop2_e64(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0x3f800000u), src); Builder::Result res =
b.vop2_e64(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0x3f800000u), src);
res->valu().abs[1] = true; res->valu().abs[1] = true;
return res; return res;
} }
} }
Temp f2f32(Temp src, Builder b) Temp
f2f32(Temp src, Builder b)
{ {
return b.vop1(aco_opcode::v_cvt_f32_f16, b.def(v1), src); return b.vop1(aco_opcode::v_cvt_f32_f16, b.def(v1), src);
} }
Temp f2f16(Temp src, Builder b) Temp
f2f16(Temp src, Builder b)
{ {
return b.vop1(aco_opcode::v_cvt_f16_f32, b.def(v2b), src); return b.vop1(aco_opcode::v_cvt_f16_f32, b.def(v2b), src);
} }
Temp u2u16(Temp src, Builder b) Temp
u2u16(Temp src, Builder b)
{ {
return b.pseudo(aco_opcode::p_extract_vector, b.def(v2b), src, Operand::zero()); return b.pseudo(aco_opcode::p_extract_vector, b.def(v2b), src, Operand::zero());
} }
Temp fadd(Temp src0, Temp src1, Builder b) Temp
fadd(Temp src0, Temp src1, Builder b)
{ {
if (src0.bytes() == 2) if (src0.bytes() == 2)
return b.vop2(aco_opcode::v_add_f16, b.def(v2b), src0, src1); return b.vop2(aco_opcode::v_add_f16, b.def(v2b), src0, src1);
@ -334,7 +361,8 @@ Temp fadd(Temp src0, Temp src1, Builder b)
return b.vop2(aco_opcode::v_add_f32, b.def(v1), src0, src1); return b.vop2(aco_opcode::v_add_f32, b.def(v1), src0, src1);
} }
Temp fmul(Temp src0, Temp src1, Builder b) Temp
fmul(Temp src0, Temp src1, Builder b)
{ {
if (src0.bytes() == 2) if (src0.bytes() == 2)
return b.vop2(aco_opcode::v_mul_f16, b.def(v2b), src0, src1); return b.vop2(aco_opcode::v_mul_f16, b.def(v2b), src0, src1);
@ -342,7 +370,8 @@ Temp fmul(Temp src0, Temp src1, Builder b)
return b.vop2(aco_opcode::v_mul_f32, b.def(v1), src0, src1); return b.vop2(aco_opcode::v_mul_f32, b.def(v1), src0, src1);
} }
Temp fma(Temp src0, Temp src1, Temp src2, Builder b) Temp
fma(Temp src0, Temp src1, Temp src2, Builder b)
{ {
if (src0.bytes() == 2) if (src0.bytes() == 2)
return b.vop3(aco_opcode::v_fma_f16, b.def(v2b), src0, src1, src2); return b.vop3(aco_opcode::v_fma_f16, b.def(v2b), src0, src1, src2);
@ -350,39 +379,45 @@ Temp fma(Temp src0, Temp src1, Temp src2, Builder b)
return b.vop3(aco_opcode::v_fma_f32, b.def(v1), src0, src1, src2); return b.vop3(aco_opcode::v_fma_f32, b.def(v1), src0, src1, src2);
} }
Temp fsat(Temp src, Builder b) Temp
fsat(Temp src, Builder b)
{ {
if (src.bytes() == 2) if (src.bytes() == 2)
return b.vop3(aco_opcode::v_med3_f16, b.def(v2b), Operand::c16(0u), return b.vop3(aco_opcode::v_med3_f16, b.def(v2b), Operand::c16(0u), Operand::c16(0x3c00u),
Operand::c16(0x3c00u), src); src);
else else
return b.vop3(aco_opcode::v_med3_f32, b.def(v1), Operand::zero(), return b.vop3(aco_opcode::v_med3_f32, b.def(v1), Operand::zero(), Operand::c32(0x3f800000u),
Operand::c32(0x3f800000u), src); src);
} }
Temp fmin(Temp src0, Temp src1, Builder b) Temp
fmin(Temp src0, Temp src1, Builder b)
{ {
return b.vop2(aco_opcode::v_min_f32, b.def(v1), src0, src1); return b.vop2(aco_opcode::v_min_f32, b.def(v1), src0, src1);
} }
Temp fmax(Temp src0, Temp src1, Builder b) Temp
fmax(Temp src0, Temp src1, Builder b)
{ {
return b.vop2(aco_opcode::v_max_f32, b.def(v1), src0, src1); return b.vop2(aco_opcode::v_max_f32, b.def(v1), src0, src1);
} }
Temp ext_ushort(Temp src, unsigned idx, Builder b) Temp
ext_ushort(Temp src, unsigned idx, Builder b)
{ {
return b.pseudo(aco_opcode::p_extract, b.def(src.regClass()), src, Operand::c32(idx), return b.pseudo(aco_opcode::p_extract, b.def(src.regClass()), src, Operand::c32(idx),
Operand::c32(16u), Operand::c32(false)); Operand::c32(16u), Operand::c32(false));
} }
Temp ext_ubyte(Temp src, unsigned idx, Builder b) Temp
ext_ubyte(Temp src, unsigned idx, Builder b)
{ {
return b.pseudo(aco_opcode::p_extract, b.def(src.regClass()), src, Operand::c32(idx), return b.pseudo(aco_opcode::p_extract, b.def(src.regClass()), src, Operand::c32(idx),
Operand::c32(8u), Operand::c32(false)); Operand::c32(8u), Operand::c32(false));
} }
void emit_divergent_if_else(Program* prog, aco::Builder& b, Operand cond, std::function<void()> then, void
emit_divergent_if_else(Program* prog, aco::Builder& b, Operand cond, std::function<void()> then,
std::function<void()> els) std::function<void()> els)
{ {
prog->blocks.reserve(prog->blocks.size() + 6); prog->blocks.reserve(prog->blocks.size() + 6);
@ -418,8 +453,10 @@ void emit_divergent_if_else(Program* prog, aco::Builder& b, Operand cond, std::f
PhysReg saved_exec_reg(84); PhysReg saved_exec_reg(84);
b.reset(if_block); b.reset(if_block);
Temp saved_exec = b.sop1(Builder::s_and_saveexec, b.def(b.lm, saved_exec_reg), Definition(scc, s1), Definition(exec, b.lm), cond, Operand(exec, b.lm)); Temp saved_exec = b.sop1(Builder::s_and_saveexec, b.def(b.lm, saved_exec_reg),
b.branch(aco_opcode::p_cbranch_nz, Definition(vcc, bld.lm), then_logical->index, then_linear->index); Definition(scc, s1), Definition(exec, b.lm), cond, Operand(exec, b.lm));
b.branch(aco_opcode::p_cbranch_nz, Definition(vcc, bld.lm), then_logical->index,
then_linear->index);
b.reset(then_logical); b.reset(then_logical);
b.pseudo(aco_opcode::p_logical_start); b.pseudo(aco_opcode::p_logical_start);
@ -431,8 +468,10 @@ void emit_divergent_if_else(Program* prog, aco::Builder& b, Operand cond, std::f
b.branch(aco_opcode::p_branch, Definition(vcc, bld.lm), invert->index); b.branch(aco_opcode::p_branch, Definition(vcc, bld.lm), invert->index);
b.reset(invert); b.reset(invert);
b.sop2(Builder::s_andn2, Definition(exec, bld.lm), Definition(scc, s1), Operand(saved_exec, saved_exec_reg), Operand(exec, bld.lm)); b.sop2(Builder::s_andn2, Definition(exec, bld.lm), Definition(scc, s1),
b.branch(aco_opcode::p_cbranch_nz, Definition(vcc, bld.lm), else_logical->index, else_linear->index); Operand(saved_exec, saved_exec_reg), Operand(exec, bld.lm));
b.branch(aco_opcode::p_cbranch_nz, Definition(vcc, bld.lm), else_logical->index,
else_linear->index);
b.reset(else_logical); b.reset(else_logical);
b.pseudo(aco_opcode::p_logical_start); b.pseudo(aco_opcode::p_logical_start);
@ -444,42 +483,29 @@ void emit_divergent_if_else(Program* prog, aco::Builder& b, Operand cond, std::f
b.branch(aco_opcode::p_branch, Definition(vcc, bld.lm), endif_block->index); b.branch(aco_opcode::p_branch, Definition(vcc, bld.lm), endif_block->index);
b.reset(endif_block); b.reset(endif_block);
b.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm), Operand(saved_exec, saved_exec_reg)); b.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm),
Operand(saved_exec, saved_exec_reg));
} }
VkDevice get_vk_device(enum amd_gfx_level gfx_level) VkDevice
get_vk_device(enum amd_gfx_level gfx_level)
{ {
enum radeon_family family; enum radeon_family family;
switch (gfx_level) { switch (gfx_level) {
case GFX6: case GFX6: family = CHIP_TAHITI; break;
family = CHIP_TAHITI; case GFX7: family = CHIP_BONAIRE; break;
break; case GFX8: family = CHIP_POLARIS10; break;
case GFX7: case GFX9: family = CHIP_VEGA10; break;
family = CHIP_BONAIRE; case GFX10: family = CHIP_NAVI10; break;
break; case GFX10_3: family = CHIP_NAVI21; break;
case GFX8: case GFX11: family = CHIP_GFX1100; break;
family = CHIP_POLARIS10; default: family = CHIP_UNKNOWN; break;
break;
case GFX9:
family = CHIP_VEGA10;
break;
case GFX10:
family = CHIP_NAVI10;
break;
case GFX10_3:
family = CHIP_NAVI21;
break;
case GFX11:
family = CHIP_GFX1100;
break;
default:
family = CHIP_UNKNOWN;
break;
} }
return get_vk_device(family); return get_vk_device(family);
} }
VkDevice get_vk_device(enum radeon_family family) VkDevice
get_vk_device(enum radeon_family family)
{ {
assert(family != CHIP_UNKNOWN); assert(family != CHIP_UNKNOWN);
@ -496,12 +522,13 @@ VkDevice get_vk_device(enum radeon_family family)
VkInstanceCreateInfo instance_create_info = {}; VkInstanceCreateInfo instance_create_info = {};
instance_create_info.pApplicationInfo = &app_info; instance_create_info.pApplicationInfo = &app_info;
instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
ASSERTED VkResult result = ((PFN_vkCreateInstance)vk_icdGetInstanceProcAddr(NULL, "vkCreateInstance"))(&instance_create_info, NULL, &instance_cache[family]); ASSERTED VkResult result = ((PFN_vkCreateInstance)vk_icdGetInstanceProcAddr(
NULL, "vkCreateInstance"))(&instance_create_info, NULL, &instance_cache[family]);
assert(result == VK_SUCCESS); assert(result == VK_SUCCESS);
#define ITEM(n) n = (PFN_vk##n)vk_icdGetInstanceProcAddr(instance_cache[family], "vk" #n); #define ITEM(n) n = (PFN_vk##n)vk_icdGetInstanceProcAddr(instance_cache[family], "vk" #n);
FUNCTION_LIST FUNCTION_LIST
#undef ITEM #undef ITEM
uint32_t device_count = 1; uint32_t device_count = 1;
VkPhysicalDevice device = VK_NULL_HANDLE; VkPhysicalDevice device = VK_NULL_HANDLE;
@ -511,7 +538,7 @@ VkDevice get_vk_device(enum radeon_family family)
VkDeviceCreateInfo device_create_info = {}; VkDeviceCreateInfo device_create_info = {};
device_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; device_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
static const char *extensions[] = {"VK_KHR_pipeline_executable_properties"}; static const char* extensions[] = {"VK_KHR_pipeline_executable_properties"};
device_create_info.enabledExtensionCount = sizeof(extensions) / sizeof(extensions[0]); device_create_info.enabledExtensionCount = sizeof(extensions) / sizeof(extensions[0]);
device_create_info.ppEnabledExtensionNames = extensions; device_create_info.ppEnabledExtensionNames = extensions;
result = CreateDevice(device, &device_create_info, NULL, &device_cache[family]); result = CreateDevice(device, &device_create_info, NULL, &device_cache[family]);
@ -520,7 +547,8 @@ VkDevice get_vk_device(enum radeon_family family)
} }
static struct DestroyDevices { static struct DestroyDevices {
~DestroyDevices() { ~DestroyDevices()
{
for (unsigned i = 0; i < CHIP_LAST; i++) { for (unsigned i = 0; i < CHIP_LAST; i++) {
if (!device_cache[i]) if (!device_cache[i])
continue; continue;
@ -530,8 +558,9 @@ static struct DestroyDevices {
} }
} destroy_devices; } destroy_devices;
void print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBits stages, void
const char *name, bool remove_encoding) print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBits stages,
const char* name, bool remove_encoding)
{ {
uint32_t executable_count = 16; uint32_t executable_count = 16;
VkPipelineExecutablePropertiesKHR executables[16]; VkPipelineExecutablePropertiesKHR executables[16];
@ -539,7 +568,8 @@ void print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBi
pipeline_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR; pipeline_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR;
pipeline_info.pNext = NULL; pipeline_info.pNext = NULL;
pipeline_info.pipeline = pipeline; pipeline_info.pipeline = pipeline;
ASSERTED VkResult result = GetPipelineExecutablePropertiesKHR(device, &pipeline_info, &executable_count, executables); ASSERTED VkResult result =
GetPipelineExecutablePropertiesKHR(device, &pipeline_info, &executable_count, executables);
assert(result == VK_SUCCESS); assert(result == VK_SUCCESS);
uint32_t executable = 0; uint32_t executable = 0;
@ -570,13 +600,13 @@ void print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBi
} }
assert(requested_ir && "Could not find requested IR"); assert(requested_ir && "Could not find requested IR");
char *data = (char*)malloc(requested_ir->dataSize); char* data = (char*)malloc(requested_ir->dataSize);
requested_ir->pData = data; requested_ir->pData = data;
result = GetPipelineExecutableInternalRepresentationsKHR(device, &exec_info, &ir_count, ir); result = GetPipelineExecutableInternalRepresentationsKHR(device, &exec_info, &ir_count, ir);
assert(result == VK_SUCCESS); assert(result == VK_SUCCESS);
if (remove_encoding) { if (remove_encoding) {
for (char *c = data; *c; c++) { for (char* c = data; *c; c++) {
if (*c == ';') { if (*c == ';') {
for (; *c && *c != '\n'; c++) for (; *c && *c != '\n'; c++)
*c = ' '; *c = ' ';
@ -588,7 +618,8 @@ void print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBi
free(data); free(data);
} }
VkShaderModule __qoCreateShaderModule(VkDevice dev, const QoShaderModuleCreateInfo *module_info) VkShaderModule
__qoCreateShaderModule(VkDevice dev, const QoShaderModuleCreateInfo* module_info)
{ {
VkShaderModuleCreateInfo vk_module_info; VkShaderModuleCreateInfo vk_module_info;
vk_module_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; vk_module_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
@ -604,7 +635,8 @@ VkShaderModule __qoCreateShaderModule(VkDevice dev, const QoShaderModuleCreateIn
return module; return module;
} }
PipelineBuilder::PipelineBuilder(VkDevice dev) { PipelineBuilder::PipelineBuilder(VkDevice dev)
{
memset(this, 0, sizeof(*this)); memset(this, 0, sizeof(*this));
topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
device = dev; device = dev;
@ -615,7 +647,7 @@ PipelineBuilder::~PipelineBuilder()
DestroyPipeline(device, pipeline, NULL); DestroyPipeline(device, pipeline, NULL);
for (unsigned i = 0; i < (is_compute() ? 1 : gfx_pipeline_info.stageCount); i++) { for (unsigned i = 0; i < (is_compute() ? 1 : gfx_pipeline_info.stageCount); i++) {
VkPipelineShaderStageCreateInfo *stage_info = &stages[i]; VkPipelineShaderStageCreateInfo* stage_info = &stages[i];
if (owned_stages & stage_info->stage) if (owned_stages & stage_info->stage)
DestroyShaderModule(device, stage_info->module, NULL); DestroyShaderModule(device, stage_info->module, NULL);
} }
@ -628,72 +660,87 @@ PipelineBuilder::~PipelineBuilder()
DestroyRenderPass(device, render_pass, NULL); DestroyRenderPass(device, render_pass, NULL);
} }
void PipelineBuilder::add_desc_binding(VkShaderStageFlags stage_flags, uint32_t layout, void
uint32_t binding, VkDescriptorType type, uint32_t count) PipelineBuilder::add_desc_binding(VkShaderStageFlags stage_flags, uint32_t layout, uint32_t binding,
VkDescriptorType type, uint32_t count)
{ {
desc_layouts_used |= 1ull << layout; desc_layouts_used |= 1ull << layout;
desc_bindings[layout][num_desc_bindings[layout]++] = {binding, type, count, stage_flags, NULL}; desc_bindings[layout][num_desc_bindings[layout]++] = {binding, type, count, stage_flags, NULL};
} }
void PipelineBuilder::add_vertex_binding(uint32_t binding, uint32_t stride, VkVertexInputRate rate) void
PipelineBuilder::add_vertex_binding(uint32_t binding, uint32_t stride, VkVertexInputRate rate)
{ {
vs_bindings[vs_input.vertexBindingDescriptionCount++] = {binding, stride, rate}; vs_bindings[vs_input.vertexBindingDescriptionCount++] = {binding, stride, rate};
} }
void PipelineBuilder::add_vertex_attribute(uint32_t location, uint32_t binding, VkFormat format, uint32_t offset) void
PipelineBuilder::add_vertex_attribute(uint32_t location, uint32_t binding, VkFormat format,
uint32_t offset)
{ {
vs_attributes[vs_input.vertexAttributeDescriptionCount++] = {location, binding, format, offset}; vs_attributes[vs_input.vertexAttributeDescriptionCount++] = {location, binding, format, offset};
} }
/* Registers a descriptor binding for every resource declaration of `module`.
 *
 * Each declaration's decl_type is translated to the matching Vulkan descriptor
 * type and added via add_desc_binding() for the module's stage, using the
 * declaration's set and binding numbers. Declarations of any other kind are
 * ignored here.
 */
void
PipelineBuilder::add_resource_decls(QoShaderModuleCreateInfo* module)
{
   for (unsigned idx = 0; idx < module->declarationCount; idx++) {
      const QoShaderDecl& decl = module->pDeclarations[idx];

      VkDescriptorType desc_type;
      switch (decl.decl_type) {
      case QoShaderDeclType_ubo: desc_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; break;
      case QoShaderDeclType_ssbo: desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; break;
      case QoShaderDeclType_img_buf: desc_type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; break;
      case QoShaderDeclType_img: desc_type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; break;
      case QoShaderDeclType_tex_buf: desc_type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; break;
      case QoShaderDeclType_combined: desc_type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; break;
      case QoShaderDeclType_tex: desc_type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; break;
      case QoShaderDeclType_samp: desc_type = VK_DESCRIPTOR_TYPE_SAMPLER; break;
      default:
         /* Not a resource declaration: nothing to bind, move on to the next one. */
         continue;
      }

      add_desc_binding(module->stage, decl.set, decl.binding, desc_type);
   }
}
void PipelineBuilder::add_io_decls(QoShaderModuleCreateInfo *module) void
PipelineBuilder::add_io_decls(QoShaderModuleCreateInfo* module)
{ {
unsigned next_vtx_offset = 0; unsigned next_vtx_offset = 0;
for (unsigned i = 0; i < module->declarationCount; i++) { for (unsigned i = 0; i < module->declarationCount; i++) {
const QoShaderDecl *decl = &module->pDeclarations[i]; const QoShaderDecl* decl = &module->pDeclarations[i];
switch (decl->decl_type) { switch (decl->decl_type) {
case QoShaderDeclType_in: case QoShaderDeclType_in:
if (module->stage == VK_SHADER_STAGE_VERTEX_BIT) { if (module->stage == VK_SHADER_STAGE_VERTEX_BIT) {
if (!strcmp(decl->type, "float") || decl->type[0] == 'v') if (!strcmp(decl->type, "float") || decl->type[0] == 'v')
add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SFLOAT, next_vtx_offset); add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SFLOAT,
next_vtx_offset);
else if (decl->type[0] == 'u') else if (decl->type[0] == 'u')
add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_UINT, next_vtx_offset); add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_UINT,
next_vtx_offset);
else if (decl->type[0] == 'i') else if (decl->type[0] == 'i')
add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SINT, next_vtx_offset); add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SINT,
next_vtx_offset);
next_vtx_offset += 16; next_vtx_offset += 16;
} }
break; break;
@ -707,17 +754,17 @@ void PipelineBuilder::add_io_decls(QoShaderModuleCreateInfo *module)
color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_SINT; color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_SINT;
} }
break; break;
default: default: break;
break;
} }
} }
if (next_vtx_offset) if (next_vtx_offset)
add_vertex_binding(0, next_vtx_offset); add_vertex_binding(0, next_vtx_offset);
} }
void PipelineBuilder::add_stage(VkShaderStageFlagBits stage, VkShaderModule module, const char *name) void
PipelineBuilder::add_stage(VkShaderStageFlagBits stage, VkShaderModule module, const char* name)
{ {
VkPipelineShaderStageCreateInfo *stage_info; VkPipelineShaderStageCreateInfo* stage_info;
if (stage == VK_SHADER_STAGE_COMPUTE_BIT) if (stage == VK_SHADER_STAGE_COMPUTE_BIT)
stage_info = &stages[0]; stage_info = &stages[0];
else else
@ -732,40 +779,50 @@ void PipelineBuilder::add_stage(VkShaderStageFlagBits stage, VkShaderModule modu
owned_stages |= stage; owned_stages |= stage;
} }
/* Adds a shader stage from a QoShaderModuleCreateInfo.
 *
 * Creates a VkShaderModule for `module` on this builder's device, adds it as
 * stage `stage` with entry point `name`, then registers the module's resource
 * and I/O declarations.
 */
void
PipelineBuilder::add_stage(VkShaderStageFlagBits stage, QoShaderModuleCreateInfo module,
                           const char* name)
{
   VkShaderModule vk_module = __qoCreateShaderModule(device, &module);
   add_stage(stage, vk_module, name);

   add_resource_decls(&module);
   add_io_decls(&module);
}
/* Convenience wrapper: adds `vs` as the vertex stage, then `fs` as the
 * fragment stage, both with add_stage()'s default entry point name.
 */
void
PipelineBuilder::add_vsfs(VkShaderModule vs, VkShaderModule fs)
{
   add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs);
   add_stage(VK_SHADER_STAGE_FRAGMENT_BIT, fs);
}
/* Convenience wrapper: adds `vs` as the vertex stage, then `fs` as the
 * fragment stage, through the QoShaderModuleCreateInfo overload of
 * add_stage() with its default entry point name.
 */
void
PipelineBuilder::add_vsfs(QoShaderModuleCreateInfo vs, QoShaderModuleCreateInfo fs)
{
   add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs);
   add_stage(VK_SHADER_STAGE_FRAGMENT_BIT, fs);
}
/* Convenience wrapper: adds `cs` as the compute stage with add_stage()'s
 * default entry point name.
 */
void
PipelineBuilder::add_cs(VkShaderModule cs)
{
   add_stage(VK_SHADER_STAGE_COMPUTE_BIT, cs);
}
/* Convenience wrapper: adds `cs` as the compute stage through the
 * QoShaderModuleCreateInfo overload of add_stage() with its default entry
 * point name.
 */
void
PipelineBuilder::add_cs(QoShaderModuleCreateInfo cs)
{
   add_stage(VK_SHADER_STAGE_COMPUTE_BIT, cs);
}
bool PipelineBuilder::is_compute() { bool
PipelineBuilder::is_compute()
{
return gfx_pipeline_info.stageCount == 0; return gfx_pipeline_info.stageCount == 0;
} }
void PipelineBuilder::create_compute_pipeline() { void
PipelineBuilder::create_compute_pipeline()
{
VkComputePipelineCreateInfo create_info; VkComputePipelineCreateInfo create_info;
create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
create_info.pNext = NULL; create_info.pNext = NULL;
@ -775,11 +832,14 @@ void PipelineBuilder::create_compute_pipeline() {
create_info.basePipelineHandle = VK_NULL_HANDLE; create_info.basePipelineHandle = VK_NULL_HANDLE;
create_info.basePipelineIndex = 0; create_info.basePipelineIndex = 0;
ASSERTED VkResult result = CreateComputePipelines(device, VK_NULL_HANDLE, 1, &create_info, NULL, &pipeline); ASSERTED VkResult result =
CreateComputePipelines(device, VK_NULL_HANDLE, 1, &create_info, NULL, &pipeline);
assert(result == VK_SUCCESS); assert(result == VK_SUCCESS);
} }
void PipelineBuilder::create_graphics_pipeline() { void
PipelineBuilder::create_graphics_pipeline()
{
/* create the create infos */ /* create the create infos */
if (!samples) if (!samples)
samples = VK_SAMPLE_COUNT_1_BIT; samples = VK_SAMPLE_COUNT_1_BIT;
@ -792,7 +852,7 @@ void PipelineBuilder::create_graphics_pipeline() {
if (color_outputs[i] == VK_FORMAT_UNDEFINED) if (color_outputs[i] == VK_FORMAT_UNDEFINED)
continue; continue;
VkAttachmentDescription *desc = &attachment_descs[num_color_attachments]; VkAttachmentDescription* desc = &attachment_descs[num_color_attachments];
desc->flags = 0; desc->flags = 0;
desc->format = color_outputs[i]; desc->format = color_outputs[i];
desc->samples = samples; desc->samples = samples;
@ -803,16 +863,14 @@ void PipelineBuilder::create_graphics_pipeline() {
desc->initialLayout = VK_IMAGE_LAYOUT_GENERAL; desc->initialLayout = VK_IMAGE_LAYOUT_GENERAL;
desc->finalLayout = VK_IMAGE_LAYOUT_GENERAL; desc->finalLayout = VK_IMAGE_LAYOUT_GENERAL;
VkAttachmentReference *ref = &color_attachments[num_color_attachments]; VkAttachmentReference* ref = &color_attachments[num_color_attachments];
ref->attachment = num_color_attachments; ref->attachment = num_color_attachments;
ref->layout = VK_IMAGE_LAYOUT_GENERAL; ref->layout = VK_IMAGE_LAYOUT_GENERAL;
VkPipelineColorBlendAttachmentState *blend = &blend_attachment_states[num_color_attachments]; VkPipelineColorBlendAttachmentState* blend = &blend_attachment_states[num_color_attachments];
blend->blendEnable = false; blend->blendEnable = false;
blend->colorWriteMask = VK_COLOR_COMPONENT_R_BIT | blend->colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
VK_COLOR_COMPONENT_B_BIT |
VK_COLOR_COMPONENT_A_BIT;
num_color_attachments++; num_color_attachments++;
} }
@ -820,7 +878,7 @@ void PipelineBuilder::create_graphics_pipeline() {
unsigned num_attachments = num_color_attachments; unsigned num_attachments = num_color_attachments;
VkAttachmentReference ds_attachment; VkAttachmentReference ds_attachment;
if (ds_output != VK_FORMAT_UNDEFINED) { if (ds_output != VK_FORMAT_UNDEFINED) {
VkAttachmentDescription *desc = &attachment_descs[num_attachments]; VkAttachmentDescription* desc = &attachment_descs[num_attachments];
desc->flags = 0; desc->flags = 0;
desc->format = ds_output; desc->format = ds_output;
desc->samples = samples; desc->samples = samples;
@ -902,8 +960,7 @@ void PipelineBuilder::create_graphics_pipeline() {
ds_state.front.passOp = VK_STENCIL_OP_REPLACE; ds_state.front.passOp = VK_STENCIL_OP_REPLACE;
ds_state.front.depthFailOp = VK_STENCIL_OP_REPLACE; ds_state.front.depthFailOp = VK_STENCIL_OP_REPLACE;
ds_state.front.compareOp = VK_COMPARE_OP_ALWAYS; ds_state.front.compareOp = VK_COMPARE_OP_ALWAYS;
ds_state.front.compareMask = 0xffffffff, ds_state.front.compareMask = 0xffffffff, ds_state.front.writeMask = 0;
ds_state.front.writeMask = 0;
ds_state.front.reference = 0; ds_state.front.reference = 0;
ds_state.back = ds_state.front; ds_state.back = ds_state.front;
@ -915,8 +972,7 @@ void PipelineBuilder::create_graphics_pipeline() {
color_blend_state.attachmentCount = num_color_attachments; color_blend_state.attachmentCount = num_color_attachments;
color_blend_state.pAttachments = blend_attachment_states; color_blend_state.pAttachments = blend_attachment_states;
VkDynamicState dynamic_states[9] = { VkDynamicState dynamic_states[9] = {VK_DYNAMIC_STATE_VIEWPORT,
VK_DYNAMIC_STATE_VIEWPORT,
VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_LINE_WIDTH, VK_DYNAMIC_STATE_LINE_WIDTH,
VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_DEPTH_BIAS,
@ -924,8 +980,7 @@ void PipelineBuilder::create_graphics_pipeline() {
VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_DEPTH_BOUNDS,
VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
VK_DYNAMIC_STATE_STENCIL_REFERENCE VK_DYNAMIC_STATE_STENCIL_REFERENCE};
};
VkPipelineDynamicStateCreateInfo dynamic_state; VkPipelineDynamicStateCreateInfo dynamic_state;
dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
@ -985,7 +1040,9 @@ void PipelineBuilder::create_graphics_pipeline() {
assert(result == VK_SUCCESS); assert(result == VK_SUCCESS);
} }
void PipelineBuilder::create_pipeline() { void
PipelineBuilder::create_pipeline()
{
unsigned num_desc_layouts = 0; unsigned num_desc_layouts = 0;
for (unsigned i = 0; i < 64; i++) { for (unsigned i = 0; i < 64; i++) {
if (!(desc_layouts_used & (1ull << i))) if (!(desc_layouts_used & (1ull << i)))
@ -998,7 +1055,8 @@ void PipelineBuilder::create_pipeline() {
desc_layout_info.bindingCount = num_desc_bindings[i]; desc_layout_info.bindingCount = num_desc_bindings[i];
desc_layout_info.pBindings = desc_bindings[i]; desc_layout_info.pBindings = desc_bindings[i];
ASSERTED VkResult result = CreateDescriptorSetLayout(device, &desc_layout_info, NULL, &desc_layouts[num_desc_layouts]); ASSERTED VkResult result = CreateDescriptorSetLayout(device, &desc_layout_info, NULL,
&desc_layouts[num_desc_layouts]);
assert(result == VK_SUCCESS); assert(result == VK_SUCCESS);
num_desc_layouts++; num_desc_layouts++;
} }
@ -1012,7 +1070,8 @@ void PipelineBuilder::create_pipeline() {
pipeline_layout_info.setLayoutCount = num_desc_layouts; pipeline_layout_info.setLayoutCount = num_desc_layouts;
pipeline_layout_info.pSetLayouts = desc_layouts; pipeline_layout_info.pSetLayouts = desc_layouts;
ASSERTED VkResult result = CreatePipelineLayout(device, &pipeline_layout_info, NULL, &pipeline_layout); ASSERTED VkResult result =
CreatePipelineLayout(device, &pipeline_layout_info, NULL, &pipeline_layout);
assert(result == VK_SUCCESS); assert(result == VK_SUCCESS);
if (is_compute()) if (is_compute())
@ -1021,7 +1080,8 @@ void PipelineBuilder::create_pipeline() {
create_graphics_pipeline(); create_graphics_pipeline();
} }
void PipelineBuilder::print_ir(VkShaderStageFlagBits stage_flags, const char *name, bool remove_encoding) void
PipelineBuilder::print_ir(VkShaderStageFlagBits stage_flags, const char* name, bool remove_encoding)
{ {
if (!pipeline) if (!pipeline)
create_pipeline(); create_pipeline();

View file

@ -24,8 +24,9 @@
#ifndef ACO_TEST_HELPERS_H #ifndef ACO_TEST_HELPERS_H
#define ACO_TEST_HELPERS_H #define ACO_TEST_HELPERS_H
#include "framework.h"
#include "vulkan/vulkan.h" #include "vulkan/vulkan.h"
#include "framework.h"
#include <functional> #include <functional>
enum QoShaderDeclType { enum QoShaderDeclType {
@ -42,10 +43,10 @@ enum QoShaderDeclType {
}; };
struct QoShaderDecl { struct QoShaderDecl {
const char *name; const char* name;
const char *type; const char* type;
QoShaderDeclType decl_type; QoShaderDeclType decl_type;
//TODO: array size? // TODO: array size?
unsigned location; unsigned location;
unsigned component; unsigned component;
unsigned binding; unsigned binding;
@ -53,11 +54,11 @@ struct QoShaderDecl {
}; };
struct QoShaderModuleCreateInfo { struct QoShaderModuleCreateInfo {
void *pNext; void* pNext;
size_t spirvSize; size_t spirvSize;
const void *pSpirv; const void* pSpirv;
uint32_t declarationCount; uint32_t declarationCount;
const QoShaderDecl *pDeclarations; const QoShaderDecl* pDeclarations;
VkShaderStageFlagBits stage; VkShaderStageFlagBits stage;
}; };
@ -71,17 +72,17 @@ namespace aco {
struct ra_test_policy; struct ra_test_policy;
} }
void create_program(enum amd_gfx_level gfx_level, aco::Stage stage, void create_program(enum amd_gfx_level gfx_level, aco::Stage stage, unsigned wave_size = 64,
unsigned wave_size=64, enum radeon_family family=CHIP_UNKNOWN); enum radeon_family family = CHIP_UNKNOWN);
bool setup_cs(const char *input_spec, enum amd_gfx_level gfx_level, bool setup_cs(const char* input_spec, enum amd_gfx_level gfx_level,
enum radeon_family family=CHIP_UNKNOWN, const char* subvariant = "", enum radeon_family family = CHIP_UNKNOWN, const char* subvariant = "",
unsigned wave_size=64); unsigned wave_size = 64);
void finish_program(aco::Program *program); void finish_program(aco::Program* program);
void finish_validator_test(); void finish_validator_test();
void finish_opt_test(); void finish_opt_test();
void finish_setup_reduce_temp_test(); void finish_setup_reduce_temp_test();
void finish_ra_test(aco::ra_test_policy, bool lower=false); void finish_ra_test(aco::ra_test_policy, bool lower = false);
void finish_optimizer_postRA_test(); void finish_optimizer_postRA_test();
void finish_to_hw_instr_test(); void finish_to_hw_instr_test();
void finish_waitcnt_test(); void finish_waitcnt_test();
@ -89,35 +90,35 @@ void finish_insert_nops_test();
void finish_form_hard_clause_test(); void finish_form_hard_clause_test();
void finish_assembler_test(); void finish_assembler_test();
void writeout(unsigned i, aco::Temp tmp=aco::Temp(0, aco::s1)); void writeout(unsigned i, aco::Temp tmp = aco::Temp(0, aco::s1));
void writeout(unsigned i, aco::Builder::Result res); void writeout(unsigned i, aco::Builder::Result res);
void writeout(unsigned i, aco::Operand op); void writeout(unsigned i, aco::Operand op);
void writeout(unsigned i, aco::Operand op0, aco::Operand op1); void writeout(unsigned i, aco::Operand op0, aco::Operand op1);
aco::Temp fneg(aco::Temp src, aco::Builder b=bld); aco::Temp fneg(aco::Temp src, aco::Builder b = bld);
aco::Temp fabs(aco::Temp src, aco::Builder b=bld); aco::Temp fabs(aco::Temp src, aco::Builder b = bld);
aco::Temp f2f32(aco::Temp src, aco::Builder b=bld); aco::Temp f2f32(aco::Temp src, aco::Builder b = bld);
aco::Temp f2f16(aco::Temp src, aco::Builder b=bld); aco::Temp f2f16(aco::Temp src, aco::Builder b = bld);
aco::Temp u2u16(aco::Temp src, aco::Builder b=bld); aco::Temp u2u16(aco::Temp src, aco::Builder b = bld);
aco::Temp fadd(aco::Temp src0, aco::Temp src1, aco::Builder b=bld); aco::Temp fadd(aco::Temp src0, aco::Temp src1, aco::Builder b = bld);
aco::Temp fmul(aco::Temp src0, aco::Temp src1, aco::Builder b=bld); aco::Temp fmul(aco::Temp src0, aco::Temp src1, aco::Builder b = bld);
aco::Temp fma(aco::Temp src0, aco::Temp src1, aco::Temp src2, aco::Builder b=bld); aco::Temp fma(aco::Temp src0, aco::Temp src1, aco::Temp src2, aco::Builder b = bld);
aco::Temp fsat(aco::Temp src, aco::Builder b=bld); aco::Temp fsat(aco::Temp src, aco::Builder b = bld);
aco::Temp fmin(aco::Temp src0, aco::Temp src1, aco::Builder b=bld); aco::Temp fmin(aco::Temp src0, aco::Temp src1, aco::Builder b = bld);
aco::Temp fmax(aco::Temp src0, aco::Temp src1, aco::Builder b=bld); aco::Temp fmax(aco::Temp src0, aco::Temp src1, aco::Builder b = bld);
aco::Temp ext_ushort(aco::Temp src, unsigned idx, aco::Builder b=bld); aco::Temp ext_ushort(aco::Temp src, unsigned idx, aco::Builder b = bld);
aco::Temp ext_ubyte(aco::Temp src, unsigned idx, aco::Builder b=bld); aco::Temp ext_ubyte(aco::Temp src, unsigned idx, aco::Builder b = bld);
void emit_divergent_if_else(aco::Program* prog, aco::Builder& b, aco::Operand cond, std::function<void()> then, void emit_divergent_if_else(aco::Program* prog, aco::Builder& b, aco::Operand cond,
std::function<void()> els); std::function<void()> then, std::function<void()> els);
/* vulkan helpers */ /* vulkan helpers */
VkDevice get_vk_device(enum amd_gfx_level gfx_level); VkDevice get_vk_device(enum amd_gfx_level gfx_level);
VkDevice get_vk_device(enum radeon_family family); VkDevice get_vk_device(enum radeon_family family);
void print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBits stages, void print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBits stages,
const char *name, bool remove_encoding=false); const char* name, bool remove_encoding = false);
VkShaderModule __qoCreateShaderModule(VkDevice dev, const QoShaderModuleCreateInfo *info); VkShaderModule __qoCreateShaderModule(VkDevice dev, const QoShaderModuleCreateInfo* info);
class PipelineBuilder { class PipelineBuilder {
public: public:
@ -152,19 +153,21 @@ public:
~PipelineBuilder(); ~PipelineBuilder();
PipelineBuilder(const PipelineBuilder&) = delete; PipelineBuilder(const PipelineBuilder&) = delete;
PipelineBuilder& operator = (const PipelineBuilder&) = delete; PipelineBuilder& operator=(const PipelineBuilder&) = delete;
void add_desc_binding(VkShaderStageFlags stage_flags, uint32_t layout, void add_desc_binding(VkShaderStageFlags stage_flags, uint32_t layout, uint32_t binding,
uint32_t binding, VkDescriptorType type, uint32_t count=1); VkDescriptorType type, uint32_t count = 1);
void add_vertex_binding(uint32_t binding, uint32_t stride, VkVertexInputRate rate=VK_VERTEX_INPUT_RATE_VERTEX); void add_vertex_binding(uint32_t binding, uint32_t stride,
VkVertexInputRate rate = VK_VERTEX_INPUT_RATE_VERTEX);
void add_vertex_attribute(uint32_t location, uint32_t binding, VkFormat format, uint32_t offset); void add_vertex_attribute(uint32_t location, uint32_t binding, VkFormat format, uint32_t offset);
void add_resource_decls(QoShaderModuleCreateInfo *module); void add_resource_decls(QoShaderModuleCreateInfo* module);
void add_io_decls(QoShaderModuleCreateInfo *module); void add_io_decls(QoShaderModuleCreateInfo* module);
void add_stage(VkShaderStageFlagBits stage, VkShaderModule module, const char *name="main"); void add_stage(VkShaderStageFlagBits stage, VkShaderModule module, const char* name = "main");
void add_stage(VkShaderStageFlagBits stage, QoShaderModuleCreateInfo module, const char *name="main"); void add_stage(VkShaderStageFlagBits stage, QoShaderModuleCreateInfo module,
const char* name = "main");
void add_vsfs(VkShaderModule vs, VkShaderModule fs); void add_vsfs(VkShaderModule vs, VkShaderModule fs);
void add_vsfs(QoShaderModuleCreateInfo vs, QoShaderModuleCreateInfo fs); void add_vsfs(QoShaderModuleCreateInfo vs, QoShaderModuleCreateInfo fs);
void add_cs(VkShaderModule cs); void add_cs(VkShaderModule cs);
@ -174,7 +177,8 @@ public:
void create_pipeline(); void create_pipeline();
void print_ir(VkShaderStageFlagBits stages, const char *name, bool remove_encoding=false); void print_ir(VkShaderStageFlagBits stages, const char* name, bool remove_encoding = false);
private: private:
void create_compute_pipeline(); void create_compute_pipeline();
void create_graphics_pipeline(); void create_graphics_pipeline();

View file

@ -21,20 +21,22 @@
* IN THE SOFTWARE. * IN THE SOFTWARE.
* *
*/ */
#include "aco_ir.h"
#include <llvm-c/Target.h>
#include "framework.h"
#include <getopt.h>
#include <map> #include <map>
#include <set> #include <set>
#include <string> #include <stdarg.h>
#include <vector>
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include <getopt.h> #include <string>
#include <unistd.h> #include <unistd.h>
#include <stdarg.h> #include <vector>
#include <llvm-c/Target.h>
#include "aco_ir.h"
#include "framework.h"
static const char *help_message = static const char* help_message =
"Usage: %s [-h] [-l --list] [--no-check] [TEST [TEST ...]]\n" "Usage: %s [-h] [-l --list] [--no-check] [TEST [TEST ...]]\n"
"\n" "\n"
"Run ACO unit test(s). If TEST is not provided, all tests are run.\n" "Run ACO unit test(s). If TEST is not provided, all tests are run.\n"
@ -50,26 +52,27 @@ static const char *help_message =
" --no-check Print test output instead of checking it.\n"; " --no-check Print test output instead of checking it.\n";
std::map<std::string, TestDef> tests; std::map<std::string, TestDef> tests;
FILE *output = NULL; FILE* output = NULL;
static TestDef current_test; static TestDef current_test;
static unsigned tests_written = 0; static unsigned tests_written = 0;
static FILE *checker_stdin = NULL; static FILE* checker_stdin = NULL;
static char *checker_stdin_data = NULL; static char* checker_stdin_data = NULL;
static size_t checker_stdin_size = 0; static size_t checker_stdin_size = 0;
static char *output_data = NULL; static char* output_data = NULL;
static size_t output_size = 0; static size_t output_size = 0;
static size_t output_offset = 0; static size_t output_offset = 0;
static char current_variant[64] = {0}; static char current_variant[64] = {0};
static std::set<std::string> *variant_filter = NULL; static std::set<std::string>* variant_filter = NULL;
bool test_failed = false; bool test_failed = false;
bool test_skipped = false; bool test_skipped = false;
static char fail_message[256] = {0}; static char fail_message[256] = {0};
void write_test() void
write_test()
{ {
if (!checker_stdin) { if (!checker_stdin) {
/* not entirely correct, but shouldn't matter */ /* not entirely correct, but shouldn't matter */
@ -81,18 +84,18 @@ void write_test()
if (output_offset == output_size && !test_skipped && !test_failed) if (output_offset == output_size && !test_skipped && !test_failed)
return; return;
char *data = output_data + output_offset; char* data = output_data + output_offset;
uint32_t size = output_size - output_offset; uint32_t size = output_size - output_offset;
fwrite("test", 1, 4, checker_stdin); fwrite("test", 1, 4, checker_stdin);
fwrite(current_test.name, 1, strlen(current_test.name)+1, checker_stdin); fwrite(current_test.name, 1, strlen(current_test.name) + 1, checker_stdin);
fwrite(current_variant, 1, strlen(current_variant)+1, checker_stdin); fwrite(current_variant, 1, strlen(current_variant) + 1, checker_stdin);
fwrite(current_test.source_file, 1, strlen(current_test.source_file)+1, checker_stdin); fwrite(current_test.source_file, 1, strlen(current_test.source_file) + 1, checker_stdin);
if (test_failed || test_skipped) { if (test_failed || test_skipped) {
const char *res = test_failed ? "failed" : "skipped"; const char* res = test_failed ? "failed" : "skipped";
fwrite("\x01", 1, 1, checker_stdin); fwrite("\x01", 1, 1, checker_stdin);
fwrite(res, 1, strlen(res)+1, checker_stdin); fwrite(res, 1, strlen(res) + 1, checker_stdin);
fwrite(fail_message, 1, strlen(fail_message)+1, checker_stdin); fwrite(fail_message, 1, strlen(fail_message) + 1, checker_stdin);
} else { } else {
fwrite("\x00", 1, 1, checker_stdin); fwrite("\x00", 1, 1, checker_stdin);
} }
@ -103,7 +106,8 @@ void write_test()
output_offset += size; output_offset += size;
} }
bool set_variant(const char *name) bool
set_variant(const char* name)
{ {
if (variant_filter && !variant_filter->count(name)) if (variant_filter && !variant_filter->count(name))
return false; return false;
@ -118,7 +122,8 @@ bool set_variant(const char *name)
return true; return true;
} }
void fail_test(const char *fmt, ...) void
fail_test(const char* fmt, ...)
{ {
va_list args; va_list args;
va_start(args, fmt); va_start(args, fmt);
@ -129,7 +134,8 @@ void fail_test(const char *fmt, ...)
va_end(args); va_end(args);
} }
void skip_test(const char *fmt, ...) void
skip_test(const char* fmt, ...)
{ {
va_list args; va_list args;
va_start(args, fmt); va_start(args, fmt);
@ -140,7 +146,8 @@ void skip_test(const char *fmt, ...)
va_end(args); va_end(args);
} }
void run_test(TestDef def) void
run_test(TestDef def)
{ {
current_test = def; current_test = def;
output_data = NULL; output_data = NULL;
@ -163,7 +170,8 @@ void run_test(TestDef def)
free(output_data); free(output_data);
} }
int check_output(char **argv) int
check_output(char** argv)
{ {
fflush(stdout); fflush(stdout);
fflush(stderr); fflush(stderr);
@ -183,7 +191,8 @@ int check_output(char **argv)
close(stdin_pipe[0]); close(stdin_pipe[0]);
close(stdin_pipe[1]); close(stdin_pipe[1]);
execlp(ACO_TEST_PYTHON_BIN, ACO_TEST_PYTHON_BIN, ACO_TEST_SOURCE_DIR "/check_output.py", NULL); execlp(ACO_TEST_PYTHON_BIN, ACO_TEST_PYTHON_BIN, ACO_TEST_SOURCE_DIR "/check_output.py",
NULL);
fprintf(stderr, "%s: execlp() failed: %s\n", argv[0], strerror(errno)); fprintf(stderr, "%s: execlp() failed: %s\n", argv[0], strerror(errno));
return 99; return 99;
} else { } else {
@ -197,7 +206,8 @@ int check_output(char **argv)
} }
} }
bool match_test(std::string name, std::string pattern) bool
match_test(std::string name, std::string pattern)
{ {
if (name.length() < pattern.length()) if (name.length() < pattern.length())
return false; return false;
@ -206,33 +216,25 @@ bool match_test(std::string name, std::string pattern)
return name == pattern; return name == pattern;
} }
int main(int argc, char **argv) int
main(int argc, char** argv)
{ {
int print_help = 0; int print_help = 0;
int do_list = 0; int do_list = 0;
int do_check = 1; int do_check = 1;
const struct option opts[] = { const struct option opts[] = {{"help", no_argument, &print_help, 1},
{ "help", no_argument, &print_help, 1 }, {"list", no_argument, &do_list, 1},
{ "list", no_argument, &do_list, 1 }, {"no-check", no_argument, &do_check, 0},
{ "no-check", no_argument, &do_check, 0 }, {NULL, 0, NULL, 0}};
{ NULL, 0, NULL, 0 }
};
int c; int c;
while ((c = getopt_long(argc, argv, "hl", opts, NULL)) != -1) { while ((c = getopt_long(argc, argv, "hl", opts, NULL)) != -1) {
switch (c) { switch (c) {
case 'h': case 'h': print_help = 1; break;
print_help = 1; case 'l': do_list = 1; break;
break; case 0: break;
case 'l':
do_list = 1;
break;
case 0:
break;
case '?': case '?':
default: default: fprintf(stderr, "%s: Invalid argument\n", argv[0]); return 99;
fprintf(stderr, "%s: Invalid argument\n", argv[0]);
return 99;
} }
} }

View file

@ -21,11 +21,11 @@
* IN THE SOFTWARE. * IN THE SOFTWARE.
* *
*/ */
#include <llvm/Config/llvm-config.h>
#include "helpers.h" #include "helpers.h"
#include "sid.h" #include "sid.h"
#include <llvm/Config/llvm-config.h>
using namespace aco; using namespace aco;
BEGIN_TEST(assembler.s_memtime) BEGIN_TEST(assembler.s_memtime)
@ -178,7 +178,7 @@ BEGIN_TEST(assembler.long_jump.conditional_backwards)
finish_assembler_test(); finish_assembler_test();
END_TEST END_TEST
BEGIN_TEST(assembler.long_jump.3f) BEGIN_TEST(assembler.long_jump .3f)
if (!setup_cs(NULL, (amd_gfx_level)GFX10)) if (!setup_cs(NULL, (amd_gfx_level)GFX10))
return; return;
@ -354,25 +354,31 @@ BEGIN_TEST(assembler.vopc_sdwa)
//~gfx9>> v_cmp_lt_u32_sdwa vcc, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d9300f9 86860080 //~gfx9>> v_cmp_lt_u32_sdwa vcc, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d9300f9 86860080
//~gfx10>> v_cmp_lt_u32_sdwa vcc, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d8300f9 86860080 //~gfx10>> v_cmp_lt_u32_sdwa vcc, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d8300f9 86860080
bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(vcc, s2), Operand::zero(), Operand::zero()); bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(vcc, s2), Operand::zero(),
Operand::zero());
//~gfx9! v_cmp_lt_u32_sdwa s[44:45], 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d9300f9 8686ac80 //~gfx9! v_cmp_lt_u32_sdwa s[44:45], 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d9300f9 8686ac80
//~gfx10! v_cmp_lt_u32_sdwa s[44:45], 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d8300f9 8686ac80 //~gfx10! v_cmp_lt_u32_sdwa s[44:45], 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d8300f9 8686ac80
bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(PhysReg(0x2c), s2), Operand::zero(), Operand::zero()); bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(PhysReg(0x2c), s2), Operand::zero(),
Operand::zero());
//~gfx9! v_cmp_lt_u32_sdwa exec, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d9300f9 8686fe80 //~gfx9! v_cmp_lt_u32_sdwa exec, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d9300f9 8686fe80
//~gfx10! v_cmp_lt_u32_sdwa exec, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d8300f9 8686fe80 //~gfx10! v_cmp_lt_u32_sdwa exec, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d8300f9 8686fe80
bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(exec, s2), Operand::zero(), Operand::zero()); bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(exec, s2), Operand::zero(),
Operand::zero());
if (i == GFX10) { if (i == GFX10) {
//~gfx10! v_cmpx_lt_u32_sdwa 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7da300f9 86860080 //~gfx10! v_cmpx_lt_u32_sdwa 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7da300f9 86860080
bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(exec, s2), Operand::zero(), Operand::zero()); bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(exec, s2), Operand::zero(),
Operand::zero());
} else { } else {
//~gfx9! v_cmpx_lt_u32_sdwa vcc, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7db300f9 86860080 //~gfx9! v_cmpx_lt_u32_sdwa vcc, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7db300f9 86860080
bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(vcc, s2), Definition(exec, s2), Operand::zero(), Operand::zero()); bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(vcc, s2), Definition(exec, s2),
Operand::zero(), Operand::zero());
//~gfx9! v_cmpx_lt_u32_sdwa s[44:45], 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7db300f9 8686ac80 //~gfx9! v_cmpx_lt_u32_sdwa s[44:45], 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7db300f9 8686ac80
bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(PhysReg(0x2c), s2), Definition(exec, s2), Operand::zero(), Operand::zero()); bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(PhysReg(0x2c), s2),
Definition(exec, s2), Operand::zero(), Operand::zero());
} }
finish_assembler_test(); finish_assembler_test();
@ -452,48 +458,70 @@ BEGIN_TEST(assembler.gfx11.mubuf)
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v1, op_s1, 0, true); bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v1, op_s1, 0, true);
//! buffer_load_b32 v42, v10, s[32:35], s30 idxen ; e0500000 1e882a0a //! buffer_load_b32 v42, v10, s[32:35], s30 idxen ; e0500000 1e882a0a
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v1, op_s1, 0, false)->mubuf().idxen = true; bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v1, op_s1, 0, false)->mubuf().idxen =
true;
//! buffer_load_b32 v42, v[20:21], s[32:35], s30 idxen offen ; e0500000 1ec82a14 //! buffer_load_b32 v42, v[20:21], s[32:35], s30 idxen offen ; e0500000 1ec82a14
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v2, op_s1, 0, true)->mubuf().idxen = true; bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v2, op_s1, 0, true)->mubuf().idxen =
true;
//! buffer_load_b32 v42, off, s[32:35], s30 offset:84 ; e0500054 1e082a80 //! buffer_load_b32 v42, off, s[32:35], s30 offset:84 ; e0500054 1e082a80
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), op_s1, 84, false); bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), op_s1, 84, false);
/* Various flags */ /* Various flags */
//! buffer_load_b32 v42, off, s[32:35], 0 glc ; e0504000 80082a80 //! buffer_load_b32 v42, off, s[32:35], 0 glc ; e0504000 80082a80
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)->mubuf().glc = true; bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
->mubuf()
.glc = true;
//! buffer_load_b32 v42, off, s[32:35], 0 dlc ; e0502000 80082a80 //! buffer_load_b32 v42, off, s[32:35], 0 dlc ; e0502000 80082a80
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)->mubuf().dlc = true; bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
->mubuf()
.dlc = true;
//! buffer_load_b32 v42, off, s[32:35], 0 slc ; e0501000 80082a80 //! buffer_load_b32 v42, off, s[32:35], 0 slc ; e0501000 80082a80
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)->mubuf().slc = true; bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
->mubuf()
.slc = true;
//; if llvm_ver >= 16: //; if llvm_ver >= 16:
//; insert_pattern('buffer_load_b32 v[42:43], off, s[32:35], 0 tfe ; e0500000 80282a80') //; insert_pattern('buffer_load_b32 v[42:43], off, s[32:35], 0 tfe ; e0500000 80282a80')
//; else: //; else:
//; insert_pattern('buffer_load_b32 v42, off, s[32:35], 0 tfe ; e0500000 80282a80') //; insert_pattern('buffer_load_b32 v42, off, s[32:35], 0 tfe ; e0500000 80282a80')
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)->mubuf().tfe = true; bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
->mubuf()
.tfe = true;
/* LDS */ /* LDS */
//! buffer_load_lds_b32 off, s[32:35], 0 ; e0c40000 80080080 //! buffer_load_lds_b32 off, s[32:35], 0 ; e0c40000 80080080
bld.mubuf(aco_opcode::buffer_load_dword, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true; bld.mubuf(aco_opcode::buffer_load_dword, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
->mubuf()
.lds = true;
//! buffer_load_lds_i8 off, s[32:35], 0 ; e0b80000 80080080 //! buffer_load_lds_i8 off, s[32:35], 0 ; e0b80000 80080080
bld.mubuf(aco_opcode::buffer_load_sbyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true; bld.mubuf(aco_opcode::buffer_load_sbyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
->mubuf()
.lds = true;
//! buffer_load_lds_i16 off, s[32:35], 0 ; e0c00000 80080080 //! buffer_load_lds_i16 off, s[32:35], 0 ; e0c00000 80080080
bld.mubuf(aco_opcode::buffer_load_sshort, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true; bld.mubuf(aco_opcode::buffer_load_sshort, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
->mubuf()
.lds = true;
//! buffer_load_lds_u8 off, s[32:35], 0 ; e0b40000 80080080 //! buffer_load_lds_u8 off, s[32:35], 0 ; e0b40000 80080080
bld.mubuf(aco_opcode::buffer_load_ubyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true; bld.mubuf(aco_opcode::buffer_load_ubyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
->mubuf()
.lds = true;
//! buffer_load_lds_u16 off, s[32:35], 0 ; e0bc0000 80080080 //! buffer_load_lds_u16 off, s[32:35], 0 ; e0bc0000 80080080
bld.mubuf(aco_opcode::buffer_load_ushort, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true; bld.mubuf(aco_opcode::buffer_load_ushort, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
->mubuf()
.lds = true;
//! buffer_load_lds_format_x off, s[32:35], 0 ; e0c80000 80080080 //! buffer_load_lds_format_x off, s[32:35], 0 ; e0c80000 80080080
bld.mubuf(aco_opcode::buffer_load_format_x, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true; bld.mubuf(aco_opcode::buffer_load_format_x, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
->mubuf()
.lds = true;
/* Stores */ /* Stores */
//! buffer_store_b32 v10, off, s[32:35], s30 ; e0680000 1e080a80 //! buffer_store_b32 v10, off, s[32:35], s30 ; e0680000 1e080a80
@ -532,42 +560,62 @@ BEGIN_TEST(assembler.gfx11.mtbuf)
/* Addressing */ /* Addressing */
//>> tbuffer_load_format_x v42, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] ; e9900000 1e082a80 //>> tbuffer_load_format_x v42, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] ; e9900000 1e082a80
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), op_s1, dfmt, nfmt, 0, false); bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), op_s1, dfmt, nfmt, 0,
false);
//! tbuffer_load_format_x v42, off, s[32:35], 42 format:[BUF_FMT_32_32_FLOAT] ; e9900000 aa082a80 //! tbuffer_load_format_x v42, off, s[32:35], 42 format:[BUF_FMT_32_32_FLOAT] ; e9900000 aa082a80
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::c32(42), dfmt, nfmt, 0, false); bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::c32(42), dfmt,
nfmt, 0, false);
//! tbuffer_load_format_x v42, v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offen ; e9900000 1e482a0a //! tbuffer_load_format_x v42, v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offen ; e9900000 1e482a0a
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v1, op_s1, dfmt, nfmt, 0, true); bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v1, op_s1, dfmt, nfmt, 0, true);
//! tbuffer_load_format_x v42, v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] idxen ; e9900000 1e882a0a //! tbuffer_load_format_x v42, v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] idxen ; e9900000 1e882a0a
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v1, op_s1, dfmt, nfmt, 0, false)->mtbuf().idxen = true; bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v1, op_s1, dfmt, nfmt, 0, false)
->mtbuf()
.idxen = true;
//! tbuffer_load_format_x v42, v[20:21], s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] idxen offen ; e9900000 1ec82a14 //! tbuffer_load_format_x v42, v[20:21], s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] idxen offen ; e9900000 1ec82a14
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v2, op_s1, dfmt, nfmt, 0, true)->mtbuf().idxen = true; bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v2, op_s1, dfmt, nfmt, 0, true)
->mtbuf()
.idxen = true;
//! tbuffer_load_format_x v42, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offset:84 ; e9900054 1e082a80 //! tbuffer_load_format_x v42, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offset:84 ; e9900054 1e082a80
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), op_s1, dfmt, nfmt, 84, false); bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), op_s1, dfmt, nfmt, 84,
false);
/* Various flags */ /* Various flags */
//! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] glc ; e9904000 80082a80 //! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] glc ; e9904000 80082a80
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt, nfmt, 0, false)->mtbuf().glc = true; bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
nfmt, 0, false)
->mtbuf()
.glc = true;
//! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] dlc ; e9902000 80082a80 //! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] dlc ; e9902000 80082a80
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt, nfmt, 0, false)->mtbuf().dlc = true; bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
nfmt, 0, false)
->mtbuf()
.dlc = true;
//! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] slc ; e9901000 80082a80 //! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] slc ; e9901000 80082a80
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt, nfmt, 0, false)->mtbuf().slc = true; bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
nfmt, 0, false)
->mtbuf()
.slc = true;
//; if llvm_ver >= 16: //; if llvm_ver >= 16:
//; insert_pattern('tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] ; e9900000 80282a80') //; insert_pattern('tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] ; e9900000 80282a80')
//; else: //; else:
//; insert_pattern('tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] tfe ; e9900000 80282a80') //; insert_pattern('tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] tfe ; e9900000 80282a80')
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt, nfmt, 0, false)->mtbuf().tfe = true; bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
nfmt, 0, false)
->mtbuf()
.tfe = true;
/* Stores */ /* Stores */
//! tbuffer_store_format_x v10, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] ; e9920000 1e080a80 //! tbuffer_store_format_x v10, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] ; e9920000 1e080a80
bld.mtbuf(aco_opcode::tbuffer_store_format_x, op_s4, Operand(v1), op_s1, op_v1, dfmt, nfmt, 0, false); bld.mtbuf(aco_opcode::tbuffer_store_format_x, op_s4, Operand(v1), op_s1, op_v1, dfmt, nfmt, 0,
false);
//! tbuffer_store_format_xy v[20:21], v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offen ; e9928000 1e48140a //! tbuffer_store_format_xy v[20:21], v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offen ; e9928000 1e48140a
bld.mtbuf(aco_opcode::tbuffer_store_format_xy, op_s4, op_v1, op_s1, op_v2, dfmt, nfmt, 0, true); bld.mtbuf(aco_opcode::tbuffer_store_format_xy, op_s4, op_v1, op_s1, op_v2, dfmt, nfmt, 0, true);
@ -604,7 +652,8 @@ BEGIN_TEST(assembler.gfx11.mimg)
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1); bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1);
//! image_sample v[84:87], v[20:21], s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_2D ; f06c0f04 20105414 //! image_sample v[84:87], v[20:21], s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_2D ; f06c0f04 20105414
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v2)->mimg().dim = ac_image_2d; bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v2)->mimg().dim =
ac_image_2d;
//! image_sample v42, v10, s[64:71], s[32:35] dmask:0x1 dim:SQ_RSRC_IMG_1D ; f06c0100 20102a0a //! image_sample v42, v10, s[64:71], s[32:35] dmask:0x1 dim:SQ_RSRC_IMG_1D ; f06c0100 20102a0a
bld.mimg(aco_opcode::image_sample, dst_v1, op_s8, op_s4, Operand(v1), op_v1)->mimg().dmask = 0x1; bld.mimg(aco_opcode::image_sample, dst_v1, op_s8, op_s4, Operand(v1), op_v1)->mimg().dmask = 0x1;
@ -636,14 +685,20 @@ BEGIN_TEST(assembler.gfx11.mimg)
/* NSA */ /* NSA */
//! image_sample v[84:87], [v10, v40], s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_2D ; f06c0f05 2010540a 00000028 //! image_sample v[84:87], [v10, v40], s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_2D ; f06c0f05 2010540a 00000028
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1, Operand(bld.tmp(v1), PhysReg(256 + 40)))->mimg().dim = ac_image_2d; bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1,
Operand(bld.tmp(v1), PhysReg(256 + 40)))
->mimg()
.dim = ac_image_2d;
/* Stores */ /* Stores */
//! image_store v[30:33], v10, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_1D ; f0180f00 00101e0a //! image_store v[30:33], v10, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_1D ; f0180f00 00101e0a
bld.mimg(aco_opcode::image_store, op_s8, Operand(s4), op_v4, op_v1); bld.mimg(aco_opcode::image_store, op_s8, Operand(s4), op_v4, op_v1);
//! image_atomic_add v10, v20, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_2D ; f0300f04 00100a14 //! image_atomic_add v10, v20, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_2D ; f0300f04 00100a14
bld.mimg(aco_opcode::image_atomic_add, Definition(op_v1.physReg(), v1), op_s8, Operand(s4), op_v1, op_v2)->mimg().dim = ac_image_2d; bld.mimg(aco_opcode::image_atomic_add, Definition(op_v1.physReg(), v1), op_s8, Operand(s4),
op_v1, op_v2)
->mimg()
.dim = ac_image_2d;
finish_assembler_test(); finish_assembler_test();
END_TEST END_TEST
@ -761,13 +816,19 @@ BEGIN_TEST(assembler.gfx11.vinterp)
bld.vinterp_inreg(aco_opcode::v_interp_p2_f32_inreg, dst, op0, op1, op2, 0); bld.vinterp_inreg(aco_opcode::v_interp_p2_f32_inreg, dst, op0, op1, op2, 0);
//! v_interp_p10_f32 v42, -v10, v20, v30 ; cd00002a 247a290a //! v_interp_p10_f32 v42, -v10, v20, v30 ; cd00002a 247a290a
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().neg[0] = true; bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)
->vinterp_inreg()
.neg[0] = true;
//! v_interp_p10_f32 v42, v10, -v20, v30 ; cd00002a 447a290a //! v_interp_p10_f32 v42, v10, -v20, v30 ; cd00002a 447a290a
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().neg[1] = true; bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)
->vinterp_inreg()
.neg[1] = true;
//! v_interp_p10_f32 v42, v10, v20, -v30 ; cd00002a 847a290a //! v_interp_p10_f32 v42, v10, v20, -v30 ; cd00002a 847a290a
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().neg[2] = true; bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)
->vinterp_inreg()
.neg[2] = true;
//! v_interp_p10_f16_f32 v42, v10, v20, v30 op_sel:[1,0,0,0] ; cd02082a 047a290a //! v_interp_p10_f16_f32 v42, v10, v20, v30 op_sel:[1,0,0,0] ; cd02082a 047a290a
bld.vinterp_inreg(aco_opcode::v_interp_p10_f16_f32_inreg, dst, op0, op1, op2, 0, 0x1); bld.vinterp_inreg(aco_opcode::v_interp_p10_f16_f32_inreg, dst, op0, op1, op2, 0, 0x1);
@ -782,7 +843,9 @@ BEGIN_TEST(assembler.gfx11.vinterp)
bld.vinterp_inreg(aco_opcode::v_interp_p2_rtz_f16_f32_inreg, dst, op0, op1, op2, 0, 0x8); bld.vinterp_inreg(aco_opcode::v_interp_p2_rtz_f16_f32_inreg, dst, op0, op1, op2, 0, 0x8);
//! v_interp_p10_f32 v42, v10, v20, v30 clamp ; cd00802a 047a290a //! v_interp_p10_f32 v42, v10, v20, v30 clamp ; cd00802a 047a290a
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().clamp = true; bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)
->vinterp_inreg()
.clamp = true;
finish_assembler_test(); finish_assembler_test();
END_TEST END_TEST
@ -899,16 +962,22 @@ BEGIN_TEST(assembler.gfx11.vop12c_v128)
bld.vop1_dpp(aco_opcode::v_rcp_f16, dst_v128, op_v1, dpp_row_rr(1))->dpp16().abs[0] = true; bld.vop1_dpp(aco_opcode::v_rcp_f16, dst_v128, op_v1, dpp_row_rr(1))->dpp16().abs[0] = true;
//! v_mul_f16_e64_dpp v128, -v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d5350080 200204fa ff1d2101 //! v_mul_f16_e64_dpp v128, -v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d5350080 200204fa ff1d2101
bld.vop2_dpp(aco_opcode::v_mul_f16, dst_v128, op_v1, op_v2, dpp_row_rr(1))->dpp16().neg[0] = true; bld.vop2_dpp(aco_opcode::v_mul_f16, dst_v128, op_v1, op_v2, dpp_row_rr(1))->dpp16().neg[0] =
true;
//! v_mul_f16_e64_dpp v128, |v1|, v2 row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d5350180 000204fa ff2d2101 //! v_mul_f16_e64_dpp v128, |v1|, v2 row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d5350180 000204fa ff2d2101
bld.vop2_dpp(aco_opcode::v_mul_f16, dst_v128, op_v1, op_v2, dpp_row_rr(1))->dpp16().abs[0] = true; bld.vop2_dpp(aco_opcode::v_mul_f16, dst_v128, op_v1, op_v2, dpp_row_rr(1))->dpp16().abs[0] =
true;
//! v_cmp_eq_f16_e64_dpp vcc, -v129, v2 row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d402006a 200204fa ff1d2181 //! v_cmp_eq_f16_e64_dpp vcc, -v129, v2 row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d402006a 200204fa ff1d2181
bld.vopc_dpp(aco_opcode::v_cmp_eq_f16, bld.def(s2, vcc), op_v129, op_v2, dpp_row_rr(1))->dpp16().neg[0] = true; bld.vopc_dpp(aco_opcode::v_cmp_eq_f16, bld.def(s2, vcc), op_v129, op_v2, dpp_row_rr(1))
->dpp16()
.neg[0] = true;
//! v_cmp_eq_f16_e64_dpp vcc, |v129|, v2 row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d402016a 000204fa ff2d2181 //! v_cmp_eq_f16_e64_dpp vcc, |v129|, v2 row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d402016a 000204fa ff2d2181
bld.vopc_dpp(aco_opcode::v_cmp_eq_f16, bld.def(s2, vcc), op_v129, op_v2, dpp_row_rr(1))->dpp16().abs[0] = true; bld.vopc_dpp(aco_opcode::v_cmp_eq_f16, bld.def(s2, vcc), op_v129, op_v2, dpp_row_rr(1))
->dpp16()
.abs[0] = true;
finish_assembler_test(); finish_assembler_test();
END_TEST END_TEST

View file

@ -633,9 +633,10 @@ BEGIN_TEST(d3d11_derivs.nsa_max)
//~gfx11! v4: %_:v[0-3] = image_sample_c_b_o s8: undef, s4: undef, v1: undef, %_:v[6], %_:v[7], %_:v[8], %_:v[3], %_:v[4-5] 2darray da //~gfx11! v4: %_:v[0-3] = image_sample_c_b_o s8: undef, s4: undef, v1: undef, %_:v[6], %_:v[7], %_:v[8], %_:v[3], %_:v[4-5] 2darray da
Instruction *instr = bld.mimg(aco_opcode::image_sample_c_b_o, Definition(reg_v0, v4), Instruction* instr =
Operand(s8), Operand(s4), Operand(v1), Operand(reg_v0, v6.as_linear()), bld.mimg(aco_opcode::image_sample_c_b_o, Definition(reg_v0, v4), Operand(s8), Operand(s4),
Operand(reg_v6, v1), Operand(reg_v7, v1), Operand(reg_v8, v1)); Operand(v1), Operand(reg_v0, v6.as_linear()), Operand(reg_v6, v1),
Operand(reg_v7, v1), Operand(reg_v8, v1));
instr->mimg().dim = ac_image_2darray; instr->mimg().dim = ac_image_2darray;
instr->mimg().da = true; instr->mimg().da = true;
instr->mimg().strict_wqm = true; instr->mimg().strict_wqm = true;

View file

@ -26,7 +26,8 @@
using namespace aco; using namespace aco;
static void create_mubuf(Temp desc=Temp(0, s8)) static void
create_mubuf(Temp desc = Temp(0, s8))
{ {
Operand desc_op(desc); Operand desc_op(desc);
desc_op.setFixed(PhysReg(0)); desc_op.setFixed(PhysReg(0));
@ -34,13 +35,15 @@ static void create_mubuf(Temp desc=Temp(0, s8))
Operand(PhysReg(256), v1), Operand::zero(), 0, false); Operand(PhysReg(256), v1), Operand::zero(), 0, false);
} }
static void create_mubuf_store() static void
create_mubuf_store()
{ {
bld.mubuf(aco_opcode::buffer_store_dword, Operand(PhysReg(0), s4), Operand(PhysReg(256), v1), bld.mubuf(aco_opcode::buffer_store_dword, Operand(PhysReg(0), s4), Operand(PhysReg(256), v1),
Operand(PhysReg(256), v1), Operand::zero(), 0, false); Operand(PhysReg(256), v1), Operand::zero(), 0, false);
} }
static void create_mtbuf(Temp desc=Temp(0, s8)) static void
create_mtbuf(Temp desc = Temp(0, s8))
{ {
Operand desc_op(desc); Operand desc_op(desc);
desc_op.setFixed(PhysReg(0)); desc_op.setFixed(PhysReg(0));
@ -49,22 +52,25 @@ static void create_mtbuf(Temp desc=Temp(0, s8))
V_008F0C_BUF_NUM_FORMAT_FLOAT, 0, false); V_008F0C_BUF_NUM_FORMAT_FLOAT, 0, false);
} }
static void create_flat() static void
create_flat()
{ {
bld.flat(aco_opcode::flat_load_dword, Definition(PhysReg(256), v1), bld.flat(aco_opcode::flat_load_dword, Definition(PhysReg(256), v1), Operand(PhysReg(256), v2),
Operand(PhysReg(256), v2), Operand(s2)); Operand(s2));
} }
static void create_global() static void
create_global()
{ {
bld.global(aco_opcode::global_load_dword, Definition(PhysReg(256), v1), bld.global(aco_opcode::global_load_dword, Definition(PhysReg(256), v1),
Operand(PhysReg(256), v2), Operand(s2)); Operand(PhysReg(256), v2), Operand(s2));
} }
static void create_mimg(bool nsa, Temp desc=Temp(0, s8)) static void
create_mimg(bool nsa, Temp desc = Temp(0, s8))
{ {
aco_ptr<MIMG_instruction> mimg{create_instruction<MIMG_instruction>( aco_ptr<MIMG_instruction> mimg{
aco_opcode::image_sample, Format::MIMG, 5, 1)}; create_instruction<MIMG_instruction>(aco_opcode::image_sample, Format::MIMG, 5, 1)};
mimg->definitions[0] = Definition(PhysReg(256), v1); mimg->definitions[0] = Definition(PhysReg(256), v1);
mimg->operands[0] = Operand(desc); mimg->operands[0] = Operand(desc);
mimg->operands[0].setFixed(PhysReg(0)); mimg->operands[0].setFixed(PhysReg(0));
@ -78,13 +84,15 @@ static void create_mimg(bool nsa, Temp desc=Temp(0, s8))
bld.insert(std::move(mimg)); bld.insert(std::move(mimg));
} }
static void create_smem() static void
create_smem()
{ {
bld.smem(aco_opcode::s_load_dword, Definition(PhysReg(0), s1), Operand(PhysReg(0), s2), bld.smem(aco_opcode::s_load_dword, Definition(PhysReg(0), s1), Operand(PhysReg(0), s2),
Operand::zero()); Operand::zero());
} }
static void create_smem_buffer(Temp desc=Temp(0, s4)) static void
create_smem_buffer(Temp desc = Temp(0, s4))
{ {
Operand desc_op(desc); Operand desc_op(desc);
desc_op.setFixed(PhysReg(0)); desc_op.setFixed(PhysReg(0));

View file

@ -25,22 +25,25 @@
using namespace aco; using namespace aco;
void create_mubuf(unsigned offset, PhysReg dst=PhysReg(256), PhysReg vaddr=PhysReg(256)) void
create_mubuf(unsigned offset, PhysReg dst = PhysReg(256), PhysReg vaddr = PhysReg(256))
{ {
bld.mubuf(aco_opcode::buffer_load_dword, Definition(dst, v1), Operand(PhysReg(0), s4), bld.mubuf(aco_opcode::buffer_load_dword, Definition(dst, v1), Operand(PhysReg(0), s4),
Operand(vaddr, v1), Operand::zero(), offset, true); Operand(vaddr, v1), Operand::zero(), offset, true);
} }
void create_mubuf_store(PhysReg src=PhysReg(256)) void
create_mubuf_store(PhysReg src = PhysReg(256))
{ {
bld.mubuf(aco_opcode::buffer_store_dword, Operand(PhysReg(0), s4), bld.mubuf(aco_opcode::buffer_store_dword, Operand(PhysReg(0), s4), Operand(src, v1),
Operand(src, v1), Operand::zero(), Operand(src, v1), 0, true); Operand::zero(), Operand(src, v1), 0, true);
} }
void create_mimg(bool nsa, unsigned addrs, unsigned instr_dwords) void
create_mimg(bool nsa, unsigned addrs, unsigned instr_dwords)
{ {
aco_ptr<MIMG_instruction> mimg{create_instruction<MIMG_instruction>( aco_ptr<MIMG_instruction> mimg{
aco_opcode::image_sample, Format::MIMG, 3 + addrs, 1)}; create_instruction<MIMG_instruction>(aco_opcode::image_sample, Format::MIMG, 3 + addrs, 1)};
mimg->definitions[0] = Definition(PhysReg(256), v1); mimg->definitions[0] = Definition(PhysReg(256), v1);
mimg->operands[0] = Operand(PhysReg(0), s8); mimg->operands[0] = Operand(PhysReg(0), s8);
mimg->operands[1] = Operand(PhysReg(0), s4); mimg->operands[1] = Operand(PhysReg(0), s4);
@ -216,7 +219,8 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
//! s_waitcnt_depctr vm_vsrc(0) //! s_waitcnt_depctr vm_vsrc(0)
//! s1: %0:m0 = s_mov_b32 0 //! s1: %0:m0 = s_mov_b32 0
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4));
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1)); bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
Operand(m0, s1));
bld.sop1(aco_opcode::s_mov_b32, Definition(m0, s1), Operand::zero()); bld.sop1(aco_opcode::s_mov_b32, Definition(m0, s1), Operand::zero());
//! p_unit_test 5 //! p_unit_test 5
@ -224,7 +228,8 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
//! s_waitcnt_depctr vm_vsrc(0) //! s_waitcnt_depctr vm_vsrc(0)
//! s2: %0:exec = s_mov_b64 -1 //! s2: %0:exec = s_mov_b64 -1
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5));
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1)); bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
Operand(m0, s1));
bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand::c64(-1)); bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand::c64(-1));
/* no hazard: LDS */ /* no hazard: LDS */
@ -232,7 +237,8 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
//! v1: %0:v[0] = ds_read_b32 %0:v[0], %0:m0 //! v1: %0:v[0] = ds_read_b32 %0:v[0], %0:m0
//! s1: %0:s[0] = s_mov_b32 0 //! s1: %0:s[0] = s_mov_b32 0
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6));
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1)); bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
Operand(m0, s1));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(0), s1), Operand::zero()); bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(0), s1), Operand::zero());
/* no hazard: LDS with VALU in-between */ /* no hazard: LDS with VALU in-between */
@ -241,7 +247,8 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
//! v_nop //! v_nop
//! s1: %0:m0 = s_mov_b32 0 //! s1: %0:m0 = s_mov_b32 0
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7));
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1)); bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
Operand(m0, s1));
bld.vop1(aco_opcode::v_nop); bld.vop1(aco_opcode::v_nop);
bld.sop1(aco_opcode::s_mov_b32, Definition(m0, s1), Operand::zero()); bld.sop1(aco_opcode::s_mov_b32, Definition(m0, s1), Operand::zero());
@ -269,7 +276,8 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
//! s_waitcnt lgkmcnt(0) //! s_waitcnt lgkmcnt(0)
//! s1: %0:m0 = s_mov_b32 0 //! s1: %0:m0 = s_mov_b32 0
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10));
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1)); bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
Operand(m0, s1));
bld.sopp(aco_opcode::s_waitcnt, -1, 0xc07f); bld.sopp(aco_opcode::s_waitcnt, -1, 0xc07f);
bld.sop1(aco_opcode::s_mov_b32, Definition(m0, s1), Operand::zero()); bld.sop1(aco_opcode::s_mov_b32, Definition(m0, s1), Operand::zero());
@ -300,7 +308,8 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
//! s_waitcnt_depctr vm_vsrc(0) //! s_waitcnt_depctr vm_vsrc(0)
//! s1: %0:m0 = s_mov_b32 0 //! s1: %0:m0 = s_mov_b32 0
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13));
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1)); bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
Operand(m0, s1));
bld.sopp(aco_opcode::s_waitcnt, -1, 0x3f70); bld.sopp(aco_opcode::s_waitcnt, -1, 0x3f70);
bld.sop1(aco_opcode::s_mov_b32, Definition(m0, s1), Operand::zero()); bld.sop1(aco_opcode::s_mov_b32, Definition(m0, s1), Operand::zero());
@ -932,8 +941,8 @@ BEGIN_TEST(insert_nops.valu_mask_write)
//! s_waitcnt_depctr sa_sdst(0) //! s_waitcnt_depctr sa_sdst(0)
//! s1: %0:s[2] = s_mov_b32 %0:s[1] //! s1: %0:s[2] = s_mov_b32 %0:s[1]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0));
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand::zero(),
Operand::zero(), Operand::zero(), Operand(PhysReg(0), s2)); Operand::zero(), Operand(PhysReg(0), s2));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero()); bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero());
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1)); bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));
@ -944,8 +953,8 @@ BEGIN_TEST(insert_nops.valu_mask_write)
//! s1: %0:s[1] = s_mov_b32 0 //! s1: %0:s[1] = s_mov_b32 0
//! s1: %0:s[2] = s_mov_b32 %0:s[1] //! s1: %0:s[2] = s_mov_b32 %0:s[1]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1));
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand::zero(),
Operand::zero(), Operand::zero(), Operand(PhysReg(0), s2)); Operand::zero(), Operand(PhysReg(0), s2));
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(257), v1), Operand(PhysReg(1), s1)); bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(257), v1), Operand(PhysReg(1), s1));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero()); bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero());
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1)); bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));
@ -957,8 +966,8 @@ BEGIN_TEST(insert_nops.valu_mask_write)
//! s1: %0:s[2] = s_mov_b32 %0:s[1] //! s1: %0:s[2] = s_mov_b32 %0:s[1]
//! s1: %0:s[2] = s_mov_b32 %0:s[1] //! s1: %0:s[2] = s_mov_b32 %0:s[1]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2));
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand::zero(),
Operand::zero(), Operand::zero(), Operand(PhysReg(0), s2)); Operand::zero(), Operand(PhysReg(0), s2));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero()); bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero());
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1)); bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1)); bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));
@ -969,8 +978,8 @@ BEGIN_TEST(insert_nops.valu_mask_write)
//! s_waitcnt_depctr sa_sdst(0) //! s_waitcnt_depctr sa_sdst(0)
//! s1: %0:s[2] = s_mov_b32 %0:s[1] //! s1: %0:s[2] = s_mov_b32 %0:s[1]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3));
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand::zero(),
Operand::zero(), Operand::zero(), Operand(PhysReg(0), s2)); Operand::zero(), Operand(PhysReg(0), s2));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero()); bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero());
bld.sopp(aco_opcode::s_waitcnt_depctr, -1, 0xfffe); bld.sopp(aco_opcode::s_waitcnt_depctr, -1, 0xfffe);
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1)); bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));
@ -982,8 +991,8 @@ BEGIN_TEST(insert_nops.valu_mask_write)
//! s_waitcnt_depctr sa_sdst(0) //! s_waitcnt_depctr sa_sdst(0)
//! s1: %0:s[2] = s_mov_b32 %0:s[1] //! s1: %0:s[2] = s_mov_b32 %0:s[1]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4));
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand(PhysReg(2), s1),
Operand(PhysReg(2), s1), Operand::zero(), Operand(PhysReg(0), s2)); Operand::zero(), Operand(PhysReg(0), s2));
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero()); bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero());
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1)); bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));

View file

@ -36,15 +36,14 @@ BEGIN_TEST(insert_waitcnt.ds_ordered_count)
Operand chan_counter(PhysReg(260), v1); Operand chan_counter(PhysReg(260), v1);
Operand m(m0, s1); Operand m(m0, s1);
Instruction *ds_instr; Instruction* ds_instr;
//>> ds_ordered_count %0:v[0], %0:v[3], %0:m0 offset0:3072 gds storage:gds semantics:volatile //>> ds_ordered_count %0:v[0], %0:v[3], %0:m0 offset0:3072 gds storage:gds semantics:volatile
//! s_waitcnt lgkmcnt(0) //! s_waitcnt lgkmcnt(0)
ds_instr = bld.ds(aco_opcode::ds_ordered_count, def0, gds_base, m, 3072u, 0u, true); ds_instr = bld.ds(aco_opcode::ds_ordered_count, def0, gds_base, m, 3072u, 0u, true);
ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_volatile); ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_volatile);
//! ds_add_rtn_u32 %0:v[1], %0:v[3], %0:v[4], %0:m0 gds storage:gds semantics:volatile,atomic,rmw //! ds_add_rtn_u32 %0:v[1], %0:v[3], %0:v[4], %0:m0 gds storage:gds semantics:volatile,atomic,rmw
ds_instr = bld.ds(aco_opcode::ds_add_rtn_u32, def1, ds_instr = bld.ds(aco_opcode::ds_add_rtn_u32, def1, gds_base, chan_counter, m, 0u, 0u, true);
gds_base, chan_counter, m, 0u, 0u, true);
ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_atomicrmw); ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_atomicrmw);
//! s_waitcnt lgkmcnt(0) //! s_waitcnt lgkmcnt(0)

View file

@ -21,19 +21,18 @@
* IN THE SOFTWARE. * IN THE SOFTWARE.
* *
*/ */
#include <llvm/Config/llvm-config.h>
#include "helpers.h" #include "helpers.h"
#include "test_isel-spirv.h" #include "test_isel-spirv.h"
#include <llvm/Config/llvm-config.h>
using namespace aco; using namespace aco;
BEGIN_TEST(isel.interp.simple) BEGIN_TEST(isel.interp.simple)
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX, QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
layout(location = 0) in vec4 in_color; layout(location = 0) in vec4 in_color;
layout(location = 0) out vec4 out_color; layout(location = 0) out vec4 out_color;
void main() { void main() { out_color = in_color;
out_color = in_color;
} }
); );
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT, QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,

View file

@ -61,7 +61,8 @@ BEGIN_TEST(optimize.neg)
//! v1: %res5 = v_mul_f32 -%a, %b row_shl:1 bound_ctrl:1 //! v1: %res5 = v_mul_f32 -%a, %b row_shl:1 bound_ctrl:1
//! p_unit_test 5, %res5 //! p_unit_test 5, %res5
writeout(5, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), neg_a, inputs[1], dpp_row_sl(1))); writeout(5,
bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), neg_a, inputs[1], dpp_row_sl(1)));
//! v1: %res6 = v_subrev_f32 %a, %b //! v1: %res6 = v_subrev_f32 %a, %b
//! p_unit_test 6, %res6 //! p_unit_test 6, %res6
@ -264,7 +265,8 @@ BEGIN_TEST(optimize.output_modifiers)
finish_opt_test(); finish_opt_test();
END_TEST END_TEST
Temp create_subbrev_co(Operand op0, Operand op1, Operand op2) Temp
create_subbrev_co(Operand op0, Operand op1, Operand op2)
{ {
return bld.vop2_e64(aco_opcode::v_subbrev_co_u32, bld.def(v1), bld.def(bld.lm), op0, op1, op2); return bld.vop2_e64(aco_opcode::v_subbrev_co_u32, bld.def(v1), bld.def(bld.lm), op0, op1, op2);
} }
@ -438,7 +440,7 @@ BEGIN_TEST(optimize.bcnt)
END_TEST END_TEST
struct clamp_config { struct clamp_config {
const char *name; const char* name;
aco_opcode min, max, med3; aco_opcode min, max, med3;
Operand lb, ub; Operand lb, ub;
}; };
@ -863,7 +865,7 @@ enum denorm_op {
denorm_fnegabs = 3, denorm_fnegabs = 3,
}; };
static const char *denorm_op_names[] = { static const char* denorm_op_names[] = {
"mul1", "mul1",
"fneg", "fneg",
"fabs", "fabs",
@ -877,31 +879,27 @@ struct denorm_config {
aco_opcode dest; aco_opcode dest;
}; };
static const char *srcdest_op_name(aco_opcode op) static const char*
srcdest_op_name(aco_opcode op)
{ {
switch (op) { switch (op) {
case aco_opcode::v_cndmask_b32: case aco_opcode::v_cndmask_b32: return "cndmask";
return "cndmask"; case aco_opcode::v_min_f32: return "min";
case aco_opcode::v_min_f32: case aco_opcode::v_rcp_f32: return "rcp";
return "min"; default: return "none";
case aco_opcode::v_rcp_f32:
return "rcp";
default:
return "none";
} }
} }
static Temp emit_denorm_srcdest(aco_opcode op, Temp val) static Temp
emit_denorm_srcdest(aco_opcode op, Temp val)
{ {
switch (op) { switch (op) {
case aco_opcode::v_cndmask_b32: case aco_opcode::v_cndmask_b32:
return bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), val, inputs[1]); return bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), val, inputs[1]);
case aco_opcode::v_min_f32: case aco_opcode::v_min_f32:
return bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand::zero(), val); return bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand::zero(), val);
case aco_opcode::v_rcp_f32: case aco_opcode::v_rcp_f32: return bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), val);
return bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), val); default: return val;
default:
return val;
} }
} }
@ -917,7 +915,8 @@ BEGIN_TEST(optimize.denorm_propagation)
configs.push_back({flush, op, aco_opcode::num_opcodes, dest}); configs.push_back({flush, op, aco_opcode::num_opcodes, dest});
} }
for (aco_opcode src : {aco_opcode::v_cndmask_b32, aco_opcode::v_min_f32, aco_opcode::v_rcp_f32}) { for (aco_opcode src :
{aco_opcode::v_cndmask_b32, aco_opcode::v_min_f32, aco_opcode::v_rcp_f32}) {
for (denorm_op op : {denorm_mul1, denorm_fneg, denorm_fabs, denorm_fnegabs}) for (denorm_op op : {denorm_mul1, denorm_fneg, denorm_fabs, denorm_fnegabs})
configs.push_back({flush, op, src, aco_opcode::num_opcodes}); configs.push_back({flush, op, src, aco_opcode::num_opcodes});
} }
@ -925,18 +924,18 @@ BEGIN_TEST(optimize.denorm_propagation)
for (denorm_config cfg : configs) { for (denorm_config cfg : configs) {
char subvariant[128]; char subvariant[128];
sprintf(subvariant, "_%s_%s_%s_%s", sprintf(subvariant, "_%s_%s_%s_%s", cfg.flush ? "flush" : "keep", srcdest_op_name(cfg.src),
cfg.flush ? "flush" : "keep", srcdest_op_name(cfg.src),
denorm_op_names[(int)cfg.op], srcdest_op_name(cfg.dest)); denorm_op_names[(int)cfg.op], srcdest_op_name(cfg.dest));
if (!setup_cs("v1 s2", (amd_gfx_level)i, CHIP_UNKNOWN, subvariant)) if (!setup_cs("v1 s2", (amd_gfx_level)i, CHIP_UNKNOWN, subvariant))
continue; continue;
bool can_propagate = cfg.src == aco_opcode::v_rcp_f32 || (i >= GFX9 && cfg.src == aco_opcode::v_min_f32) || bool can_propagate = cfg.src == aco_opcode::v_rcp_f32 ||
cfg.dest == aco_opcode::v_rcp_f32 || (i >= GFX9 && cfg.dest == aco_opcode::v_min_f32) || (i >= GFX9 && cfg.src == aco_opcode::v_min_f32) ||
!cfg.flush; cfg.dest == aco_opcode::v_rcp_f32 ||
(i >= GFX9 && cfg.dest == aco_opcode::v_min_f32) || !cfg.flush;
fprintf(output, "src, dest, op: %s %s %s\n", fprintf(output, "src, dest, op: %s %s %s\n", srcdest_op_name(cfg.src),
srcdest_op_name(cfg.src), srcdest_op_name(cfg.dest), denorm_op_names[(int)cfg.op]); srcdest_op_name(cfg.dest), denorm_op_names[(int)cfg.op]);
fprintf(output, "can_propagate: %u\n", can_propagate); fprintf(output, "can_propagate: %u\n", can_propagate);
//! src, dest, op: $src $dest $op //! src, dest, op: $src $dest $op
//! can_propagate: #can_propagate //! can_propagate: #can_propagate
@ -976,15 +975,9 @@ BEGIN_TEST(optimize.denorm_propagation)
case denorm_mul1: case denorm_mul1:
val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x3f800000u), val); val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x3f800000u), val);
break; break;
case denorm_fneg: case denorm_fneg: val = fneg(val); break;
val = fneg(val); case denorm_fabs: val = fabs(val); break;
break; case denorm_fnegabs: val = fneg(fabs(val)); break;
case denorm_fabs:
val = fabs(val);
break;
case denorm_fnegabs:
val = fneg(fabs(val));
break;
} }
val = emit_denorm_srcdest(cfg.dest, val); val = emit_denorm_srcdest(cfg.dest, val);
writeout( writeout(
@ -1123,13 +1116,15 @@ BEGIN_TEST(optimize.dpp_prop)
//! v1: %res2 = v_mul_f32 0x12345678, %a //! v1: %res2 = v_mul_f32 0x12345678, %a
//! p_unit_test 2, %res2 //! p_unit_test 2, %res2
Temp literal1 = bld.copy(bld.def(v1), Operand::c32(0x12345678u)); Temp literal1 = bld.copy(bld.def(v1), Operand::c32(0x12345678u));
writeout(2, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), literal1, inputs[0], dpp_row_sl(1))); writeout(2,
bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), literal1, inputs[0], dpp_row_sl(1)));
//! v1: %literal2 = p_parallelcopy 0x12345679 //! v1: %literal2 = p_parallelcopy 0x12345679
//! v1: %res3 = v_mul_f32 %a, %literal row_shl:1 bound_ctrl:1 //! v1: %res3 = v_mul_f32 %a, %literal row_shl:1 bound_ctrl:1
//! p_unit_test 3, %res3 //! p_unit_test 3, %res3
Temp literal2 = bld.copy(bld.def(v1), Operand::c32(0x12345679u)); Temp literal2 = bld.copy(bld.def(v1), Operand::c32(0x12345679u));
writeout(3, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], literal2, dpp_row_sl(1))); writeout(3,
bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], literal2, dpp_row_sl(1)));
//! v1: %b_v = p_parallelcopy %b //! v1: %b_v = p_parallelcopy %b
//! v1: %res4 = v_mul_f32 %b, %a //! v1: %res4 = v_mul_f32 %b, %a
@ -1171,7 +1166,9 @@ BEGIN_TEST(optimize.casts)
//! v1: %res2_tmp = v_mul_f32 -1.0, %a16 //! v1: %res2_tmp = v_mul_f32 -1.0, %a16
//! v2b: %res2 = v_mul_f16 %res2_tmp, %a16 //! v2b: %res2 = v_mul_f16 %res2_tmp, %a16
//! p_unit_test 2, %res2 //! p_unit_test 2, %res2
writeout(2, fmul(u2u16(bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0xbf800000u), bld.as_uniform(a16))), a16)); writeout(2, fmul(u2u16(bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1),
Operand::c32(0xbf800000u), bld.as_uniform(a16))),
a16));
//! v1: %res3_tmp = v_mul_f32 %a, %a //! v1: %res3_tmp = v_mul_f32 %a, %a
//! v2b: %res3 = v_add_f16 %res3_tmp, 0 clamp //! v2b: %res3 = v_add_f16 %res3_tmp, 0 clamp
@ -1191,7 +1188,8 @@ BEGIN_TEST(optimize.casts)
//! v2b: %res6_tmp = v_mul_f16 %a16, %a16 //! v2b: %res6_tmp = v_mul_f16 %a16, %a16
//! v1: %res6 = v_mul_f32 2.0, %res6_tmp //! v1: %res6 = v_mul_f32 2.0, %res6_tmp
//! p_unit_test 6, %res6 //! p_unit_test 6, %res6
writeout(6, fmul(bld.as_uniform(fmul(a16, a16)), bld.copy(bld.def(v1), Operand::c32(0x40000000)))); writeout(6,
fmul(bld.as_uniform(fmul(a16, a16)), bld.copy(bld.def(v1), Operand::c32(0x40000000))));
//! v1: %res7_tmp = v_mul_f32 %a, %a //! v1: %res7_tmp = v_mul_f32 %a, %a
//! v2b: %res7 = v_add_f16 %res7_tmp, %a16 //! v2b: %res7 = v_add_f16 %res7_tmp, %a16
@ -1211,7 +1209,8 @@ BEGIN_TEST(optimize.casts)
//! v2b: %res10_tmp = v_mul_f16 %a16, %a16 //! v2b: %res10_tmp = v_mul_f16 %a16, %a16
//! v1: %res10 = v_mul_f32 -1.0, %res10_tmp //! v1: %res10 = v_mul_f32 -1.0, %res10_tmp
//! p_unit_test 10, %res10 //! p_unit_test 10, %res10
writeout(10, bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0xbf800000u), bld.as_uniform(fmul(a16, a16)))); writeout(10, bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0xbf800000u),
bld.as_uniform(fmul(a16, a16))));
finish_opt_test(); finish_opt_test();
END_TEST END_TEST
@ -1549,7 +1548,8 @@ BEGIN_TEST(optimize.mad_mix.fma.basic)
//! v1: %res2_mul = v_fma_mix_f32 lo(%a16), %b, -0 //! v1: %res2_mul = v_fma_mix_f32 lo(%a16), %b, -0
//! v1: %res2 = v_add_f32 %res2_mul, %c *2 //! v1: %res2 = v_add_f32 %res2_mul, %c *2
//! p_unit_test 2, %res2 //! p_unit_test 2, %res2
writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000), fadd(fmul(f2f32(a16), b), c))); writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000),
fadd(fmul(f2f32(a16), b), c)));
/* neg/abs modifiers */ /* neg/abs modifiers */
//! v1: %res3 = v_fma_mix_f32 -lo(%a16), %b, |lo(%c16)| //! v1: %res3 = v_fma_mix_f32 -lo(%a16), %b, |lo(%c16)|
@ -1730,7 +1730,8 @@ BEGIN_TEST(optimize.mad_mix.cast)
} }
END_TEST END_TEST
static void vop3p_constant(unsigned *idx, aco_opcode op, const char *swizzle, uint32_t val) static void
vop3p_constant(unsigned* idx, aco_opcode op, const char* swizzle, uint32_t val)
{ {
uint32_t halves[2] = {val & 0xffff, val >> 16}; uint32_t halves[2] = {val & 0xffff, val >> 16};
uint32_t expected = halves[swizzle[0] - 'x'] | (halves[swizzle[1] - 'x'] << 16); uint32_t expected = halves[swizzle[0] - 'x'] | (halves[swizzle[1] - 'x'] << 16);
@ -1744,7 +1745,7 @@ static void vop3p_constant(unsigned *idx, aco_opcode op, const char *swizzle, ui
BEGIN_TEST(optimize.vop3p_constants) BEGIN_TEST(optimize.vop3p_constants)
for (aco_opcode op : {aco_opcode::v_pk_add_f16, aco_opcode::v_pk_add_u16}) { for (aco_opcode op : {aco_opcode::v_pk_add_f16, aco_opcode::v_pk_add_u16}) {
for (const char *swizzle : {"xx", "yy", "xy", "yx"}) { for (const char* swizzle : {"xx", "yy", "xy", "yx"}) {
char variant[16]; char variant[16];
strcpy(variant, op == aco_opcode::v_pk_add_f16 ? "_f16" : "_u16"); strcpy(variant, op == aco_opcode::v_pk_add_f16 ? "_f16" : "_u16");
strcat(variant, "_"); strcat(variant, "_");

View file

@ -36,7 +36,7 @@ BEGIN_TEST(optimizer_postRA.vcmp)
ASSERTED bool setup_ok = setup_cs("v1", GFX8); ASSERTED bool setup_ok = setup_cs("v1", GFX8);
assert(setup_ok); assert(setup_ok);
auto &startpgm = bld.instructions->at(0); auto& startpgm = bld.instructions->at(0);
assert(startpgm->opcode == aco_opcode::p_startpgm); assert(startpgm->opcode == aco_opcode::p_startpgm);
startpgm->definitions[0].setFixed(reg_v0); startpgm->definitions[0].setFixed(reg_v0);
@ -50,8 +50,10 @@ BEGIN_TEST(optimizer_postRA.vcmp)
//! p_unit_test 0, %e:s[2-3] //! p_unit_test 0, %e:s[2-3]
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(), auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
Operand(v_in, reg_v0)); Operand(v_in, reg_v0));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm)); auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp),
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp())); Operand(exec, bld.lm));
auto br =
bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(0, Operand(br, reg_s2)); writeout(0, Operand(br, reg_s2));
} }
@ -67,9 +69,11 @@ BEGIN_TEST(optimizer_postRA.vcmp)
//! p_unit_test 1, %e:s[2-3], %f:vcc //! p_unit_test 1, %e:s[2-3], %f:vcc
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(), auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
Operand(v_in, reg_v0)); Operand(v_in, reg_v0));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm)); auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp),
Operand(exec, bld.lm));
auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, vcc), Operand::zero()); auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, vcc), Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp())); auto br =
bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(1, Operand(br, reg_s2), Operand(ovrwr, vcc)); writeout(1, Operand(br, reg_s2), Operand(ovrwr, vcc));
} }
@ -85,9 +89,11 @@ BEGIN_TEST(optimizer_postRA.vcmp)
//! p_unit_test 1, %e:s[2-3], %f:vcc //! p_unit_test 1, %e:s[2-3], %f:vcc
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(), auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
Operand(v_in, reg_v0)); Operand(v_in, reg_v0));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm)); auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp),
Operand(exec, bld.lm));
auto ovrwr = bld.sop1(aco_opcode::s_mov_b32, bld.def(s1, vcc_hi), Operand::zero()); auto ovrwr = bld.sop1(aco_opcode::s_mov_b32, bld.def(s1, vcc_hi), Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp())); auto br =
bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(1, Operand(br, reg_s2), Operand(ovrwr, vcc)); writeout(1, Operand(br, reg_s2), Operand(ovrwr, vcc));
} }
@ -102,8 +108,10 @@ BEGIN_TEST(optimizer_postRA.vcmp)
//! p_unit_test 2, %e:s[2-3] //! p_unit_test 2, %e:s[2-3]
auto vcmp = bld.vopc_e64(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, reg_s4), Operand::zero(), auto vcmp = bld.vopc_e64(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, reg_s4), Operand::zero(),
Operand(v_in, reg_v0)); Operand(v_in, reg_v0));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), Operand(vcmp, reg_s4), Operand(exec, bld.lm)); auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc),
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp())); Operand(vcmp, reg_s4), Operand(exec, bld.lm));
auto br =
bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(2, Operand(br, reg_s2)); writeout(2, Operand(br, reg_s2));
} }
@ -116,10 +124,12 @@ BEGIN_TEST(optimizer_postRA.vcmp)
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
//! s2: %e:s[2-3] = p_cbranch_z %d:scc //! s2: %e:s[2-3] = p_cbranch_z %d:scc
//! p_unit_test 2, %e:s[2-3] //! p_unit_test 2, %e:s[2-3]
auto salu = bld.sop2(Builder::s_or, bld.def(bld.lm, vcc), bld.def(s1, scc), auto salu = bld.sop2(Builder::s_or, bld.def(bld.lm, vcc), bld.def(s1, scc), Operand::c32(1u),
Operand::c32(1u), Operand(reg_s4, bld.lm)); Operand(reg_s4, bld.lm));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), Operand(salu, vcc), Operand(exec, bld.lm)); auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc),
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp())); Operand(salu, vcc), Operand(exec, bld.lm));
auto br =
bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(2, Operand(br, reg_s2)); writeout(2, Operand(br, reg_s2));
} }
@ -135,9 +145,11 @@ BEGIN_TEST(optimizer_postRA.vcmp)
//! p_unit_test 4, %e:s[2-3], %f:exec //! p_unit_test 4, %e:s[2-3], %f:exec
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(), auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
Operand(v_in, reg_v0)); Operand(v_in, reg_v0));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm)); auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp),
Operand(exec, bld.lm));
auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, exec), Operand::c32(42u)); auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, exec), Operand::c32(42u));
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp())); auto br =
bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(4, Operand(br, reg_s2), Operand(ovrwr, exec)); writeout(4, Operand(br, reg_s2), Operand(ovrwr, exec));
} }
@ -304,7 +316,8 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_opt)
Operand::c32(0x40018u)); Operand::c32(0x40018u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero()); Operand::zero());
auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0), Operand(op_in_2), bld.scc(scmp)); auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0),
Operand(op_in_2), bld.scc(scmp));
writeout(6, Operand(br, reg_s4)); writeout(6, Operand(br, reg_s4));
} }
@ -324,7 +337,8 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_opt)
Operand::c32(1u)); Operand::c32(1u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero()); Operand::zero());
auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0), Operand(op_in_2), bld.scc(scmp)); auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0),
Operand(op_in_2), bld.scc(scmp));
writeout(7, Operand(br, reg_s4), Operand(ovrw, reg_s3)); writeout(7, Operand(br, reg_s4), Operand(ovrw, reg_s3));
} }
@ -368,7 +382,8 @@ BEGIN_TEST(optimizer_postRA.dpp)
//! v1: %res2:v[2] = v_sub_f32 %b:v[1], %tmp2:v[2] row_half_mirror bound_ctrl:1 //! v1: %res2:v[2] = v_sub_f32 %b:v[1], %tmp2:v[2] row_half_mirror bound_ctrl:1
//! p_unit_test 2, %res2:v[2] //! p_unit_test 2, %res2:v[2]
Temp tmp2 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); Temp tmp2 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
Temp res2 = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp2, reg_v2), dpp_row_half_mirror); Temp res2 = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp2, reg_v2),
dpp_row_half_mirror);
writeout(2, Operand(res2, reg_v2)); writeout(2, Operand(res2, reg_v2));
/* modifiers */ /* modifiers */
@ -429,14 +444,16 @@ BEGIN_TEST(optimizer_postRA.dpp)
//! v1: %res8:v[2] = v_cndmask_b32 %a:v[0], %b:v[1], %c:vcc row_mirror bound_ctrl:1 //! v1: %res8:v[2] = v_cndmask_b32 %a:v[0], %b:v[1], %c:vcc row_mirror bound_ctrl:1
//! p_unit_test 8, %res8:v[2] //! p_unit_test 8, %res8:v[2]
Temp tmp8 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); Temp tmp8 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
Temp res8 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp8, reg_v2), b, c); Temp res8 =
bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp8, reg_v2), b, c);
writeout(8, Operand(res8, reg_v2)); writeout(8, Operand(res8, reg_v2));
//! v1: %tmp9:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 //! v1: %tmp9:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
//! v1: %res9:v[2] = v_cndmask_b32 %tmp9:v[2], %b:v[1], %d:s[0-1] //! v1: %res9:v[2] = v_cndmask_b32 %tmp9:v[2], %b:v[1], %d:s[0-1]
//! p_unit_test 9, %res9:v[2] //! p_unit_test 9, %res9:v[2]
Temp tmp9 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); Temp tmp9 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
Temp res9 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp9, reg_v2), b, d); Temp res9 =
bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp9, reg_v2), b, d);
writeout(9, Operand(res9, reg_v2)); writeout(9, Operand(res9, reg_v2));
/* control flow */ /* control flow */
@ -492,7 +509,10 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf)
//! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec //! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB1, BB2 //! s2: %0:vcc = p_cbranch_nz BB1, BB2
emit_divergent_if_else(program.get(), bld, e, [&]() -> void { emit_divergent_if_else(
program.get(), bld, e,
[&]() -> void
{
/* --- logical then --- */ /* --- logical then --- */
//! BB1 //! BB1
//! /* logical preds: BB0, / linear preds: BB0, / kind: */ //! /* logical preds: BB0, / linear preds: BB0, / kind: */
@ -514,7 +534,9 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf)
//! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */ //! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
//! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec //! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB4, BB5 //! s2: %0:vcc = p_cbranch_nz BB4, BB5
}, [&]() -> void { },
[&]() -> void
{
/* --- logical else --- */ /* --- logical else --- */
//! BB4 //! BB4
//! /* logical preds: BB0, / linear preds: BB3, / kind: */ //! /* logical preds: BB0, / linear preds: BB3, / kind: */
@ -535,7 +557,8 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf)
//! v1: %res10:v[12] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1 //! v1: %res10:v[12] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1
//! p_unit_test 10, %res10:v[12] //! p_unit_test 10, %res10:v[12]
Temp result = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v12), Operand(dpp_tmp, reg_v12), b); Temp result =
bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v12), Operand(dpp_tmp, reg_v12), b);
writeout(10, Operand(result, reg_v12)); writeout(10, Operand(result, reg_v12));
finish_optimizer_postRA_test(); finish_optimizer_postRA_test();
@ -568,7 +591,10 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf_overwritten)
//! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec //! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB1, BB2 //! s2: %0:vcc = p_cbranch_nz BB1, BB2
emit_divergent_if_else(program.get(), bld, e, [&]() -> void { emit_divergent_if_else(
program.get(), bld, e,
[&]() -> void
{
/* --- logical then --- */ /* --- logical then --- */
//! BB1 //! BB1
//! /* logical preds: BB0, / linear preds: BB0, / kind: */ //! /* logical preds: BB0, / linear preds: BB0, / kind: */
@ -578,7 +604,8 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf_overwritten)
Temp addr = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(v1, a.physReg()), f); Temp addr = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(v1, a.physReg()), f);
//! buffer_store_dword %addr:v[0], 0, %d:v[3], 0 offen //! buffer_store_dword %addr:v[0], 0, %d:v[3], 0 offen
bld.mubuf(aco_opcode::buffer_store_dword, Operand(addr, a.physReg()), Operand::zero(), d, Operand::zero(), 0, true); bld.mubuf(aco_opcode::buffer_store_dword, Operand(addr, a.physReg()), Operand::zero(), d,
Operand::zero(), 0, true);
//! p_logical_end //! p_logical_end
//! s2: %0:vcc = p_branch BB3 //! s2: %0:vcc = p_branch BB3
@ -593,7 +620,9 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf_overwritten)
//! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */ //! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
//! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec //! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB4, BB5 //! s2: %0:vcc = p_cbranch_nz BB4, BB5
}, [&]() -> void { },
[&]() -> void
{
/* --- logical else --- */ /* --- logical else --- */
//! BB4 //! BB4
//! /* logical preds: BB0, / linear preds: BB3, / kind: */ //! /* logical preds: BB0, / linear preds: BB3, / kind: */
@ -613,7 +642,8 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf_overwritten)
//! s2: %0:exec = p_parallelcopy %saved_exec:s[84-85] //! s2: %0:exec = p_parallelcopy %saved_exec:s[84-85]
//! v1: %result:v[12] = v_add_f32 %dpp_mov_tmp:v[12], %b:v[1] //! v1: %result:v[12] = v_add_f32 %dpp_mov_tmp:v[12], %b:v[1]
Temp result = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v12), Operand(dpp_tmp, reg_v12), b); Temp result =
bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v12), Operand(dpp_tmp, reg_v12), b);
//! p_unit_test 10, %result:v[12] //! p_unit_test 10, %result:v[12]
writeout(10, Operand(result, reg_v12)); writeout(10, Operand(result, reg_v12));
@ -643,7 +673,10 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf)
//! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec //! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB1, BB2 //! s2: %0:vcc = p_cbranch_nz BB1, BB2
emit_divergent_if_else(program.get(), bld, e, [&]() -> void { emit_divergent_if_else(
program.get(), bld, e,
[&]() -> void
{
/* --- logical then --- */ /* --- logical then --- */
//! BB1 //! BB1
//! /* logical preds: BB0, / linear preds: BB0, / kind: */ //! /* logical preds: BB0, / linear preds: BB0, / kind: */
@ -665,7 +698,9 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf)
//! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */ //! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
//! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec //! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB4, BB5 //! s2: %0:vcc = p_cbranch_nz BB4, BB5
}, [&]() -> void { },
[&]() -> void
{
/* --- logical else --- */ /* --- logical else --- */
//! BB4 //! BB4
//! /* logical preds: BB0, / linear preds: BB3, / kind: */ //! /* logical preds: BB0, / linear preds: BB3, / kind: */
@ -695,7 +730,6 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf)
finish_optimizer_postRA_test(); finish_optimizer_postRA_test();
END_TEST END_TEST
BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf_partially_overwritten) BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf_partially_overwritten)
//>> s2: %a:s[2-3], v1: %c:v[2], v1: %d:v[3], s2: %e:s[0-1], s1: %f:s[4] = p_startpgm //>> s2: %a:s[2-3], v1: %c:v[2], v1: %d:v[3], s2: %e:s[0-1], s1: %f:s[4] = p_startpgm
if (!setup_cs("s2 v1 v1 s2 s1", GFX10_3)) if (!setup_cs("s2 v1 v1 s2 s1", GFX10_3))
@ -723,7 +757,10 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf_partially_overwritten)
//! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec //! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB1, BB2 //! s2: %0:vcc = p_cbranch_nz BB1, BB2
emit_divergent_if_else(program.get(), bld, e, [&]() -> void { emit_divergent_if_else(
program.get(), bld, e,
[&]() -> void
{
/* --- logical then --- */ /* --- logical then --- */
//! BB1 //! BB1
//! /* logical preds: BB0, / linear preds: BB0, / kind: */ //! /* logical preds: BB0, / linear preds: BB0, / kind: */
@ -733,7 +770,8 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf_partially_overwritten)
Temp s_addr = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(s1, reg_s3), f); Temp s_addr = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(s1, reg_s3), f);
//! buffer_store_dword %c:v[2], %ovrwr:s[3], %d:v[3], 0 offen //! buffer_store_dword %c:v[2], %ovrwr:s[3], %d:v[3], 0 offen
bld.mubuf(aco_opcode::buffer_store_dword, c, Operand(s_addr, reg_s3), d, Operand::zero(), 0, true); bld.mubuf(aco_opcode::buffer_store_dword, c, Operand(s_addr, reg_s3), d, Operand::zero(),
0, true);
//! p_logical_end //! p_logical_end
//! s2: %0:vcc = p_branch BB3 //! s2: %0:vcc = p_branch BB3
@ -748,7 +786,9 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf_partially_overwritten)
//! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */ //! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
//! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec //! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB4, BB5 //! s2: %0:vcc = p_cbranch_nz BB4, BB5
}, [&]() -> void { },
[&]() -> void
{
/* --- logical else --- */ /* --- logical else --- */
//! BB4 //! BB4
//! /* logical preds: BB0, / linear preds: BB3, / kind: */ //! /* logical preds: BB0, / linear preds: BB3, / kind: */

View file

@ -36,19 +36,24 @@ BEGIN_TEST(setup_reduce_temp.divergent_if_phi)
* } * }
* ... = phi ... * ... = phi ...
*/ */
//TODO: fix the RA validator to spot this // TODO: fix the RA validator to spot this
//>> s2: %_, v1: %a = p_startpgm //>> s2: %_, v1: %a = p_startpgm
if (!setup_cs("s2 v1", GFX9)) if (!setup_cs("s2 v1", GFX9))
return; return;
//>> lv1: %lv = p_start_linear_vgpr //>> lv1: %lv = p_start_linear_vgpr
emit_divergent_if_else(program.get(), bld, Operand(inputs[0]), [&]() -> void { emit_divergent_if_else(
program.get(), bld, Operand(inputs[0]),
[&]() -> void
{
//>> s1: %_, s2: %_, s1: %_:scc = p_reduce %a, %lv, lv1: undef op:umin32 cluster_size:64 //>> s1: %_, s2: %_, s1: %_:scc = p_reduce %a, %lv, lv1: undef op:umin32 cluster_size:64
Instruction* reduce = bld.reduction(aco_opcode::p_reduce, bld.def(s1), Instruction* reduce =
bld.def(bld.lm), bld.def(s1, scc), inputs[1], bld.reduction(aco_opcode::p_reduce, bld.def(s1), bld.def(bld.lm), bld.def(s1, scc),
Operand(v1.as_linear()), Operand(v1.as_linear()), umin32); inputs[1], Operand(v1.as_linear()), Operand(v1.as_linear()), umin32);
reduce->reduction().cluster_size = bld.lm.bytes() * 8; reduce->reduction().cluster_size = bld.lm.bytes() * 8;
}, [&]() -> void { },
[&]() -> void
{
/* nothing */ /* nothing */
}); });
bld.pseudo(aco_opcode::p_phi, bld.def(v1), Operand::c32(1), Operand::zero()); bld.pseudo(aco_opcode::p_phi, bld.def(v1), Operand::c32(1), Operand::zero());

View file

@ -37,7 +37,7 @@ BEGIN_TEST(regalloc.subdword_alloc.reuse_16bit_operands)
/* TODO: is this possible to do on GFX11? */ /* TODO: is this possible to do on GFX11? */
for (amd_gfx_level cc = GFX8; cc <= GFX10_3; cc = (amd_gfx_level)((unsigned)cc + 1)) { for (amd_gfx_level cc = GFX8; cc <= GFX10_3; cc = (amd_gfx_level)((unsigned)cc + 1)) {
for (bool pessimistic : { false, true }) { for (bool pessimistic : {false, true}) {
const char* subvariant = pessimistic ? "/pessimistic" : "/optimistic"; const char* subvariant = pessimistic ? "/pessimistic" : "/optimistic";
//>> v1: %_:v[#a] = p_startpgm //>> v1: %_:v[#a] = p_startpgm
@ -45,7 +45,8 @@ BEGIN_TEST(regalloc.subdword_alloc.reuse_16bit_operands)
return; return;
//! v2b: %_:v[#a][0:16], v2b: %res1:v[#a][16:32] = p_split_vector %_:v[#a] //! v2b: %_:v[#a][0:16], v2b: %res1:v[#a][16:32] = p_split_vector %_:v[#a]
Builder::Result tmp = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]); Builder::Result tmp =
bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]);
//! v1: %_:v[#b] = v_cvt_f32_f16 %_:v[#a][16:32] dst_sel:dword src0_sel:uword1 //! v1: %_:v[#b] = v_cvt_f32_f16 %_:v[#a][16:32] dst_sel:dword src0_sel:uword1
//! v1: %_:v[#a] = v_cvt_f32_f16 %_:v[#a][0:16] //! v1: %_:v[#a] = v_cvt_f32_f16 %_:v[#a][0:16]
@ -55,7 +56,7 @@ BEGIN_TEST(regalloc.subdword_alloc.reuse_16bit_operands)
writeout(0, result1); writeout(0, result1);
writeout(1, result2); writeout(1, result2);
finish_ra_test(ra_test_policy { pessimistic }); finish_ra_test(ra_test_policy{pessimistic});
} }
} }
END_TEST END_TEST
@ -67,7 +68,8 @@ BEGIN_TEST(regalloc._32bit_partial_write)
/* ensure high 16 bits are occupied */ /* ensure high 16 bits are occupied */
//! v2b: %_:v[0][0:16], v2b: %_:v[0][16:32] = p_split_vector %_:v[0] //! v2b: %_:v[0][0:16], v2b: %_:v[0][16:32] = p_split_vector %_:v[0]
Temp hi = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]).def(1).getTemp(); Temp hi =
bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]).def(1).getTemp();
/* This test checks if this instruction uses SDWA. */ /* This test checks if this instruction uses SDWA. */
//! v2b: %_:v[0][0:16] = v_not_b32 0 dst_sel:uword0 dst_preserve src0_sel:dword //! v2b: %_:v[0][0:16] = v_not_b32 0 dst_sel:uword0 dst_preserve src0_sel:dword
@ -168,9 +170,9 @@ BEGIN_TEST(regalloc.precolor.multiple_operands)
//! v1: %tmp3_2:v[0], v1: %tmp0_2:v[1], v1: %tmp1_2:v[2], v1: %tmp2_2:v[3] = p_parallelcopy %tmp3:v[3], %tmp0:v[0], %tmp1:v[1], %tmp2:v[2] //! v1: %tmp3_2:v[0], v1: %tmp0_2:v[1], v1: %tmp1_2:v[2], v1: %tmp2_2:v[3] = p_parallelcopy %tmp3:v[3], %tmp0:v[0], %tmp1:v[1], %tmp2:v[2]
//! p_unit_test %tmp3_2:v[0], %tmp0_2:v[1], %tmp1_2:v[2], %tmp2_2:v[3] //! p_unit_test %tmp3_2:v[0], %tmp0_2:v[1], %tmp1_2:v[2], %tmp2_2:v[3]
bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[3], PhysReg(256+0)), bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[3], PhysReg(256 + 0)),
Operand(inputs[0], PhysReg(256+1)), Operand(inputs[1], PhysReg(256+2)), Operand(inputs[0], PhysReg(256 + 1)), Operand(inputs[1], PhysReg(256 + 2)),
Operand(inputs[2], PhysReg(256+3))); Operand(inputs[2], PhysReg(256 + 3)));
finish_ra_test(ra_test_policy()); finish_ra_test(ra_test_policy());
END_TEST END_TEST
@ -182,8 +184,8 @@ BEGIN_TEST(regalloc.precolor.different_regs)
//! v1: %tmp1:v[1], v1: %tmp2:v[2] = p_parallelcopy %tmp0:v[0], %tmp0:v[0] //! v1: %tmp1:v[1], v1: %tmp2:v[2] = p_parallelcopy %tmp0:v[0], %tmp0:v[0]
//! p_unit_test %tmp0:v[0], %tmp1:v[1], %tmp2:v[2] //! p_unit_test %tmp0:v[0], %tmp1:v[1], %tmp2:v[2]
bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[0], PhysReg(256+0)), bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[0], PhysReg(256 + 0)),
Operand(inputs[0], PhysReg(256+1)), Operand(inputs[0], PhysReg(256+2))); Operand(inputs[0], PhysReg(256 + 1)), Operand(inputs[0], PhysReg(256 + 2)));
finish_ra_test(ra_test_policy()); finish_ra_test(ra_test_policy());
END_TEST END_TEST
@ -256,7 +258,8 @@ BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_reg_impl)
//! s1: %scc_tmp:scc, s1: %1:s[0] = p_unit_test //! s1: %scc_tmp:scc, s1: %1:s[0] = p_unit_test
Temp s0_tmp = bld.tmp(s1); Temp s0_tmp = bld.tmp(s1);
Temp scc_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(s1, scc), Definition(s0_tmp.id(), PhysReg{0}, s1)); Temp scc_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(s1, scc),
Definition(s0_tmp.id(), PhysReg{0}, s1));
//! lv1: %tmp1:v[1] = p_unit_test //! lv1: %tmp1:v[1] = p_unit_test
Temp tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v1)); Temp tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v1));
@ -273,7 +276,8 @@ BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_reg_impl)
//>> lv1: %5:v[2] = p_parallelcopy %3:v[1] scc:1 scratch:s1 //>> lv1: %5:v[2] = p_parallelcopy %3:v[1] scc:1 scratch:s1
Pseudo_instruction& parallelcopy = program->blocks[0].instructions[3]->pseudo(); Pseudo_instruction& parallelcopy = program->blocks[0].instructions[3]->pseudo();
aco_print_instr(program->gfx_level, &parallelcopy, output); aco_print_instr(program->gfx_level, &parallelcopy, output);
fprintf(output, " scc:%u scratch:s%u\n", parallelcopy.tmp_in_scc, parallelcopy.scratch_sgpr.reg()); fprintf(output, " scc:%u scratch:s%u\n", parallelcopy.tmp_in_scc,
parallelcopy.scratch_sgpr.reg());
END_TEST END_TEST
BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_regs_for_copies) BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_regs_for_copies)
@ -392,13 +396,15 @@ BEGIN_TEST(regalloc.vinterp_fp16)
//! v1: %tmp0:v[1] = v_interp_p10_f16_f32_inreg %lo:v[3][0:16], %in1:v[1], hi(%hi:v[3][16:32]) //! v1: %tmp0:v[1] = v_interp_p10_f16_f32_inreg %lo:v[3][0:16], %in1:v[1], hi(%hi:v[3][16:32])
//! p_unit_test %tmp0:v[1] //! p_unit_test %tmp0:v[1]
Temp tmp0 = bld.vinterp_inreg(aco_opcode::v_interp_p10_f16_f32_inreg, bld.def(v1), lo, inputs[1], hi); Temp tmp0 =
bld.vinterp_inreg(aco_opcode::v_interp_p10_f16_f32_inreg, bld.def(v1), lo, inputs[1], hi);
bld.pseudo(aco_opcode::p_unit_test, tmp0); bld.pseudo(aco_opcode::p_unit_test, tmp0);
//! v2b: %tmp1:v[0][16:32] = v_interp_p2_f16_f32_inreg %in0:v[0], %in2:v[2], %tmp0:v[1] opsel_hi //! v2b: %tmp1:v[0][16:32] = v_interp_p2_f16_f32_inreg %in0:v[0], %in2:v[2], %tmp0:v[1] opsel_hi
//! v1: %tmp2:v[0] = p_create_vector 0, %tmp1:v[0][16:32] //! v1: %tmp2:v[0] = p_create_vector 0, %tmp1:v[0][16:32]
//! p_unit_test %tmp2:v[0] //! p_unit_test %tmp2:v[0]
Temp tmp1 = bld.vinterp_inreg(aco_opcode::v_interp_p2_f16_f32_inreg, bld.def(v2b), inputs[0], inputs[2], tmp0); Temp tmp1 = bld.vinterp_inreg(aco_opcode::v_interp_p2_f16_f32_inreg, bld.def(v2b), inputs[0],
inputs[2], tmp0);
Temp tmp2 = bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), Operand::zero(2), tmp1); Temp tmp2 = bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), Operand::zero(2), tmp1);
bld.pseudo(aco_opcode::p_unit_test, tmp2); bld.pseudo(aco_opcode::p_unit_test, tmp2);

View file

@ -34,7 +34,8 @@ BEGIN_TEST(validate.sdwa.allow)
//>> Validation results: //>> Validation results:
//! Validation passed //! Validation passed
SDWA_instruction *sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1])->sdwa(); SDWA_instruction* sdwa =
&bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1])->sdwa();
sdwa->neg[0] = sdwa->neg[1] = sdwa->abs[0] = sdwa->abs[1] = true; sdwa->neg[0] = sdwa->neg[1] = sdwa->abs[0] = sdwa->abs[1] = true;
bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1b), inputs[0], inputs[1]); bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1b), inputs[0], inputs[1]);
@ -105,7 +106,9 @@ BEGIN_TEST(validate.sdwa.vopc)
bld.vopc_sdwa(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), inputs[0], inputs[1]); bld.vopc_sdwa(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), inputs[0], inputs[1]);
//~gfx(9|10)! SDWA VOPC clamp only supported on GFX8: s2: %_:vcc = v_cmp_eq_f32 %vgpr0, %vgpr1 clamp src0_sel:dword src1_sel:dword //~gfx(9|10)! SDWA VOPC clamp only supported on GFX8: s2: %_:vcc = v_cmp_eq_f32 %vgpr0, %vgpr1 clamp src0_sel:dword src1_sel:dword
bld.vopc_sdwa(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm, vcc), inputs[0], inputs[1])->sdwa().clamp = true; bld.vopc_sdwa(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm, vcc), inputs[0], inputs[1])
->sdwa()
.clamp = true;
//! Validation failed //! Validation failed
@ -138,11 +141,13 @@ BEGIN_TEST(validate.sdwa.vcc)
//! 3rd operand must be fixed to vcc with SDWA: v1: %_ = v_cndmask_b32 %vgpr0, %vgpr1, %_ dst_sel:dword src0_sel:dword src1_sel:dword //! 3rd operand must be fixed to vcc with SDWA: v1: %_ = v_cndmask_b32 %vgpr0, %vgpr1, %_ dst_sel:dword src0_sel:dword src1_sel:dword
bld.vop2_sdwa(aco_opcode::v_cndmask_b32, bld.def(v1), inputs[0], inputs[1], inputs[2]); bld.vop2_sdwa(aco_opcode::v_cndmask_b32, bld.def(v1), inputs[0], inputs[1], inputs[2]);
bld.vop2_sdwa(aco_opcode::v_cndmask_b32, bld.def(v1), inputs[0], inputs[1], bld.vcc(inputs[2])); bld.vop2_sdwa(aco_opcode::v_cndmask_b32, bld.def(v1), inputs[0], inputs[1],
bld.vcc(inputs[2]));
//! 2nd definition must be fixed to vcc with SDWA: v1: %_, s2: %_ = v_add_co_u32 %vgpr0, %vgpr1 dst_sel:dword src0_sel:dword src1_sel:dword //! 2nd definition must be fixed to vcc with SDWA: v1: %_, s2: %_ = v_add_co_u32 %vgpr0, %vgpr1 dst_sel:dword src0_sel:dword src1_sel:dword
bld.vop2_sdwa(aco_opcode::v_add_co_u32, bld.def(v1), bld.def(bld.lm), inputs[0], inputs[1]); bld.vop2_sdwa(aco_opcode::v_add_co_u32, bld.def(v1), bld.def(bld.lm), inputs[0], inputs[1]);
bld.vop2_sdwa(aco_opcode::v_add_co_u32, bld.def(v1), bld.def(bld.lm, vcc), inputs[0], inputs[1]); bld.vop2_sdwa(aco_opcode::v_add_co_u32, bld.def(v1), bld.def(bld.lm, vcc), inputs[0],
inputs[1]);
//! Validation failed //! Validation failed
@ -154,7 +159,8 @@ BEGIN_TEST(optimize.sdwa.extract)
for (unsigned i = GFX7; i <= GFX10; i++) { for (unsigned i = GFX7; i <= GFX10; i++) {
for (unsigned is_signed = 0; is_signed <= 1; is_signed++) { for (unsigned is_signed = 0; is_signed <= 1; is_signed++) {
//>> v1: %a, v1: %b, s1: %c, s1: %d = p_startpgm //>> v1: %a, v1: %b, s1: %c, s1: %d = p_startpgm
if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i, CHIP_UNKNOWN, is_signed ? "_signed" : "_unsigned")) if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i, CHIP_UNKNOWN,
is_signed ? "_signed" : "_unsigned"))
continue; continue;
//; def standard_test(index, sel): //; def standard_test(index, sel):
@ -168,28 +174,28 @@ BEGIN_TEST(optimize.sdwa.extract)
{ {
//~gfx[^7].*! @standard_test(0,byte0) //~gfx[^7].*! @standard_test(0,byte0)
Temp bfe_byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u), Temp bfe_byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(),
Operand::c32(is_signed)); Operand::c32(8u), Operand::c32(is_signed));
writeout(0, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte0_b)); writeout(0, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte0_b));
//~gfx[^7].*! @standard_test(1,byte1) //~gfx[^7].*! @standard_test(1,byte1)
Temp bfe_byte1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u), Operand::c32(8u), Temp bfe_byte1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u),
Operand::c32(is_signed)); Operand::c32(8u), Operand::c32(is_signed));
writeout(1, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte1_b)); writeout(1, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte1_b));
//~gfx[^7].*! @standard_test(2,byte2) //~gfx[^7].*! @standard_test(2,byte2)
Temp bfe_byte2_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(2u), Operand::c32(8u), Temp bfe_byte2_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(2u),
Operand::c32(is_signed)); Operand::c32(8u), Operand::c32(is_signed));
writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte2_b)); writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte2_b));
//~gfx[^7].*! @standard_test(3,byte3) //~gfx[^7].*! @standard_test(3,byte3)
Temp bfe_byte3_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(3u), Operand::c32(8u), Temp bfe_byte3_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(3u),
Operand::c32(is_signed)); Operand::c32(8u), Operand::c32(is_signed));
writeout(3, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte3_b)); writeout(3, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte3_b));
//~gfx[^7].*! @standard_test(4,word0) //~gfx[^7].*! @standard_test(4,word0)
Temp bfe_word0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(16u), Temp bfe_word0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(),
Operand::c32(is_signed)); Operand::c32(16u), Operand::c32(is_signed));
writeout(4, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_word0_b)); writeout(4, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_word0_b));
//~gfx[^7].*! @standard_test(5,word1) //~gfx[^7].*! @standard_test(5,word1)
@ -198,7 +204,8 @@ BEGIN_TEST(optimize.sdwa.extract)
writeout(5, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_word1_b)); writeout(5, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_word1_b));
//~gfx[^7]_unsigned! @standard_test(6,byte0) //~gfx[^7]_unsigned! @standard_test(6,byte0)
Temp bfi_byte0_b = bld.pseudo(ins, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u)); Temp bfi_byte0_b =
bld.pseudo(ins, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u));
writeout(6, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_byte0_b)); writeout(6, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_byte0_b));
//~gfx[^7]_unsigned! @standard_test(7,word0) //~gfx[^7]_unsigned! @standard_test(7,word0)
@ -224,8 +231,8 @@ BEGIN_TEST(optimize.sdwa.extract)
//~gfx[^7]+_signed! v1: %res9 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte0 //~gfx[^7]+_signed! v1: %res9 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte0
//~gfx\d+_unsigned! v1: %res9 = v_cvt_f32_ubyte0 %b //~gfx\d+_unsigned! v1: %res9 = v_cvt_f32_ubyte0 %b
//! p_unit_test 9, %res9 //! p_unit_test 9, %res9
Temp bfe_byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u), Temp bfe_byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(),
Operand::c32(is_signed)); Operand::c32(8u), Operand::c32(is_signed));
writeout(9, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte0_b)); writeout(9, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte0_b));
//~gfx7_signed! v1: %bfe_byte1_b = p_extract %b, 1, 8, 1 //~gfx7_signed! v1: %bfe_byte1_b = p_extract %b, 1, 8, 1
@ -233,8 +240,8 @@ BEGIN_TEST(optimize.sdwa.extract)
//~gfx[^7]+_signed! v1: %res10 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte1 //~gfx[^7]+_signed! v1: %res10 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte1
//~gfx\d+_unsigned! v1: %res10 = v_cvt_f32_ubyte1 %b //~gfx\d+_unsigned! v1: %res10 = v_cvt_f32_ubyte1 %b
//! p_unit_test 10, %res10 //! p_unit_test 10, %res10
Temp bfe_byte1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u), Operand::c32(8u), Temp bfe_byte1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u),
Operand::c32(is_signed)); Operand::c32(8u), Operand::c32(is_signed));
writeout(10, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte1_b)); writeout(10, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte1_b));
//~gfx7_signed! v1: %bfe_byte2_b = p_extract %b, 2, 8, 1 //~gfx7_signed! v1: %bfe_byte2_b = p_extract %b, 2, 8, 1
@ -242,8 +249,8 @@ BEGIN_TEST(optimize.sdwa.extract)
//~gfx[^7]+_signed! v1: %res11 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte2 //~gfx[^7]+_signed! v1: %res11 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte2
//~gfx\d+_unsigned! v1: %res11 = v_cvt_f32_ubyte2 %b //~gfx\d+_unsigned! v1: %res11 = v_cvt_f32_ubyte2 %b
//! p_unit_test 11, %res11 //! p_unit_test 11, %res11
Temp bfe_byte2_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(2u), Operand::c32(8u), Temp bfe_byte2_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(2u),
Operand::c32(is_signed)); Operand::c32(8u), Operand::c32(is_signed));
writeout(11, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte2_b)); writeout(11, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte2_b));
//~gfx7_signed! v1: %bfe_byte3_b = p_extract %b, 3, 8, 1 //~gfx7_signed! v1: %bfe_byte3_b = p_extract %b, 3, 8, 1
@ -251,15 +258,15 @@ BEGIN_TEST(optimize.sdwa.extract)
//~gfx[^7]+_signed! v1: %res12 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte3 //~gfx[^7]+_signed! v1: %res12 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte3
//~gfx\d+_unsigned! v1: %res12 = v_cvt_f32_ubyte3 %b //~gfx\d+_unsigned! v1: %res12 = v_cvt_f32_ubyte3 %b
//! p_unit_test 12, %res12 //! p_unit_test 12, %res12
Temp bfe_byte3_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(3u), Operand::c32(8u), Temp bfe_byte3_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(3u),
Operand::c32(is_signed)); Operand::c32(8u), Operand::c32(is_signed));
writeout(12, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte3_b)); writeout(12, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte3_b));
/* VOP3-only instructions can't use SDWA but they can use opsel on GFX9+ instead */ /* VOP3-only instructions can't use SDWA but they can use opsel on GFX9+ instead */
//~gfx(9|10).*! v1: %res13 = v_add_i16 %a, %b //~gfx(9|10).*! v1: %res13 = v_add_i16 %a, %b
//~gfx(9|10).*! p_unit_test 13, %res13 //~gfx(9|10).*! p_unit_test 13, %res13
Temp bfe_word0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(16u), Temp bfe_word0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(),
Operand::c32(is_signed)); Operand::c32(16u), Operand::c32(is_signed));
writeout(13, bld.vop3(aco_opcode::v_add_i16, bld.def(v1), inputs[0], bfe_word0_b)); writeout(13, bld.vop3(aco_opcode::v_add_i16, bld.def(v1), inputs[0], bfe_word0_b));
//~gfx(9|10).*! v1: %res14 = v_add_i16 %a, hi(%b) //~gfx(9|10).*! v1: %res14 = v_add_i16 %a, hi(%b)

View file

@ -52,8 +52,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0] //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
bld.pseudo(aco_opcode::p_parallelcopy, bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v1_lo, v2b),
Definition(v0_lo, v2b), Definition(v1_lo, v2b),
Operand(v1_lo, v2b), Operand(v0_lo, v2b)); Operand(v1_lo, v2b), Operand(v0_lo, v2b));
//~gfx[67]! p_unit_test 1 //~gfx[67]! p_unit_test 1
@ -61,9 +60,8 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2 //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2 //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
bld.pseudo(aco_opcode::p_create_vector, bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v1), Operand(v1_lo, v2b),
Definition(v0_lo, v1), Operand(v0_lo, v2b));
Operand(v1_lo, v2b), Operand(v0_lo, v2b));
//~gfx[67]! p_unit_test 2 //~gfx[67]! p_unit_test 2
//~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16] //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
@ -71,8 +69,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2 //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
//~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[2][0:16] //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[2][0:16]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
bld.pseudo(aco_opcode::p_create_vector, bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v6b), Operand(v1_lo, v2b),
Definition(v0_lo, v6b), Operand(v1_lo, v2b),
Operand(v0_lo, v2b), Operand(v2_lo, v2b)); Operand(v0_lo, v2b), Operand(v2_lo, v2b));
//~gfx[67]! p_unit_test 3 //~gfx[67]! p_unit_test 3
@ -82,10 +79,8 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[2][0:16] //~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[2][0:16]
//~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[3][0:16], %0:v[1][16:32], 2 //~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[3][0:16], %0:v[1][16:32], 2
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
bld.pseudo(aco_opcode::p_create_vector, bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v2), Operand(v1_lo, v2b),
Definition(v0_lo, v2), Operand(v0_lo, v2b), Operand(v2_lo, v2b), Operand(v3_lo, v2b));
Operand(v1_lo, v2b), Operand(v0_lo, v2b),
Operand(v2_lo, v2b), Operand(v3_lo, v2b));
//~gfx[67]! p_unit_test 4 //~gfx[67]! p_unit_test 4
//~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[1][0:16] //~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[1][0:16]
@ -96,17 +91,14 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0] //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
bld.pseudo(aco_opcode::p_create_vector, bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v2), Operand(v1_lo, v2b),
Definition(v0_lo, v2), Operand(v2_lo, v2b), Operand(v0_lo, v2b), Operand(v3_lo, v2b));
Operand(v1_lo, v2b), Operand(v2_lo, v2b),
Operand(v0_lo, v2b), Operand(v3_lo, v2b));
//~gfx[67]! p_unit_test 5 //~gfx[67]! p_unit_test 5
//~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16] //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
//~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32] //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
bld.pseudo(aco_opcode::p_split_vector, bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v2b), Definition(v0_lo, v2b),
Definition(v1_lo, v2b), Definition(v0_lo, v2b),
Operand(v0_lo, v1)); Operand(v0_lo, v1));
//~gfx[67]! p_unit_test 6 //~gfx[67]! p_unit_test 6
@ -114,8 +106,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16] //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
//~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32] //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
bld.pseudo(aco_opcode::p_split_vector, bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v2b), Definition(v0_lo, v2b),
Definition(v1_lo, v2b), Definition(v0_lo, v2b),
Definition(v2_lo, v2b), Operand(v0_lo, v6b)); Definition(v2_lo, v2b), Operand(v0_lo, v6b));
//~gfx[67]! p_unit_test 7 //~gfx[67]! p_unit_test 7
@ -124,10 +115,8 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32] //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
//~gfx[67]! v2b: %0:v[3][0:16] = v_lshrrev_b32 16, %0:v[2][16:32] //~gfx[67]! v2b: %0:v[3][0:16] = v_lshrrev_b32 16, %0:v[2][16:32]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
bld.pseudo(aco_opcode::p_split_vector, bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v2b), Definition(v0_lo, v2b),
Definition(v1_lo, v2b), Definition(v0_lo, v2b), Definition(v2_lo, v2b), Definition(v3_lo, v2b), Operand(v0_lo, v2));
Definition(v2_lo, v2b), Definition(v3_lo, v2b),
Operand(v0_lo, v2));
//~gfx[67]! p_unit_test 8 //~gfx[67]! p_unit_test 8
//~gfx[67]! v2b: %0:v[2][0:16] = v_lshrrev_b32 16, %0:v[0][16:32] //~gfx[67]! v2b: %0:v[2][0:16] = v_lshrrev_b32 16, %0:v[0][16:32]
@ -136,18 +125,15 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0] //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
bld.pseudo(aco_opcode::p_split_vector, bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v2b), Definition(v2_lo, v2b),
Definition(v1_lo, v2b), Definition(v2_lo, v2b), Definition(v0_lo, v2b), Definition(v3_lo, v2b), Operand(v0_lo, v2));
Definition(v0_lo, v2b), Definition(v3_lo, v2b),
Operand(v0_lo, v2));
//~gfx[67]! p_unit_test 9 //~gfx[67]! p_unit_test 9
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
//~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0] //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
bld.pseudo(aco_opcode::p_parallelcopy, bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Definition(v1_lo, v1b),
Definition(v0_lo, v1b), Definition(v1_lo, v1b),
Operand(v1_lo, v1b), Operand(v0_lo, v1b)); Operand(v1_lo, v1b), Operand(v0_lo, v1b));
//~gfx[67]! p_unit_test 10 //~gfx[67]! p_unit_test 10
@ -155,9 +141,8 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3 //~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3
//~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16] //~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));
bld.pseudo(aco_opcode::p_create_vector, bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v2b), Operand(v1_lo, v1b),
Definition(v0_lo, v2b), Operand(v0_lo, v1b));
Operand(v1_lo, v1b), Operand(v0_lo, v1b));
//~gfx[67]! p_unit_test 11 //~gfx[67]! p_unit_test 11
//~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8] //~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]
@ -166,8 +151,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16] //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
//~gfx[67]! v3b: %0:v[0][0:24] = v_alignbyte_b32 %0:v[2][0:8], %0:v[0][16:32], 2 //~gfx[67]! v3b: %0:v[0][0:24] = v_alignbyte_b32 %0:v[2][0:8], %0:v[0][16:32], 2
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u));
bld.pseudo(aco_opcode::p_create_vector, bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v3b), Operand(v1_lo, v1b),
Definition(v0_lo, v3b), Operand(v1_lo, v1b),
Operand(v0_lo, v1b), Operand(v2_lo, v1b)); Operand(v0_lo, v1b), Operand(v2_lo, v1b));
//~gfx[67]! p_unit_test 12 //~gfx[67]! p_unit_test 12
@ -179,10 +163,8 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v3b: %0:v[0][8:32] = v_lshlrev_b32 8, %0:v[0][0:24] //~gfx[67]! v3b: %0:v[0][8:32] = v_lshlrev_b32 8, %0:v[0][0:24]
//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[3][0:8], %0:v[0][8:32], 1 //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[3][0:8], %0:v[0][8:32], 1
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u));
bld.pseudo(aco_opcode::p_create_vector, bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v1), Operand(v1_lo, v1b),
Definition(v0_lo, v1), Operand(v0_lo, v1b), Operand(v2_lo, v1b), Operand(v3_lo, v1b));
Operand(v1_lo, v1b), Operand(v0_lo, v1b),
Operand(v2_lo, v1b), Operand(v3_lo, v1b));
//~gfx[67]! p_unit_test 13 //~gfx[67]! p_unit_test 13
//~gfx[67]! v1b: %0:v[0][0:8] = v_and_b32 0xff, %0:v[0][0:8] //~gfx[67]! v1b: %0:v[0][0:8] = v_and_b32 0xff, %0:v[0][0:8]
@ -193,18 +175,16 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! s1: %0:m0 = s_mov_b32 0x1000001 //~gfx[67]! s1: %0:m0 = s_mov_b32 0x1000001
//~gfx[67]! v1: %0:v[0] = v_mul_lo_u32 %0:m0, %0:v[0][0:8] //~gfx[67]! v1: %0:v[0] = v_mul_lo_u32 %0:m0, %0:v[0][0:8]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u));
Instruction* pseudo = bld.pseudo(aco_opcode::p_create_vector, Instruction* pseudo =
Definition(v0_lo, v1), bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v1), Operand(v0_lo, v1b),
Operand(v0_lo, v1b), Operand(v0_lo, v1b), Operand(v0_lo, v1b), Operand(v0_lo, v1b), Operand(v0_lo, v1b));
Operand(v0_lo, v1b), Operand(v0_lo, v1b));
pseudo->pseudo().scratch_sgpr = m0; pseudo->pseudo().scratch_sgpr = m0;
//~gfx[67]! p_unit_test 14 //~gfx[67]! p_unit_test 14
//~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8] //~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8]
//~gfx[67]! v1b: %0:v[0][0:8] = v_lshrrev_b32 8, %0:v[1][8:16] //~gfx[67]! v1b: %0:v[0][0:8] = v_lshrrev_b32 8, %0:v[1][8:16]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u));
bld.pseudo(aco_opcode::p_split_vector, bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v1b), Definition(v0_lo, v1b),
Definition(v1_lo, v1b), Definition(v0_lo, v1b),
Operand(v0_lo, v2b)); Operand(v0_lo, v2b));
//~gfx[67]! p_unit_test 15 //~gfx[67]! p_unit_test 15
@ -213,10 +193,8 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[67]! v1b: %0:v[2][0:8] = v_lshrrev_b32 16, %0:v[1][16:24] //~gfx[67]! v1b: %0:v[2][0:8] = v_lshrrev_b32 16, %0:v[1][16:24]
//~gfx[67]! v1b: %0:v[3][0:8] = v_lshrrev_b32 24, %0:v[1][24:32] //~gfx[67]! v1b: %0:v[3][0:8] = v_lshrrev_b32 24, %0:v[1][24:32]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(15u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(15u));
bld.pseudo(aco_opcode::p_split_vector, bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v1b), Definition(v0_lo, v1b),
Definition(v1_lo, v1b), Definition(v0_lo, v1b), Definition(v2_lo, v1b), Definition(v3_lo, v1b), Operand(v0_lo, v1));
Definition(v2_lo, v1b), Definition(v3_lo, v1b),
Operand(v0_lo, v1));
//~gfx[67]! s_endpgm //~gfx[67]! s_endpgm
@ -231,8 +209,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx8! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2 //~gfx8! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
//~gfx(9|11)! v1: %0:v[0] = v_pack_b32_f16 hi(%0:v[0][16:32]), %0:v[0][0:16] //~gfx(9|11)! v1: %0:v[0] = v_pack_b32_f16 hi(%0:v[0][16:32]), %0:v[0][0:16]
bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
bld.pseudo(aco_opcode::p_parallelcopy, bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
Definition(v0_lo, v2b), Definition(v0_hi, v2b),
Operand(v0_hi, v2b), Operand(v0_lo, v2b)); Operand(v0_hi, v2b), Operand(v0_lo, v2b));
//~gfx(8|9|11)! p_unit_test 1 //~gfx(8|9|11)! p_unit_test 1
@ -243,8 +220,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_sel:uword1 dst_preserve src0_sel:uword1 //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_sel:uword1 dst_preserve src0_sel:uword1
//~gfx11! v2b: %0:v[1][16:32] = v_mov_b16 hi(%0:v[0][16:32]) opsel_hi //~gfx11! v2b: %0:v[1][16:32] = v_mov_b16 hi(%0:v[0][16:32]) opsel_hi
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
bld.pseudo(aco_opcode::p_parallelcopy, bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b),
Definition(v0_lo, v1), Definition(v1_lo, v2b),
Operand(v1_lo, v1), Operand(v0_lo, v2b)); Operand(v1_lo, v1), Operand(v0_lo, v2b));
//~gfx(8|9|11)! p_unit_test 2 //~gfx(8|9|11)! p_unit_test 2
@ -259,9 +235,9 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx11! v2b: %0:v[1][0:16] = v_sub_u16_e64 %0:v[0][0:16], %0:v[1][0:16] //~gfx11! v2b: %0:v[1][0:16] = v_sub_u16_e64 %0:v[0][0:16], %0:v[1][0:16]
//~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], %0:v[1][0:16] //~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], %0:v[1][0:16]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
bld.pseudo(aco_opcode::p_parallelcopy, bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b),
Definition(v0_lo, v1), Definition(v1_lo, v2b), Definition(v1_hi, v2b), Definition(v1_hi, v2b), Operand(v1_lo, v1), Operand(v0_lo, v2b),
Operand(v1_lo, v1), Operand(v0_lo, v2b), Operand(v0_lo, v2b)); Operand(v0_lo, v2b));
//~gfx(8|9|11)! p_unit_test 3 //~gfx(8|9|11)! p_unit_test 3
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
@ -273,8 +249,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx11! v2b: %0:v[1][0:16] = v_mov_b16 %0:v[0][0:16] //~gfx11! v2b: %0:v[1][0:16] = v_mov_b16 %0:v[0][0:16]
//~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x7020504 //~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x7020504
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
bld.pseudo(aco_opcode::p_parallelcopy, bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_b3, v1b),
Definition(v0_lo, v1), Definition(v1_b3, v1b),
Operand(v1_lo, v1), Operand(v0_b3, v1b)); Operand(v1_lo, v1), Operand(v0_b3, v1b));
//~gfx(8|9|11)! p_unit_test 4 //~gfx(8|9|11)! p_unit_test 4
@ -287,8 +262,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x7060104 //~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x7060104
//~gfx11! v2b: %0:v[1][16:32] = v_mov_b16 hi(%0:v[0][16:32]) opsel_hi //~gfx11! v2b: %0:v[1][16:32] = v_mov_b16 hi(%0:v[0][16:32]) opsel_hi
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
bld.pseudo(aco_opcode::p_parallelcopy, bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v1b),
Definition(v0_lo, v1), Definition(v1_lo, v1b),
Operand(v1_lo, v1), Operand(v0_lo, v1b)); Operand(v1_lo, v1), Operand(v0_lo, v1b));
//~gfx(8|9|11)! p_unit_test 5 //~gfx(8|9|11)! p_unit_test 5
@ -301,9 +275,9 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x7060104 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x7060104
//~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x3060504 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x3060504
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
bld.pseudo(aco_opcode::p_parallelcopy, bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Definition(v0_hi, v1b),
Definition(v0_lo, v1b), Definition(v0_hi, v1b), Definition(v1_lo, v1), Definition(v1_lo, v1), Operand(v1_lo, v1b), Operand(v1_hi, v1b),
Operand(v1_lo, v1b), Operand(v1_hi, v1b), Operand(v0_lo, v1)); Operand(v0_lo, v1));
//~gfx(8|9|11)! p_unit_test 6 //~gfx(8|9|11)! p_unit_test 6
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
@ -311,9 +285,9 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
//~gfx(9|11)! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0] //~gfx(9|11)! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
bld.pseudo(aco_opcode::p_parallelcopy, bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
Definition(v0_lo, v2b), Definition(v0_hi, v2b), Definition(v1_lo, v1), Definition(v1_lo, v1), Operand(v1_lo, v2b), Operand(v1_hi, v2b),
Operand(v1_lo, v2b), Operand(v1_hi, v2b), Operand(v0_lo, v1)); Operand(v0_lo, v1));
//~gfx(8|9|11)! p_unit_test 7 //~gfx(8|9|11)! p_unit_test 7
//~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1] //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
@ -322,9 +296,9 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx(9|11)! v1: %0:v[1], v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1] //~gfx(9|11)! v1: %0:v[1], v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1]
//~gfx(8|9|11)! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2 //~gfx(8|9|11)! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
bld.pseudo(aco_opcode::p_parallelcopy, bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
Definition(v0_lo, v2b), Definition(v0_hi, v2b), Definition(v1_lo, v1), Definition(v1_lo, v1), Operand(v1_hi, v2b), Operand(v1_lo, v2b),
Operand(v1_hi, v2b), Operand(v1_lo, v2b), Operand(v0_lo, v1)); Operand(v0_lo, v1));
//~gfx(8|9|11)! p_unit_test 8 //~gfx(8|9|11)! p_unit_test 8
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
@ -342,8 +316,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx11! v2b: %0:v[1][16:32] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32]) opsel_hi //~gfx11! v2b: %0:v[1][16:32] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32]) opsel_hi
//~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32]) //~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
bld.pseudo(aco_opcode::p_parallelcopy, bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v3b), Definition(v1_lo, v3b),
Definition(v0_lo, v3b), Definition(v1_lo, v3b),
Operand(v1_lo, v3b), Operand(v0_lo, v3b)); Operand(v1_lo, v3b), Operand(v0_lo, v3b));
//~gfx(8|9|11)! p_unit_test 9 //~gfx(8|9|11)! p_unit_test 9
@ -354,9 +327,9 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[89]! v1b: %0:v[1][24:32] = v_mov_b32 %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 //~gfx[89]! v1b: %0:v[1][24:32] = v_mov_b32 %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3
//~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x3060504 //~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x3060504
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
bld.pseudo(aco_opcode::p_parallelcopy, bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v3b), Definition(v1_lo, v3b),
Definition(v0_lo, v3b), Definition(v1_lo, v3b), Definition(v0_b3, v1b), Definition(v0_b3, v1b), Operand(v1_lo, v3b), Operand(v0_lo, v3b),
Operand(v1_lo, v3b), Operand(v0_lo, v3b), Operand(v1_b3, v1b)); Operand(v1_b3, v1b));
//~gfx(8|9|11)! p_unit_test 10 //~gfx(8|9|11)! p_unit_test 10
//~gfx[89]! v1b: %0:v[1][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 src1_sel:ubyte1 //~gfx[89]! v1b: %0:v[1][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 src1_sel:ubyte1
@ -380,8 +353,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx11! v2b: %0:v[1][16:32] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32]) opsel_hi //~gfx11! v2b: %0:v[1][16:32] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32]) opsel_hi
//~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32]) //~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));
bld.pseudo(aco_opcode::p_parallelcopy, bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Definition(v1_b1, v2b),
Definition(v0_b1, v2b), Definition(v1_b1, v2b),
Operand(v1_b1, v2b), Operand(v0_b1, v2b)); Operand(v1_b1, v2b), Operand(v0_b1, v2b));
//~gfx(8|9|11)! p_unit_test 11 //~gfx(8|9|11)! p_unit_test 11
@ -398,8 +370,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
//~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[0][24:32], %0:v[0][8:16] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte1 //~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[0][24:32], %0:v[0][8:16] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte1
//~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x5060704 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x5060704
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u));
bld.pseudo(aco_opcode::p_parallelcopy, bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v1b), Definition(v0_b3, v1b),
Definition(v0_b1, v1b), Definition(v0_b3, v1b),
Operand(v0_b3, v1b), Operand(v0_b1, v1b)); Operand(v0_b3, v1b), Operand(v0_b1, v1b));
//~gfx(8|9|11)! s_endpgm //~gfx(8|9|11)! s_endpgm
@ -535,8 +506,7 @@ BEGIN_TEST(to_hw_instr.subdword_constant)
//~gfx10! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060c0d //~gfx10! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060c0d
//~gfx11! v2b: %0:v[0][0:16] = v_mov_b16 0xff //~gfx11! v2b: %0:v[0][0:16] = v_mov_b16 0xff
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x00ff));
Operand::c16(0x00ff));
//! p_unit_test 14 //! p_unit_test 14
//~gfx9! v1: %_:v[0] = v_and_b32 0xffff, %_:v[0] //~gfx9! v1: %_:v[0] = v_and_b32 0xffff, %_:v[0]
@ -544,29 +514,25 @@ BEGIN_TEST(to_hw_instr.subdword_constant)
//~gfx10! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0xd0c0504 //~gfx10! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0xd0c0504
//~gfx11! v2b: %0:v[0][16:32] = v_mov_b16 0xffffff00 opsel_hi //~gfx11! v2b: %0:v[0][16:32] = v_mov_b16 0xffffff00 opsel_hi
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_hi, v2b), bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_hi, v2b), Operand::c16(0xff00));
Operand::c16(0xff00));
//! p_unit_test 15 //! p_unit_test 15
//~gfx(9|10)! v2b: %_:v[0][0:16] = v_mov_b32 0 dst_sel:uword0 dst_preserve src0_sel:dword //~gfx(9|10)! v2b: %_:v[0][0:16] = v_mov_b32 0 dst_sel:uword0 dst_preserve src0_sel:dword
//~gfx11! v2b: %0:v[0][0:16] = v_mov_b16 0 //~gfx11! v2b: %0:v[0][0:16] = v_mov_b16 0
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(15u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(15u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::zero(2));
Operand::zero(2));
//! p_unit_test 16 //! p_unit_test 16
//~gfx(9|10)! v1b: %_:v[0][0:8] = v_mov_b32 -1 dst_sel:ubyte0 dst_preserve src0_sel:dword //~gfx(9|10)! v1b: %_:v[0][0:8] = v_mov_b32 -1 dst_sel:ubyte0 dst_preserve src0_sel:dword
//~gfx11! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x706050d //~gfx11! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x706050d
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(16u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(16u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::c8(0xff));
Operand::c8(0xff));
//! p_unit_test 17 //! p_unit_test 17
//~gfx(9|10)! v1b: %_:v[0][0:8] = v_mov_b32 0 dst_sel:ubyte0 dst_preserve src0_sel:dword //~gfx(9|10)! v1b: %_:v[0][0:8] = v_mov_b32 0 dst_sel:ubyte0 dst_preserve src0_sel:dword
//~gfx11! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x706050c //~gfx11! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x706050c
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(17u)); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(17u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::zero(1));
Operand::zero(1));
//! s_endpgm //! s_endpgm
@ -589,12 +555,12 @@ BEGIN_TEST(to_hw_instr.self_intersecting_swap)
//! v1: %0:v[3], v1: %0:v[7] = v_swap_b32 %0:v[7], %0:v[3] //! v1: %0:v[3], v1: %0:v[7] = v_swap_b32 %0:v[7], %0:v[3]
//! s_endpgm //! s_endpgm
bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
//v[1:2] = v[2:3] // v[1:2] = v[2:3]
//v3 = v7 // v3 = v7
//v7 = v1 // v7 = v1
bld.pseudo(aco_opcode::p_parallelcopy, bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v1, v2), Definition(reg_v3, v1),
Definition(reg_v1, v2), Definition(reg_v3, v1), Definition(reg_v7, v1), Definition(reg_v7, v1), Operand(reg_v2, v2), Operand(reg_v7, v1),
Operand(reg_v2, v2), Operand(reg_v7, v1), Operand(reg_v1, v1)); Operand(reg_v1, v1));
finish_to_hw_instr_test(); finish_to_hw_instr_test();
END_TEST END_TEST
@ -637,7 +603,7 @@ BEGIN_TEST(to_hw_instr.extract)
//! v1: %_:v[0] = @v_shr 16, %_:v[1] //! v1: %_:v[0] = @v_shr 16, %_:v[1]
EXT(1, 16) EXT(1, 16)
#undef EXT #undef EXT
#define EXT(idx, size) \ #define EXT(idx, size) \
bld.pseudo(aco_opcode::p_extract, Definition(s0_lo, s1), Definition(scc, s1), \ bld.pseudo(aco_opcode::p_extract, Definition(s0_lo, s1), Definition(scc, s1), \
@ -661,7 +627,7 @@ BEGIN_TEST(to_hw_instr.extract)
//! s1: %_:s[0], s1: %_:scc = @s_shr %_:s[1], 16 //! s1: %_:s[0], s1: %_:scc = @s_shr %_:s[1], 16
EXT(1, 16) EXT(1, 16)
#undef EXT #undef EXT
#define EXT(idx, src_b) \ #define EXT(idx, src_b) \
bld.pseudo(aco_opcode::p_extract, Definition(v0_lo, v2b), Operand(v1_lo.advance(src_b), v2b), \ bld.pseudo(aco_opcode::p_extract, Definition(v0_lo, v2b), Operand(v1_lo.advance(src_b), v2b), \
@ -692,7 +658,7 @@ BEGIN_TEST(to_hw_instr.extract)
if (lvl != GFX7) if (lvl != GFX7)
EXT(1, 2) EXT(1, 2)
#undef EXT #undef EXT
finish_to_hw_instr_test(); finish_to_hw_instr_test();
@ -736,7 +702,7 @@ BEGIN_TEST(to_hw_instr.insert)
//! v1: %0:v[0] = v_lshlrev_b32 16, %0:v[1] //! v1: %0:v[0] = v_lshlrev_b32 16, %0:v[1]
INS(1, 16) INS(1, 16)
#undef INS #undef INS
#define INS(idx, size) \ #define INS(idx, size) \
bld.pseudo(aco_opcode::p_insert, Definition(s0_lo, s1), Definition(scc, s1), \ bld.pseudo(aco_opcode::p_insert, Definition(s0_lo, s1), Definition(scc, s1), \
@ -759,7 +725,7 @@ BEGIN_TEST(to_hw_instr.insert)
//! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[1], 16 //! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[1], 16
INS(1, 16) INS(1, 16)
#undef INS #undef INS
#define INS(idx, def_b) \ #define INS(idx, def_b) \
bld.pseudo(aco_opcode::p_insert, Definition(v0_lo.advance(def_b), v2b), Operand(v1_lo, v2b), \ bld.pseudo(aco_opcode::p_insert, Definition(v0_lo.advance(def_b), v2b), Operand(v1_lo, v2b), \
@ -784,7 +750,7 @@ BEGIN_TEST(to_hw_instr.insert)
if (lvl != GFX7) if (lvl != GFX7)
INS(1, 2) INS(1, 2)
#undef INS #undef INS
finish_to_hw_instr_test(); finish_to_hw_instr_test();
@ -816,9 +782,8 @@ BEGIN_TEST(to_hw_instr.copy_linear_vgpr_scc)
//! lv1: %0:v[0] = v_mov_b32 %0:v[1] //! lv1: %0:v[0] = v_mov_b32 %0:v[1]
//! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec
//! s1: %0:scc = s_cmp_lg_i32 %0:m0, 0 //! s1: %0:scc = s_cmp_lg_i32 %0:m0, 0
Instruction *instr = bld.pseudo( Instruction* instr =
aco_opcode::p_parallelcopy, bld.pseudo(aco_opcode::p_parallelcopy, Definition(scc, s1), Definition(v0_lo, v1.as_linear()),
Definition(scc, s1), Definition(v0_lo, v1.as_linear()),
Operand(reg_s0, s1), Operand(v1_lo, v1.as_linear())); Operand(reg_s0, s1), Operand(v1_lo, v1.as_linear()));
instr->pseudo().scratch_sgpr = m0; instr->pseudo().scratch_sgpr = m0;
@ -836,10 +801,9 @@ BEGIN_TEST(to_hw_instr.swap_linear_vgpr)
//>> p_unit_test 0 //>> p_unit_test 0
bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
Instruction *instr = bld.pseudo( Instruction* instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v0, v1_linear),
aco_opcode::p_parallelcopy, Definition(reg_v1, v1_linear), Operand(reg_v1, v1_linear),
Definition(reg_v0, v1_linear), Definition(reg_v1, v1_linear), Operand(reg_v0, v1_linear));
Operand(reg_v1, v1_linear), Operand(reg_v0, v1_linear));
instr->pseudo().scratch_sgpr = m0; instr->pseudo().scratch_sgpr = m0;
finish_to_hw_instr_test(); finish_to_hw_instr_test();