aco: validate temporary reachability

These errors are nicer to read than the ones from live variable analysis.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30440>
This commit is contained in:
Rhys Perry 2024-07-30 17:08:19 +01:00 committed by Marge Bot
parent 39270a8be3
commit db39685e31
5 changed files with 97 additions and 18 deletions

View file

@ -328,10 +328,8 @@ process_live_temps_per_block(live_ctx& ctx, Block* block)
block->logical_preds.back() <= block->linear_preds.back());
ctx.worklist = std::max<int>(ctx.worklist, block->linear_preds.back());
} else {
for (unsigned t : live) {
aco_err(ctx.program, "Temporary never defined or are defined after use: %%%d in BB%d",
t, block->index);
}
ASSERTED bool is_valid = validate_ir(ctx.program);
assert(!is_valid);
}
}

View file

@ -74,6 +74,63 @@ validate_ir(Program* program)
}
};
/* Check reachability: every temporary operand must have a recorded
 * definition, no temporary may be defined twice, and a definition has to
 * be reached before each of its uses. This is only done while
 * program->progress is still before CompilationProgress::after_lower_to_hw. */
if (program->progress < CompilationProgress::after_lower_to_hw) {
/* temp id -> (index of the defining block, whether the second walk below
 * has already passed the defining instruction) */
std::map<uint32_t, std::pair<uint32_t, bool>> def_blocks;
/* First walk: record the defining block of every temporary. */
for (Block& block : program->blocks) {
for (aco_ptr<Instruction>& instr : block.instructions) {
for (Definition def : instr->definitions) {
if (!def.isTemp())
continue;
/* A repeated definition is reported; the entry is then overwritten
 * with the later definition's block. */
check(!def_blocks.count(def.tempId()), "Temporary defined twice", instr.get());
def_blocks[def.tempId()] = std::make_pair(block.index, false);
}
}
}
/* Second walk: check every temporary operand against def_blocks. */
for (Block& block : program->blocks) {
for (aco_ptr<Instruction>& instr : block.instructions) {
for (unsigned i = 0; i < instr->operands.size(); i++) {
Operand op = instr->operands[i];
if (!op.isTemp())
continue;
/* Phi operands are treated as uses in the predecessor with the same
 * index as the operand (logical predecessors for p_phi and
 * p_boolean_phi, linear predecessors for p_linear_phi). Any other
 * operand is a use in the block being walked. */
uint32_t use_block_idx = block.index;
if (instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_boolean_phi)
use_block_idx = block.logical_preds[i];
else if (instr->opcode == aco_opcode::p_linear_phi)
use_block_idx = block.linear_preds[i];
auto it = def_blocks.find(op.tempId());
if (it != def_blocks.end()) {
Block& def_block = program->blocks[it->second.first];
Block& use_block = program->blocks[use_block_idx];
/* Definition and use in the same block: if that block is the one being
 * walked, the definition must already have been passed; if the use was
 * attributed to a different (predecessor) block, it is always accepted.
 * Definition and use in different blocks: defer to the linear or logical
 * dominance helper, selected by the operand's register class.
 * NOTE(review): the helpers presumably require up-to-date dominator
 * information on the blocks -- confirm against their definitions. */
bool dominates =
def_block.index == use_block_idx
? (use_block_idx == block.index ? it->second.second : true)
: (op.regClass().is_linear() ? dominates_linear(def_block, use_block)
: dominates_logical(def_block, use_block));
if (!dominates) {
char msg[256];
snprintf(msg, sizeof(msg), "Definition of %%%u does not dominate use",
op.tempId());
check(false, msg, instr.get());
}
} else {
/* No instruction in the program defines this temporary. */
char msg[256];
snprintf(msg, sizeof(msg), "%%%u never defined", op.tempId());
check(false, msg, instr.get());
}
}
/* Mark this instruction's definitions as passed only after its operands
 * were checked, so within a block only definitions made by earlier
 * instructions count as passed. */
for (Definition def : instr->definitions) {
if (def.isTemp())
def_blocks[def.tempId()].second = true;
}
}
}
}
for (Block& block : program->blocks) {
for (aco_ptr<Instruction>& instr : block.instructions) {

View file

@ -158,7 +158,7 @@ setup_nir_cs(enum amd_gfx_level gfx_level, gl_shader_stage stage, enum radeon_fa
}
void
finish_program(Program* prog, bool endpgm)
finish_program(Program* prog, bool endpgm, bool dominance)
{
for (Block& BB : prog->blocks) {
for (unsigned idx : BB.linear_preds)
@ -174,12 +174,15 @@ finish_program(Program* prog, bool endpgm)
Builder(prog, &block).sopp(aco_opcode::s_endpgm);
}
}
/* Optionally compute dominance information. Use the prog argument, like
 * the rest of this function, instead of the global test program, so the
 * dominator tree is built for the program that was actually finished. */
if (dominance)
   dominator_tree(prog);
}
void
finish_validator_test()
{
finish_program(program.get());
finish_program(program.get(), true, true);
aco_print_program(program.get(), output);
fprintf(output, "Validation results:\n");
if (aco::validate_ir(program.get()))
@ -191,7 +194,7 @@ finish_validator_test()
void
finish_opt_test()
{
finish_program(program.get());
finish_program(program.get(), true, true);
if (!aco::validate_ir(program.get())) {
fail_test("Validation before optimization failed");
return;
@ -207,7 +210,7 @@ finish_opt_test()
void
finish_setup_reduce_temp_test()
{
finish_program(program.get());
finish_program(program.get(), true, true);
if (!aco::validate_ir(program.get())) {
fail_test("Validation before setup_reduce_temp failed");
return;
@ -223,7 +226,7 @@ finish_setup_reduce_temp_test()
void
finish_lower_subdword_test()
{
finish_program(program.get());
finish_program(program.get(), true, true);
if (!aco::validate_ir(program.get())) {
fail_test("Validation before lower_subdword failed");
return;
@ -239,7 +242,7 @@ finish_lower_subdword_test()
void
finish_ra_test(ra_test_policy policy)
{
finish_program(program.get());
finish_program(program.get(), true, true);
if (!aco::validate_ir(program.get())) {
fail_test("Validation before register allocation failed");
return;
@ -260,7 +263,7 @@ finish_ra_test(ra_test_policy policy)
void
finish_optimizer_postRA_test()
{
finish_program(program.get());
finish_program(program.get(), true, true);
if (!aco::validate_ir(program.get())) {
fail_test("Validation before optimize_postRA failed");
@ -280,7 +283,7 @@ finish_optimizer_postRA_test()
void
finish_to_hw_instr_test()
{
finish_program(program.get());
finish_program(program.get(), true, true);
if (!aco::validate_ir(program.get())) {
fail_test("Validation before lower_to_hw_instr failed");

View file

@ -66,7 +66,7 @@ bool
setup_nir_cs(enum amd_gfx_level gfx_level, gl_shader_stage stage = MESA_SHADER_COMPUTE,
enum radeon_family family = CHIP_UNKNOWN, const char* subvariant = "");
void finish_program(aco::Program* program, bool endpgm = true);
void finish_program(aco::Program* program, bool endpgm = true, bool dominance = false);
void finish_validator_test();
void finish_opt_test();
void finish_setup_reduce_temp_test();

View file

@ -173,10 +173,14 @@ BEGIN_TEST(optimize.output_modifiers)
/* omod has no effect if denormals are enabled but clamp is fine */
//>> BB1
//! /* logical preds: / linear preds: / kind: uniform, */
//! /* logical preds: BB0, / linear preds: BB0, / kind: */
program->next_fp_mode.denorm32 = fp_denorm_keep;
program->next_fp_mode.denorm16_64 = fp_denorm_flush;
bld.reset(program->create_and_insert_block());
program->blocks[0].linear_succs.push_back(1);
program->blocks[0].logical_succs.push_back(1);
program->blocks[1].linear_preds.push_back(0);
program->blocks[1].logical_preds.push_back(0);
//! v1: %res14_tmp = v_add_f32 %a, %b
//! v1: %res14 = v_mul_f32 2.0, %res13_tmp
@ -191,10 +195,14 @@ BEGIN_TEST(optimize.output_modifiers)
Operand::c32(0x3f800000u), tmp));
//>> BB2
//! /* logical preds: / linear preds: / kind: uniform, */
//! /* logical preds: BB1, / linear preds: BB1, / kind: */
program->next_fp_mode.denorm32 = fp_denorm_flush;
program->next_fp_mode.denorm16_64 = fp_denorm_keep;
bld.reset(program->create_and_insert_block());
program->blocks[1].linear_succs.push_back(2);
program->blocks[1].logical_succs.push_back(2);
program->blocks[2].linear_preds.push_back(1);
program->blocks[2].logical_preds.push_back(1);
//! v2b: %res16_tmp = v_add_f16 %a, %b
//! v2b: %res16 = v_mul_f16 2.0, %res15_tmp
@ -211,12 +219,16 @@ BEGIN_TEST(optimize.output_modifiers)
/* omod flushes -0.0 to +0.0 */
//>> BB3
//! /* logical preds: / linear preds: / kind: uniform, */
//! /* logical preds: BB2, / linear preds: BB2, / kind: */
program->next_fp_mode.denorm32 = fp_denorm_keep;
program->next_fp_mode.denorm16_64 = fp_denorm_keep;
program->next_fp_mode.preserve_signed_zero_inf_nan32 = true;
program->next_fp_mode.preserve_signed_zero_inf_nan16_64 = false;
bld.reset(program->create_and_insert_block());
program->blocks[2].linear_succs.push_back(3);
program->blocks[2].logical_succs.push_back(3);
program->blocks[3].linear_preds.push_back(2);
program->blocks[3].logical_preds.push_back(2);
//! v1: %res18_tmp = v_add_f32 %a, %b
//! v1: %res18 = v_mul_f32 2.0, %res18_tmp
@ -230,10 +242,15 @@ BEGIN_TEST(optimize.output_modifiers)
Operand::c32(0x3f800000u), tmp));
//>> BB4
//! /* logical preds: / linear preds: / kind: uniform, */
//! /* logical preds: BB3, / linear preds: BB3, / kind: uniform, */
program->next_fp_mode.preserve_signed_zero_inf_nan32 = false;
program->next_fp_mode.preserve_signed_zero_inf_nan16_64 = true;
bld.reset(program->create_and_insert_block());
program->blocks[3].linear_succs.push_back(4);
program->blocks[3].logical_succs.push_back(4);
program->blocks[4].linear_preds.push_back(3);
program->blocks[4].logical_preds.push_back(3);
//! v2b: %res20_tmp = v_add_f16 %a, %b
//! v2b: %res20 = v_mul_f16 2.0, %res20_tmp
//! p_unit_test 20, %res20
@ -1755,9 +1772,13 @@ BEGIN_TEST(optimize.fmamix_two_literals)
writeout(5, fma(a, c15, c_denorm));
//>> BB1
//! /* logical preds: / linear preds: / kind: uniform, */
//! /* logical preds: BB0, / linear preds: BB0, / kind: uniform, */
program->next_fp_mode.denorm16_64 = fp_denorm_flush;
bld.reset(program->create_and_insert_block());
program->blocks[0].linear_succs.push_back(1);
program->blocks[0].logical_succs.push_back(1);
program->blocks[1].linear_preds.push_back(0);
program->blocks[1].logical_preds.push_back(0);
//~gfx10; del c15
//! v1: %c15 = p_parallelcopy 0x3fc00000