aco/ra: rework linear VGPR allocation
We allocate them at the end of the register file and keep them separate from
normal VGPRs. This is for two reasons:
- Because we only ever move linear VGPRs into an empty space or a space
  previously occupied by a linear one, we never have to swap a normal VGPR
  and a linear one. This simplifies copy lowering.
- As linear VGPRs' live ranges only start and end at top-level blocks, we
  never have to move a linear VGPR in control flow.

fossil-db (navi31):
Totals from 5493 (6.93% of 79242) affected shaders:
MaxWaves: 150365 -> 150343 (-0.01%)
Instrs: 7974740 -> 7976073 (+0.02%); split: -0.06%, +0.08%
CodeSize: 41296024 -> 41299024 (+0.01%); split: -0.06%, +0.06%
VGPRs: 283192 -> 329560 (+16.37%)
Latency: 64267936 -> 64268414 (+0.00%); split: -0.17%, +0.17%
InvThroughput: 10954037 -> 10951735 (-0.02%); split: -0.09%, +0.07%
VClause: 132792 -> 132956 (+0.12%); split: -0.06%, +0.18%
SClause: 223854 -> 223841 (-0.01%); split: -0.01%, +0.01%
Copies: 559574 -> 561395 (+0.33%); split: -0.24%, +0.56%
Branches: 179630 -> 179636 (+0.00%); split: -0.02%, +0.02%
VALU: 4572683 -> 4574487 (+0.04%); split: -0.03%, +0.07%
SALU: 772076 -> 772111 (+0.00%); split: -0.01%, +0.01%
VOPD: 1095 -> 1099 (+0.37%); split: +0.73%, -0.37%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27697>
parent 2d49c79c7e
commit b7738de4f9
6 changed files with 216 additions and 36 deletions
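To make the layout concrete before reading the diff, here is a minimal standalone sketch of the partitioning the commit message describes. The names (`Bounds`, `Interval`, `normal_vgprs`, `linear_vgprs`) are made up for illustration and are not ACO's types, and the sketch ignores that ACO offsets VGPRs by 256 in its register encoding; it mirrors what the reworked `get_reg_bounds()` below computes:

```cpp
#include <cassert>
#include <cstdint>

struct Interval {
   unsigned lo;   // first register of the interval
   unsigned size; // number of registers
};

// Normal VGPRs grow upward from the bottom of the bounds; linear VGPRs are
// packed against the top, so the two regions never interleave.
struct Bounds {
   uint16_t vgpr_bounds;      // current VGPR bound (at most the HW limit)
   uint16_t num_linear_vgprs; // registers reserved at the top for linear VGPRs

   Interval normal_vgprs() const
   {
      return {0u, unsigned(vgpr_bounds - num_linear_vgprs)};
   }
   Interval linear_vgprs() const
   {
      return {unsigned(vgpr_bounds - num_linear_vgprs), num_linear_vgprs};
   }
};

int main()
{
   Bounds b{12, 3};
   assert(b.normal_vgprs().lo == 0 && b.normal_vgprs().size == 9); // v0..v8
   assert(b.linear_vgprs().lo == 9 && b.linear_vgprs().size == 3); // v9..v11
   return 0;
}
```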
src/amd/compiler/aco_interface.cpp:

@@ -176,7 +176,7 @@ aco_postprocess_shader(const struct aco_compiler_options* options,
    validate(program.get());
 
    /* Register Allocation */
-   aco::register_allocation(program.get(), live_vars.live_out);
+   aco::register_allocation(program.get(), live_vars);
 
    if (aco::validate_ra(program.get())) {
       aco_print_program(program.get(), stderr);
src/amd/compiler/aco_ir.h:

@@ -2221,8 +2221,7 @@ void optimize(Program* program);
 void optimize_postRA(Program* program);
 void setup_reduce_temp(Program* program);
 void lower_to_cssa(Program* program, live& live_vars);
-void register_allocation(Program* program, std::vector<IDSet>& live_out_per_block,
-                         ra_test_policy = {});
+void register_allocation(Program* program, live& live_vars, ra_test_policy = {});
 void ssa_elimination(Program* program);
 void lower_to_hw_instr(Program* program);
 void schedule_program(Program* program, live& live_vars);
src/amd/compiler/aco_register_allocation.cpp:

@@ -89,6 +89,10 @@ struct ra_ctx {
    uint16_t vgpr_limit;
    std::bitset<512> war_hint;
 
+   uint16_t sgpr_bounds;
+   uint16_t vgpr_bounds;
+   uint16_t num_linear_vgprs;
+
    ra_test_policy policy;
 
    ra_ctx(Program* program_, ra_test_policy policy_)
@@ -101,6 +105,10 @@ struct ra_ctx {
         create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi, Format::PSEUDO, 0, 0));
       sgpr_limit = get_addr_sgpr_from_waves(program, program->min_waves);
       vgpr_limit = get_addr_vgpr_from_waves(program, program->min_waves);
+
+      sgpr_bounds = program->max_reg_demand.sgpr;
+      vgpr_bounds = program->max_reg_demand.vgpr;
+      num_linear_vgprs = 0;
    }
 };
 
@@ -196,10 +204,13 @@ get_stride(RegClass rc)
 PhysRegInterval
-get_reg_bounds(ra_ctx& ctx, RegType type)
+get_reg_bounds(ra_ctx& ctx, RegType type, bool linear_vgpr)
 {
-   if (type == RegType::vgpr) {
-      return {PhysReg{256}, (unsigned)ctx.program->max_reg_demand.vgpr};
+   uint16_t linear_vgpr_start = ctx.vgpr_bounds - ctx.num_linear_vgprs;
+   if (type == RegType::vgpr && linear_vgpr) {
+      return PhysRegInterval{PhysReg(256 + linear_vgpr_start), ctx.num_linear_vgprs};
+   } else if (type == RegType::vgpr) {
+      return PhysRegInterval{PhysReg(256), linear_vgpr_start};
    } else {
-      return {PhysReg{0}, (unsigned)ctx.program->max_reg_demand.sgpr};
+      return PhysRegInterval{PhysReg(0), ctx.sgpr_bounds};
    }
 }
 
@@ -252,7 +263,7 @@ struct DefInfo {
          assert(ctx.program->gfx_level == GFX9 && "Image D16 on GFX8 not supported.");
 
          if (imageGather4D16Bug)
-            bounds.size -= rc.bytes() / 4;
+            bounds.size -= MAX2(rc.bytes() / 4 - ctx.num_linear_vgprs, 0);
       }
    }
 };
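A note on the reworked gather4 D16 workaround: the registers it makes unusable at the very top of the file now partly coincide with the linear-VGPR reservation, which already ends the normal bounds `num_linear_vgprs` registers early, so only the remainder still needs to be subtracted (clamped to zero).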
@@ -1294,7 +1305,8 @@ get_reg_impl(ra_ctx& ctx, const RegisterFile& reg_file,
       }
    }
 
-   assert(regs_free >= size);
+   assert((regs_free + ctx.num_linear_vgprs) >= size);
+
    /* we might have to move dead operands to dst in order to make space */
    unsigned op_moves = 0;
 
@@ -1458,15 +1470,22 @@ get_reg_specified(ra_ctx& ctx, const RegisterFile& reg_file, RegClass rc,
 bool
 increase_register_file(ra_ctx& ctx, RegClass rc)
 {
-   if (rc.type() == RegType::vgpr && ctx.program->max_reg_demand.vgpr < ctx.vgpr_limit) {
-      update_vgpr_sgpr_demand(ctx.program, RegisterDemand(ctx.program->max_reg_demand.vgpr + 1,
-                                                          ctx.program->max_reg_demand.sgpr));
+   if (rc.type() == RegType::vgpr && ctx.num_linear_vgprs == 0 &&
+       ctx.vgpr_bounds < ctx.vgpr_limit) {
+      /* If vgpr_bounds is less than max_reg_demand.vgpr, this should be a no-op. */
+      update_vgpr_sgpr_demand(
+         ctx.program, RegisterDemand(ctx.vgpr_bounds + 1, ctx.program->max_reg_demand.sgpr));
+
+      ctx.vgpr_bounds = ctx.program->max_reg_demand.vgpr;
    } else if (rc.type() == RegType::sgpr && ctx.program->max_reg_demand.sgpr < ctx.sgpr_limit) {
-      update_vgpr_sgpr_demand(ctx.program, RegisterDemand(ctx.program->max_reg_demand.vgpr,
-                                                          ctx.program->max_reg_demand.sgpr + 1));
+      update_vgpr_sgpr_demand(
+         ctx.program, RegisterDemand(ctx.program->max_reg_demand.vgpr, ctx.sgpr_bounds + 1));
+
+      ctx.sgpr_bounds = ctx.program->max_reg_demand.sgpr;
    } else {
       return false;
    }
 
    return true;
 }
 
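The new `ctx.num_linear_vgprs == 0` guard is the price of parking linear VGPRs at the top of `vgpr_bounds`: raising the bounds while any are live would leave them stranded below the new top, so the VGPR file can only grow while the linear region is empty.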
@@ -1632,6 +1651,124 @@ get_reg_vector(ra_ctx& ctx, const RegisterFile& reg_file, Temp temp, aco_ptr<Ins
    return {};
 }
 
+bool
+compact_linear_vgprs(ra_ctx& ctx, const RegisterFile& reg_file,
+                     std::vector<std::pair<Operand, Definition>>& parallelcopies)
+{
+   PhysRegInterval linear_vgpr_bounds = get_reg_bounds(ctx, RegType::vgpr, true);
+   int zeros = reg_file.count_zero(linear_vgpr_bounds);
+   if (zeros == 0)
+      return false;
+
+   std::vector<IDAndRegClass> vars;
+   for (unsigned id : find_vars(ctx, reg_file, linear_vgpr_bounds))
+      vars.emplace_back(id, ctx.assignments[id].rc);
+
+   ctx.num_linear_vgprs -= zeros;
+   compact_relocate_vars(ctx, vars, parallelcopies, get_reg_bounds(ctx, RegType::vgpr, true).lo());
+
+   return true;
+}
+
+/* Allocates a linear VGPR. We allocate them at the end of the register file and keep them separate
+ * from normal VGPRs. This is for two reasons:
+ * - Because we only ever move linear VGPRs into an empty space or a space previously occupied by a
+ *   linear one, we never have to swap a normal VGPR and a linear one.
+ * - As linear VGPR's live ranges only start and end on top-level blocks, we never have to move a
+ *   linear VGPR in control flow.
+ */
+PhysReg
+alloc_linear_vgpr(ra_ctx& ctx, const RegisterFile& reg_file, aco_ptr<Instruction>& instr,
+                  std::vector<std::pair<Operand, Definition>>& parallelcopies)
+{
+   assert(instr->opcode == aco_opcode::p_start_linear_vgpr);
+   assert(instr->definitions.size() == 1 && instr->definitions[0].bytes() % 4 == 0);
+
+   RegClass rc = instr->definitions[0].regClass();
+
+   /* Try to choose an unused space in the linear VGPR bounds. */
+   for (unsigned i = rc.size(); i <= ctx.num_linear_vgprs; i++) {
+      PhysReg reg(256 + ctx.vgpr_bounds - i);
+      if (!reg_file.test(reg, rc.bytes())) {
+         adjust_max_used_regs(ctx, rc, reg);
+         return reg;
+      }
+   }
+
+   PhysRegInterval old_normal_bounds = get_reg_bounds(ctx, RegType::vgpr, false);
+
+   /* Compact linear VGPRs, grow the bounds if necessary, and choose a space at the beginning: */
+   compact_linear_vgprs(ctx, reg_file, parallelcopies);
+
+   PhysReg reg(256 + ctx.vgpr_bounds - (ctx.num_linear_vgprs + rc.size()));
+   /* Space that was for normal VGPRs, but is now for linear VGPRs. */
+   PhysRegInterval new_win = PhysRegInterval::from_until(reg, MAX2(old_normal_bounds.hi(), reg));
+
+   RegisterFile tmp_file(reg_file);
+   PhysRegInterval reg_win{reg, rc.size()};
+   std::vector<unsigned> blocking_vars = collect_vars(ctx, tmp_file, new_win);
+
+   /* Re-enable killed operands */
+   for (Operand& op : instr->operands) {
+      if (op.isTemp() && op.isFirstKillBeforeDef())
+         tmp_file.fill(op);
+   }
+
+   /* Find new assignments for blocking vars. */
+   std::vector<std::pair<Operand, Definition>> pc;
+   if (!ctx.policy.skip_optimistic_path &&
+       get_regs_for_copies(ctx, tmp_file, pc, blocking_vars, instr, reg_win)) {
+      parallelcopies.insert(parallelcopies.end(), pc.begin(), pc.end());
+   } else {
+      /* Fallback algorithm: reallocate all variables at once. */
+      std::vector<IDAndRegClass> vars;
+      for (unsigned id : find_vars(ctx, reg_file, old_normal_bounds))
+         vars.emplace_back(id, ctx.assignments[id].rc);
+      compact_relocate_vars(ctx, vars, parallelcopies, PhysReg(256));
+
+      std::vector<IDAndRegClass> killed_op_vars;
+      for (Operand& op : instr->operands) {
+         if (op.isTemp() && op.isFirstKillBeforeDef() && op.regClass().type() == RegType::vgpr)
+            killed_op_vars.emplace_back(op.tempId(), op.regClass());
+      }
+      compact_relocate_vars(ctx, killed_op_vars, parallelcopies, reg_win.lo());
+   }
+
+   /* If this is updated earlier, a killed operand can't be placed inside the definition. */
+   ctx.num_linear_vgprs += rc.size();
+
+   adjust_max_used_regs(ctx, rc, reg);
+   return reg;
+}
+
+bool
+should_compact_linear_vgprs(ra_ctx& ctx, live& live_vars, const RegisterFile& reg_file)
+{
+   if (!(ctx.block->kind & block_kind_top_level) || ctx.block->linear_succs.empty())
+      return false;
+
+   /* Since we won't be able to copy linear VGPRs to make space when in control flow, we have to
+    * ensure in advance that there is enough space for normal VGPRs. */
+   unsigned max_vgpr_usage = 0;
+   unsigned next_toplevel = ctx.block->index + 1;
+   for (; !(ctx.program->blocks[next_toplevel].kind & block_kind_top_level); next_toplevel++) {
+      max_vgpr_usage =
+         MAX2(max_vgpr_usage, (unsigned)ctx.program->blocks[next_toplevel].register_demand.vgpr);
+   }
+
+   std::vector<aco_ptr<Instruction>>& instructions =
+      ctx.program->blocks[next_toplevel].instructions;
+   if (!instructions.empty() && is_phi(instructions[0])) {
+      max_vgpr_usage =
+         MAX2(max_vgpr_usage, (unsigned)live_vars.register_demand[next_toplevel][0].vgpr);
+   }
+
+   for (unsigned tmp : find_vars(ctx, reg_file, get_reg_bounds(ctx, RegType::vgpr, true)))
+      max_vgpr_usage -= ctx.assignments[tmp].rc.size();
+
+   return max_vgpr_usage > get_reg_bounds(ctx, RegType::vgpr, false).size;
+}
+
 PhysReg
 get_reg(ra_ctx& ctx, const RegisterFile& reg_file, Temp temp,
         std::vector<std::pair<Operand, Definition>>& parallelcopies, aco_ptr<Instruction>& instr,
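As a companion to the new helpers, here is a toy model of the compaction step. The types (`LinearVar`) are made up for illustration, and direct assignment stands in for the parallel copies that `compact_relocate_vars` actually emits:

```cpp
#include <cstdio>
#include <vector>

// Toy model of compact_linear_vgprs(): when linear VGPRs die, holes appear in
// the linear region at the top of the file; compaction re-packs the survivors
// against the top and returns the freed registers to the normal-VGPR region.
struct LinearVar {
   int id;
   unsigned reg;  // current first register
   unsigned size; // registers occupied
};

int main()
{
   const unsigned vgpr_bounds = 16;
   unsigned num_linear_vgprs = 4; // region [12, 16), but one register is dead

   std::vector<LinearVar> live = {{1, 12, 1}, {2, 14, 2}}; // hole at v13

   unsigned used = 0;
   for (const LinearVar& v : live)
      used += v.size;
   num_linear_vgprs = used; // corresponds to ctx.num_linear_vgprs -= zeros

   // Re-pack the live variables from the new start of the linear region.
   unsigned next = vgpr_bounds - num_linear_vgprs;
   for (LinearVar& v : live) {
      std::printf("linear vgpr %d: v%u -> v%u\n", v.id, v.reg, next);
      v.reg = next;
      next += v.size;
   }
   // Normal VGPRs may now use [0, 13) instead of [0, 12).
   return 0;
}
```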
@@ -1694,14 +1831,29 @@ get_reg(ra_ctx& ctx, const RegisterFile& reg_file, Temp temp,
    if (res)
       return *res;
 
-   /* try using more registers */
+   /* try compacting the linear vgprs to make more space */
+   std::vector<std::pair<Operand, Definition>> pc;
+   if (info.rc.type() == RegType::vgpr && (ctx.block->kind & block_kind_top_level) &&
+       compact_linear_vgprs(ctx, reg_file, pc)) {
+      parallelcopies.insert(parallelcopies.end(), pc.begin(), pc.end());
+
+      /* We don't need to fill the copy definitions in because we don't care about the linear VGPR
+       * space here. */
+      RegisterFile tmp_file(reg_file);
+      for (std::pair<Operand, Definition>& copy : pc)
+         tmp_file.clear(copy.first);
+
+      return get_reg(ctx, tmp_file, temp, parallelcopies, instr, operand_index);
+   }
+
    /* We should only fail here because keeping under the limit would require
     * too many moves. */
    assert(reg_file.count_zero(info.bounds) >= info.size);
 
+   /* try using more registers */
    if (!increase_register_file(ctx, info.rc)) {
-      /* fallback algorithm: reallocate all variables at once */
+      /* fallback algorithm: reallocate all variables at once (linear VGPRs should already be
+       * compact at the end) */
       unsigned def_size = info.rc.size();
       for (Definition def : instr->definitions) {
          if (ctx.assignments[def.tempId()].assigned && def.regClass().type() == info.rc.type())
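Compaction is only attempted in top-level blocks, consistent with the scheme's invariant that linear VGPRs are never moved inside control flow; the recursive `get_reg` call then sees the reclaimed space as free because the compaction copies' sources were cleared from `tmp_file`.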
@@ -2803,8 +2955,9 @@ emit_parallel_copy(ra_ctx& ctx, std::vector<std::pair<Operand, Definition>>& par
 } /* end namespace */
 
 void
-register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra_test_policy policy)
+register_allocation(Program* program, live& live_vars, ra_test_policy policy)
 {
+   std::vector<IDSet>& live_out_per_block = live_vars.live_out;
    ra_ctx ctx(program, policy);
    get_affinities(ctx, live_out_per_block);
 
@@ -2882,19 +3035,17 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
                continue;
             }
 
-            /* unconditional branches are handled after phis of the target */
-            std::vector<std::pair<Operand, Definition>> parallelcopy;
-            bool temp_in_scc = register_file[scc];
-
             if (instr->opcode == aco_opcode::p_branch) {
-               /* last instruction of the block */
+               /* unconditional branches are handled after phis of the target */
               instructions.emplace_back(std::move(instr));
               break;
            }
 
+            std::vector<std::pair<Operand, Definition>> parallelcopy;
+
             assert(!is_phi(instr));
 
+            bool temp_in_scc = register_file[scc];
+
             /* handle operands */
             bool fixed = false;
             for (unsigned i = 0; i < instr->operands.size(); ++i) {
@@ -3004,7 +3155,11 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
                   continue;
 
                /* find free reg */
-               if (instr->opcode == aco_opcode::p_split_vector) {
+               if (instr->opcode == aco_opcode::p_start_linear_vgpr) {
+                  /* Allocation of linear VGPRs is special. */
+                  definition->setFixed(alloc_linear_vgpr(ctx, register_file, instr, parallelcopy));
+                  update_renames(ctx, register_file, parallelcopy, instr, rename_not_killed_ops);
+               } else if (instr->opcode == aco_opcode::p_split_vector) {
                   PhysReg reg = instr->operands[0].physReg();
                   RegClass rc = definition->regClass();
                   for (unsigned j = 0; j < i; j++)
@@ -3162,6 +3317,26 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
 
       } /* end for Instr */
 
+      if ((block.kind & block_kind_top_level) && block.linear_succs.empty()) {
+         /* Reset this for block_kind_resume. */
+         ASSERTED PhysRegInterval linear_vgpr_bounds = get_reg_bounds(ctx, RegType::vgpr, true);
+         assert(register_file.count_zero(linear_vgpr_bounds) == linear_vgpr_bounds.size);
+         ctx.num_linear_vgprs = 0;
+      } else if (should_compact_linear_vgprs(ctx, live_vars, register_file)) {
+         aco_ptr<Instruction> br = std::move(instructions.back());
+         instructions.pop_back();
+
+         bool temp_in_scc =
+            register_file[scc] || (!br->operands.empty() && br->operands[0].physReg() == scc);
+
+         std::vector<std::pair<Operand, Definition>> parallelcopy;
+         compact_linear_vgprs(ctx, register_file, parallelcopy);
+         update_renames(ctx, register_file, parallelcopy, br, rename_not_killed_ops);
+         emit_parallel_copy(ctx, parallelcopy, br, instructions, temp_in_scc, register_file);
+
+         instructions.push_back(std::move(br));
+      }
+
       block.instructions = std::move(instructions);
    } /* end for BB */
 
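Note how the end-of-block compaction is spliced in: the branch is popped, the compaction copies are emitted, and the branch is pushed back, so the parallel copy lands before the final jump; `temp_in_scc` also accounts for a branch that reads `scc`, which the copy must not clobber.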
src/amd/compiler/tests/helpers.cpp:

@@ -211,7 +211,7 @@ finish_ra_test(ra_test_policy policy, bool lower)
 
    program->workgroup_size = program->wave_size;
    aco::live live_vars = aco::live_var_analysis(program.get());
-   aco::register_allocation(program.get(), live_vars.live_out, policy);
+   aco::register_allocation(program.get(), live_vars, policy);
 
    if (aco::validate_ra(program.get())) {
       fail_test("Validation after register allocation failed");
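The remaining hunks update test expectations: with the reworked allocation the copies out of the interpolation temporaries come out in a different order, and some values now go through a `_tmp` register plus an extra `v_mov_b32`, matching the small `Copies` increase in the fossil-db stats above.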
src/amd/compiler/tests/test_d3d11_derivs.cpp:

@@ -62,8 +62,8 @@ BEGIN_TEST(d3d11_derivs.simple)
 
    //>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
    //>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_
-   //>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
    //>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
+   //>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
    //>> image_sample v[#_:#_], v[#rx:#ry], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D ; $_ $_
    pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
 END_TEST
@@ -101,8 +101,9 @@ BEGIN_TEST(d3d11_derivs.constant)
    //>> p_end_linear_vgpr (latekill)(kill)%wqm
    pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
 
-   //>> v_interp_p2_f32_e32 v#rx, v#_, attr0.x ; $_
+   //>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
    //>> v_mov_b32_e32 v#ry, -0.5 ; $_
+   //>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
    //>> image_sample v[#_:#_], v[#rx:#ry], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D ; $_ $_
    pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
 END_TEST
@@ -173,12 +174,12 @@ BEGIN_TEST(d3d11_derivs.bias)
    //>> p_end_linear_vgpr (latekill)(kill)%wqm
    pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
 
-   //>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
-   //>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_
-   //>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
-   //>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
+   //>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
+   //>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_
+   //>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
+   //>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
    //>> BB1:
-   //>> image_sample_b v[#_:#_], [v2, v#rx, v#ry], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D ; $_ $_ $_
+   //>> image_sample_b v[#_:#_], [v#rb, v#rx, v#ry], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D ; $_ $_ $_
    pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
 END_TEST
 
@@ -265,9 +266,9 @@ BEGIN_TEST(d3d11_derivs.array)
    //>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
    //>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_
    //>> v_rndne_f32_e32 v#rl_tmp, v#rl_tmp ; $_
-   //>> v_mov_b32_e32 v#rl, v#rl_tmp ; $_
    //>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
    //>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
+   //>> v_mov_b32_e32 v#rl, v#rl_tmp ; $_
    //>> BB1:
    //; success = rx+1 == ry and rx+2 == rl
    //>> image_sample v[#_:#_], v[#rx:#rl], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; $_ $_
@@ -353,8 +354,9 @@ BEGIN_TEST(d3d11_derivs._1d_gfx9)
    //>> p_end_linear_vgpr (latekill)(kill)%wqm
    pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
 
-   //>> v_interp_p2_f32_e32 v#rx, v#_, attr0.x ; $_
+   //>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
    //>> v_mov_b32_e32 v#ry, 0.5 ; $_
+   //>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
    //; success = rx+1 == ry
    //>> image_sample v[#_:#_], v#rx, s[#_:#_], s[#_:#_] dmask:0xf ; $_ $_
    pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
@@ -398,8 +400,8 @@ BEGIN_TEST(d3d11_derivs._1d_array_gfx9)
    //>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
    //>> v_rndne_f32_e32 v#rl_tmp, v#rl_tmp ; $_
    //>> v_mov_b32_e32 v#ry, 0.5 ; $_
-   //>> v_mov_b32_e32 v#rl, v#rl_tmp ; $_
    //>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
+   //>> v_mov_b32_e32 v#rl, v#rl_tmp ; $_
    //>> BB1:
    //; success = rx+1 == ry and rx+2 == rl
    //>> image_sample v[#_:#_], v#rx, s[#_:#_], s[#_:#_] dmask:0xf da ; $_ $_
@@ -442,10 +444,11 @@ BEGIN_TEST(d3d11_derivs.cube)
    pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
 
    //>> v_cubeid_f32 v#rf_tmp, v#_, v#_, v#_ ; $_ $_
-   //>> v_fmaak_f32 v#rx, v#_, v#_, 0x3fc00000 ; $_ $_
-   //>> v_fmaak_f32 v#ry_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
    //>> v_mov_b32_e32 v#rf, v#rf_tmp ; $_
+   //>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
+   //>> v_fmaak_f32 v#ry_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
    //>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
+   //>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
    //; success = rx+1 == ry and rx+2 == rf
    //>> image_sample v[#_:#_], v[#rx:#rf], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; $_ $_
    pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
src/amd/compiler/tests/test_regalloc.cpp:

@@ -232,6 +232,8 @@ BEGIN_TEST(regalloc.branch_def_phis_at_merge_block)
    if (!setup_cs("", GFX10))
       return;
 
+   program->blocks[0].kind &= ~block_kind_top_level;
+
    //! s2: %_:s[2-3] = p_branch
    bld.branch(aco_opcode::p_branch, bld.def(s2));
 
@@ -277,6 +279,7 @@ BEGIN_TEST(regalloc.branch_def_phis_at_branch_block)
    bld.reset(program->create_and_insert_block());
    program->blocks[3].linear_preds.push_back(1);
    program->blocks[3].linear_preds.push_back(2);
+   program->blocks[3].kind |= block_kind_top_level;
 
    finish_ra_test(ra_test_policy());
 END_TEST
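Both regalloc tests adjust `block_kind_top_level`, presumably so the new top-level bookkeeping (the look-ahead in `should_compact_linear_vgprs`, which scans to the next top-level block, and the end-of-program reset) stays well-defined on their hand-built CFGs.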