aco: Generalize vs_inputs to args_pending_vmem.

Handle arguments that need a waitcnt without relying on
RADV-specific VS input information.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21696>
This commit is contained in:
Timur Kristóf 2023-03-02 17:30:49 -08:00 committed by Marge Bot
parent 1583bea9da
commit 3058ab6090
4 changed files with 17 additions and 28 deletions

View file

@ -1032,11 +1032,9 @@ insert_wait_states(Program* program)
std::stack<unsigned, std::vector<unsigned>> loop_header_indices;
unsigned loop_progress = 0;
if (program->stage.has(SWStage::VS) && program->info.vs.dynamic_inputs) {
for (Definition def : program->vs_inputs) {
update_counters(in_ctx[0], event_vmem);
insert_wait_entry(in_ctx[0], def, event_vmem);
}
for (Definition def : program->args_pending_vmem) {
update_counters(in_ctx[0], event_vmem);
insert_wait_entry(in_ctx[0], def, event_vmem);
}
for (unsigned i = 0; i < program->blocks.size();) {

View file

@ -11271,10 +11271,15 @@ add_startpgm(struct isel_context* ctx)
ctx->arg_temps[i] = create_vec_from_array(ctx, elems, size, RegType::sgpr, 4);
} else {
Temp dst = ctx->program->allocateTmp(type);
Definition def(dst);
def.setFixed(PhysReg{file == AC_ARG_SGPR ? reg : reg + 256});
ctx->arg_temps[i] = dst;
startpgm->definitions[arg] = Definition(dst);
startpgm->definitions[arg].setFixed(PhysReg{file == AC_ARG_SGPR ? reg : reg + 256});
arg++;
startpgm->definitions[arg++] = def;
if (ctx->args->ac.args[i].pending_vmem) {
assert(file == AC_ARG_VGPR);
ctx->program->args_pending_vmem.push_back(def);
}
}
}
@ -11294,18 +11299,6 @@ add_startpgm(struct isel_context* ctx)
}
}
if (ctx->stage.has(SWStage::VS) && ctx->program->info.vs.dynamic_inputs) {
unsigned num_attributes = util_last_bit(ctx->program->info.vs.input_slot_usage_mask);
for (unsigned i = 0; i < num_attributes; i++) {
Definition def(get_arg(ctx, ctx->args->vs_inputs[i]));
unsigned idx = ctx->args->vs_inputs[i].arg_index;
def.setFixed(PhysReg(256 + ctx->args->ac.args[idx].offset));
ctx->program->vs_inputs.push_back(def);
}
}
return startpgm;
}

View file

@ -2120,7 +2120,7 @@ public:
unsigned next_divergent_if_logical_depth = 0;
unsigned next_uniform_if_depth = 0;
std::vector<Definition> vs_inputs;
std::vector<Definition> args_pending_vmem;
struct {
FILE* output = stderr;

View file

@ -544,13 +544,11 @@ collect_preasm_stats(Program* program)
double usage[(int)BlockCycleEstimator::resource_count] = {0};
std::vector<BlockCycleEstimator> blocks(program->blocks.size(), program);
if (program->stage.has(SWStage::VS) && program->info.vs.has_prolog) {
unsigned vs_input_latency = 320;
for (Definition def : program->vs_inputs) {
blocks[0].vm.push_back(vs_input_latency);
for (unsigned i = 0; i < def.size(); i++)
blocks[0].reg_available[def.physReg().reg() + i] = vs_input_latency;
}
constexpr const unsigned vmem_latency = 320;
for (const Definition def : program->args_pending_vmem) {
blocks[0].vm.push_back(vmem_latency);
for (unsigned i = 0; i < def.size(); i++)
blocks[0].reg_available[def.physReg().reg() + i] = vmem_latency;
}
for (Block& block : program->blocks) {