lima/ppir: rework store output

In many cases, it is possible to avoid creating a mov for the store
output node.
Additionally, nodes other than alu, such as load varying, can be valid
store output nodes too.

This is another small optimization, but helps a vast majority of
programs by 1 instruction.
Shaders with discard easily become complicated to handle properly.
Some example issues: ppir has to rely on instruction ordering; or a
node with ssa output could be required both before a discard_if (as a
condition) and after it (as the instruction with the 'stop' bit set).
So don't try to handle them here.

Signed-off-by: Erico Nunes <nunes.erico@gmail.com>
Reviewed-by: Vasily Khoruzhick <anarsoul@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4632>
This commit is contained in:
Erico Nunes 2020-04-14 02:45:38 +02:00
parent 570f1420db
commit 8c47640731
7 changed files with 40 additions and 28 deletions

View file

@ -201,7 +201,6 @@ static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code)
f->op = shift_to_op(alu->shift);
break;
case ppir_op_mov:
case ppir_op_store_color:
f->op = ppir_codegen_vec4_mul_op_mov;
break;
case ppir_op_max:
@ -344,7 +343,6 @@ static void ppir_codegen_encode_vec_add(ppir_node *node, void *code)
f->op = ppir_codegen_vec4_acc_op_add;
break;
case ppir_op_mov:
case ppir_op_store_color:
f->op = ppir_codegen_vec4_acc_op_mov;
break;
case ppir_op_sum3:

View file

@ -178,7 +178,6 @@ ppir_liveness_instr_dest(ppir_compiler *comp, ppir_instr *instr)
switch(node->op) {
case ppir_op_const:
case ppir_op_undef:
case ppir_op_store_color: /* never clear dest if its store output */
continue;
default:
break;

View file

@ -352,7 +352,26 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
return true;
case nir_intrinsic_store_output: {
alu_node = ppir_node_create_dest(block, ppir_op_store_color, NULL, 0);
/* In simple cases where the store_output is ssa, that register
* can be directly marked as the output.
* If discard is used or the source is not ssa, things can get a
* lot more complicated, so don't try to optimize those and fall
* back to inserting a mov at the end.
* If the source node will only be able to output to pipeline
* registers, fall back to the mov as well. */
if (!block->comp->uses_discard && instr->src->is_ssa) {
node = block->comp->var_nodes[instr->src->ssa->index];
switch (node->op) {
case ppir_op_load_uniform:
case ppir_op_const:
break;
default:
node->is_end = 1;
return true;
}
}
alu_node = ppir_node_create_dest(block, ppir_op_mov, NULL, 0);
if (!alu_node)
return false;
@ -370,6 +389,8 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, instr->src,
u_bit_consecutive(0, instr->num_components));
alu_node->node.is_end = 1;
list_addtail(&alu_node->node.list, &block->node_list);
return true;
}
@ -724,9 +745,9 @@ static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigne
static void ppir_add_ordering_deps(ppir_compiler *comp)
{
/* Some intrinsics do not have explicit dependencies and thus depend
* on instructions order. Consider discard_if and store_ouput as
* example. If we don't add fake dependency of discard_if to store_output
* scheduler may put store_output first and since store_output terminates
* on instructions order. Consider discard_if and the is_end node as
* example. If we don't add fake dependency of discard_if to is_end,
* scheduler may put the is_end first and since is_end terminates
* shader on Utgard PP, rest of it will never be executed.
* Add fake dependencies for discard/branch/store to preserve
* instruction order.
@ -753,8 +774,8 @@ static void ppir_add_ordering_deps(ppir_compiler *comp)
if (prev_node && ppir_node_is_root(node) && node->op != ppir_op_const) {
ppir_node_add_dep(prev_node, node, ppir_dep_sequence);
}
if (node->op == ppir_op_discard ||
node->op == ppir_op_store_color ||
if (node->is_end ||
node->op == ppir_op_discard ||
node->op == ppir_op_store_temp ||
node->op == ppir_op_branch) {
prev_node = node;
@ -818,6 +839,7 @@ bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
return false;
comp->ra = ra;
comp->uses_discard = nir->info.fs.uses_discard;
/* 1st pass: create ppir blocks */
nir_foreach_function(function, nir) {

View file

@ -313,14 +313,6 @@ const ppir_op_info ppir_op_infos[] = {
.name = "const",
.type = ppir_node_type_const,
},
[ppir_op_store_color] = {
.name = "st_col",
.type = ppir_node_type_alu,
.slots = (int []) {
PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_ALU_VEC_MUL,
PPIR_INSTR_SLOT_END
},
},
[ppir_op_store_temp] = {
.name = "st_temp",
.type = ppir_node_type_store,
@ -632,6 +624,11 @@ ppir_node *ppir_node_insert_mov(ppir_node *node)
ppir_node_add_dep(move, node, ppir_dep_src);
list_addtail(&move->list, &node->list);
if (node->is_end) {
node->is_end = false;
move->is_end = true;
}
return move;
}

View file

@ -75,7 +75,7 @@ static bool ppir_do_node_to_instr_try_insert(ppir_block *block, ppir_node *node)
return ppir_instr_insert_node(succ->instr, node);
}
static bool ppir_do_one_node_to_instr(ppir_block *block, ppir_node *node, ppir_node **next)
static bool ppir_do_one_node_to_instr(ppir_block *block, ppir_node *node)
{
switch (node->type) {
case ppir_node_type_alu:
@ -105,9 +105,6 @@ static bool ppir_do_one_node_to_instr(ppir_block *block, ppir_node *node, ppir_n
if (!node->instr && !create_new_instr(block, node))
return false;
if (node->op == ppir_op_store_color)
node->instr->is_end = true;
break;
}
case ppir_node_type_load:
@ -195,15 +192,13 @@ static bool ppir_do_one_node_to_instr(ppir_block *block, ppir_node *node, ppir_n
static bool ppir_do_node_to_instr(ppir_block *block, ppir_node *node)
{
ppir_node *next = node;
/* first try pipeline sched, if that didn't succeed try normal scheduling */
if (!ppir_do_node_to_instr_try_insert(block, node))
if (!ppir_do_one_node_to_instr(block, node, &next))
if (!ppir_do_one_node_to_instr(block, node))
return false;
/* next may have been updated in ppir_do_one_node_to_instr */
node = next;
if (node->is_end)
node->instr->is_end = true;
/* we have to make sure the dep not be destroyed (due to
* succ change) in ppir_do_node_to_instr, otherwise we can't

View file

@ -108,7 +108,6 @@ typedef enum {
ppir_op_load_temp,
ppir_op_store_temp,
ppir_op_store_color,
ppir_op_const,
@ -162,6 +161,7 @@ typedef struct ppir_node {
struct ppir_instr *instr;
int instr_pos;
struct ppir_block *block;
bool is_end;
/* for scheduler */
struct list_head succ_list;
@ -385,6 +385,7 @@ typedef struct ppir_compiler {
struct ra_regs *ra;
struct lima_fs_shader_state *prog;
bool uses_discard;
/* for scheduler */
int sched_instr_base;

View file

@ -138,7 +138,7 @@ static void ppir_regalloc_update_reglist_ssa(ppir_compiler *comp)
{
list_for_each_entry(ppir_block, block, &comp->block_list, list) {
list_for_each_entry(ppir_node, node, &block->node_list, list) {
if (node->op == ppir_op_store_color)
if (node->is_end)
continue;
if (!node->instr || node->op == ppir_op_const)