mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 07:28:11 +02:00
lima/ppir: rework store output
In many cases, it is possible to avoid creating a mov for the store output node.
Additionally, nodes other than alu, such as load varying, can be valid store output nodes too.
This is another small optimization, but it saves one instruction in the vast majority of programs.

Shaders with discard easily become complicated to handle properly. Some example issues: ppir has to rely on instruction ordering; or a node with ssa output could be required both before a discard_if (as a condition) and after it (as the instruction with the 'stop' bit set). So don't try to handle them here.

Signed-off-by: Erico Nunes <nunes.erico@gmail.com>
Reviewed-by: Vasily Khoruzhick <anarsoul@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4632>
This commit is contained in:
parent
570f1420db
commit
8c47640731
7 changed files with 40 additions and 28 deletions
|
|
@ -201,7 +201,6 @@ static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code)
|
|||
f->op = shift_to_op(alu->shift);
|
||||
break;
|
||||
case ppir_op_mov:
|
||||
case ppir_op_store_color:
|
||||
f->op = ppir_codegen_vec4_mul_op_mov;
|
||||
break;
|
||||
case ppir_op_max:
|
||||
|
|
@ -344,7 +343,6 @@ static void ppir_codegen_encode_vec_add(ppir_node *node, void *code)
|
|||
f->op = ppir_codegen_vec4_acc_op_add;
|
||||
break;
|
||||
case ppir_op_mov:
|
||||
case ppir_op_store_color:
|
||||
f->op = ppir_codegen_vec4_acc_op_mov;
|
||||
break;
|
||||
case ppir_op_sum3:
|
||||
|
|
|
|||
|
|
@ -178,7 +178,6 @@ ppir_liveness_instr_dest(ppir_compiler *comp, ppir_instr *instr)
|
|||
switch(node->op) {
|
||||
case ppir_op_const:
|
||||
case ppir_op_undef:
|
||||
case ppir_op_store_color: /* never clear dest if its store output */
|
||||
continue;
|
||||
default:
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -352,7 +352,26 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
|
|||
return true;
|
||||
|
||||
case nir_intrinsic_store_output: {
|
||||
alu_node = ppir_node_create_dest(block, ppir_op_store_color, NULL, 0);
|
||||
/* In simple cases where the store_output is ssa, that register
|
||||
* can be directly marked as the output.
|
||||
* If discard is used or the source is not ssa, things can get a
|
||||
* lot more complicated, so don't try to optimize those and fall
|
||||
* back to inserting a mov at the end.
|
||||
* If the source node will only be able to output to pipeline
|
||||
* registers, fall back to the mov as well. */
|
||||
if (!block->comp->uses_discard && instr->src->is_ssa) {
|
||||
node = block->comp->var_nodes[instr->src->ssa->index];
|
||||
switch (node->op) {
|
||||
case ppir_op_load_uniform:
|
||||
case ppir_op_const:
|
||||
break;
|
||||
default:
|
||||
node->is_end = 1;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
alu_node = ppir_node_create_dest(block, ppir_op_mov, NULL, 0);
|
||||
if (!alu_node)
|
||||
return false;
|
||||
|
||||
|
|
@ -370,6 +389,8 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
|
|||
ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, instr->src,
|
||||
u_bit_consecutive(0, instr->num_components));
|
||||
|
||||
alu_node->node.is_end = 1;
|
||||
|
||||
list_addtail(&alu_node->node.list, &block->node_list);
|
||||
return true;
|
||||
}
|
||||
|
|
@ -724,9 +745,9 @@ static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigne
|
|||
static void ppir_add_ordering_deps(ppir_compiler *comp)
|
||||
{
|
||||
/* Some intrinsics do not have explicit dependencies and thus depend
|
||||
* on instructions order. Consider discard_if and store_output as
|
||||
* example. If we don't add fake dependency of discard_if to store_output
|
||||
* scheduler may put store_output first and since store_output terminates
|
||||
* on instructions order. Consider discard_if and the is_end node as
|
||||
* example. If we don't add fake dependency of discard_if to is_end,
|
||||
* scheduler may put the is_end first and since is_end terminates
|
||||
* shader on Utgard PP, rest of it will never be executed.
|
||||
* Add fake dependencies for discard/branch/store to preserve
|
||||
* instruction order.
|
||||
|
|
@ -753,8 +774,8 @@ static void ppir_add_ordering_deps(ppir_compiler *comp)
|
|||
if (prev_node && ppir_node_is_root(node) && node->op != ppir_op_const) {
|
||||
ppir_node_add_dep(prev_node, node, ppir_dep_sequence);
|
||||
}
|
||||
if (node->op == ppir_op_discard ||
|
||||
node->op == ppir_op_store_color ||
|
||||
if (node->is_end ||
|
||||
node->op == ppir_op_discard ||
|
||||
node->op == ppir_op_store_temp ||
|
||||
node->op == ppir_op_branch) {
|
||||
prev_node = node;
|
||||
|
|
@ -818,6 +839,7 @@ bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
|
|||
return false;
|
||||
|
||||
comp->ra = ra;
|
||||
comp->uses_discard = nir->info.fs.uses_discard;
|
||||
|
||||
/* 1st pass: create ppir blocks */
|
||||
nir_foreach_function(function, nir) {
|
||||
|
|
|
|||
|
|
@ -313,14 +313,6 @@ const ppir_op_info ppir_op_infos[] = {
|
|||
.name = "const",
|
||||
.type = ppir_node_type_const,
|
||||
},
|
||||
[ppir_op_store_color] = {
|
||||
.name = "st_col",
|
||||
.type = ppir_node_type_alu,
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_ALU_VEC_MUL,
|
||||
PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_store_temp] = {
|
||||
.name = "st_temp",
|
||||
.type = ppir_node_type_store,
|
||||
|
|
@ -632,6 +624,11 @@ ppir_node *ppir_node_insert_mov(ppir_node *node)
|
|||
ppir_node_add_dep(move, node, ppir_dep_src);
|
||||
list_addtail(&move->list, &node->list);
|
||||
|
||||
if (node->is_end) {
|
||||
node->is_end = false;
|
||||
move->is_end = true;
|
||||
}
|
||||
|
||||
return move;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -75,7 +75,7 @@ static bool ppir_do_node_to_instr_try_insert(ppir_block *block, ppir_node *node)
|
|||
return ppir_instr_insert_node(succ->instr, node);
|
||||
}
|
||||
|
||||
static bool ppir_do_one_node_to_instr(ppir_block *block, ppir_node *node, ppir_node **next)
|
||||
static bool ppir_do_one_node_to_instr(ppir_block *block, ppir_node *node)
|
||||
{
|
||||
switch (node->type) {
|
||||
case ppir_node_type_alu:
|
||||
|
|
@ -105,9 +105,6 @@ static bool ppir_do_one_node_to_instr(ppir_block *block, ppir_node *node, ppir_n
|
|||
if (!node->instr && !create_new_instr(block, node))
|
||||
return false;
|
||||
|
||||
if (node->op == ppir_op_store_color)
|
||||
node->instr->is_end = true;
|
||||
|
||||
break;
|
||||
}
|
||||
case ppir_node_type_load:
|
||||
|
|
@ -195,15 +192,13 @@ static bool ppir_do_one_node_to_instr(ppir_block *block, ppir_node *node, ppir_n
|
|||
|
||||
static bool ppir_do_node_to_instr(ppir_block *block, ppir_node *node)
|
||||
{
|
||||
ppir_node *next = node;
|
||||
|
||||
/* first try pipeline sched, if that didn't succeed try normal scheduling */
|
||||
if (!ppir_do_node_to_instr_try_insert(block, node))
|
||||
if (!ppir_do_one_node_to_instr(block, node, &next))
|
||||
if (!ppir_do_one_node_to_instr(block, node))
|
||||
return false;
|
||||
|
||||
/* next may have been updated in ppir_do_one_node_to_instr */
|
||||
node = next;
|
||||
if (node->is_end)
|
||||
node->instr->is_end = true;
|
||||
|
||||
/* we have to make sure the dep not be destroyed (due to
|
||||
* succ change) in ppir_do_node_to_instr, otherwise we can't
|
||||
|
|
|
|||
|
|
@ -108,7 +108,6 @@ typedef enum {
|
|||
ppir_op_load_temp,
|
||||
|
||||
ppir_op_store_temp,
|
||||
ppir_op_store_color,
|
||||
|
||||
ppir_op_const,
|
||||
|
||||
|
|
@ -162,6 +161,7 @@ typedef struct ppir_node {
|
|||
struct ppir_instr *instr;
|
||||
int instr_pos;
|
||||
struct ppir_block *block;
|
||||
bool is_end;
|
||||
|
||||
/* for scheduler */
|
||||
struct list_head succ_list;
|
||||
|
|
@ -385,6 +385,7 @@ typedef struct ppir_compiler {
|
|||
|
||||
struct ra_regs *ra;
|
||||
struct lima_fs_shader_state *prog;
|
||||
bool uses_discard;
|
||||
|
||||
/* for scheduler */
|
||||
int sched_instr_base;
|
||||
|
|
|
|||
|
|
@ -138,7 +138,7 @@ static void ppir_regalloc_update_reglist_ssa(ppir_compiler *comp)
|
|||
{
|
||||
list_for_each_entry(ppir_block, block, &comp->block_list, list) {
|
||||
list_for_each_entry(ppir_node, node, &block->node_list, list) {
|
||||
if (node->op == ppir_op_store_color)
|
||||
if (node->is_end)
|
||||
continue;
|
||||
|
||||
if (!node->instr || node->op == ppir_op_const)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue