lima/ppir: implement discard and discard_if

This commit also adds codegen for branch since we need it
for discard_if.

Reviewed-by: Qiang Yu <yuq825@gmail.com>
Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
This commit is contained in:
Vasily Khoruzhick 2019-05-10 19:17:40 -07:00
parent 7a7be61398
commit af0de6b91c
7 changed files with 253 additions and 10 deletions

View file

@ -507,6 +507,42 @@ static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code)
code[i] = util_float_to_half(constant->value[i].f); code[i] = util_float_to_half(constant->value[i].f);
} }
/* Encode a discard into the branch field of an instruction.
 *
 * node: the node being encoded; it must be a ppir_op_discard node.
 * code: start of the branch field; its three discard words are filled with
 *       the PPIR_CODEGEN_DISCARD_WORD0..2 constants.
 */
static void ppir_codegen_encode_discard(ppir_node *node, void *code)
{
   ppir_codegen_field_branch *b = code;

   /* This has to be a comparison: an assignment here would silently
    * rewrite node->op and the assertion could never fire.
    */
   assert(node->op == ppir_op_discard);

   b->discard.word0 = PPIR_CODEGEN_DISCARD_WORD0;
   b->discard.word1 = PPIR_CODEGEN_DISCARD_WORD1;
   b->discard.word2 = PPIR_CODEGEN_DISCARD_WORD2;
}
/* Encode the node occupying the branch slot of an instruction.
 *
 * The branch slot is shared by discard and branch nodes: a discard is
 * forwarded to ppir_codegen_encode_discard(), anything else must be a
 * ppir_op_branch node.
 *
 * node: node placed in PPIR_INSTR_SLOT_BRANCH.
 * code: start of the branch field to fill in.
 */
static void ppir_codegen_encode_branch(ppir_node *node, void *code)
{
   ppir_codegen_field_branch *b = code;
   ppir_branch_node *branch;
   ppir_instr *target_instr;

   if (node->op == ppir_op_discard) {
      ppir_codegen_encode_discard(node, code);
      return;
   }

   /* This has to be a comparison: an assignment here would overwrite the
    * node's opcode with ppir_op_branch and the check would always pass.
    */
   assert(node->op == ppir_op_branch);
   branch = ppir_node_to_branch(node);

   b->branch.unknown_0 = 0x0;
   b->branch.arg0_source = ppir_target_get_src_reg_index(&branch->src[0]);
   b->branch.arg1_source = ppir_target_get_src_reg_index(&branch->src[1]);
   b->branch.cond_gt = branch->cond_gt;
   b->branch.cond_eq = branch->cond_eq;
   b->branch.cond_lt = branch->cond_lt;
   b->branch.unknown_1 = 0x0;
   b->branch.unknown_2 = 0x3;

   /* The encoded target is relative: the offset of the first instruction
    * of the target block minus the offset of the branching instruction.
    */
   target_instr = list_first_entry(&branch->target->instr_list, ppir_instr, list);
   b->branch.target = target_instr->offset - node->instr->offset;
}
typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *); typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *);
static const ppir_codegen_instr_slot_encode_func static const ppir_codegen_instr_slot_encode_func
@ -520,6 +556,7 @@ ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = {
[PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add, [PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add,
[PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine, [PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine,
[PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp, [PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp,
[PPIR_INSTR_SLOT_BRANCH] = ppir_codegen_encode_branch,
}; };
static const int ppir_codegen_field_size[] = { static const int ppir_codegen_field_size[] = {
@ -634,7 +671,7 @@ static void ppir_codegen_print_prog(ppir_compiler *comp)
printf("========ppir codegen========\n"); printf("========ppir codegen========\n");
list_for_each_entry(ppir_block, block, &comp->block_list, list) { list_for_each_entry(ppir_block, block, &comp->block_list, list) {
list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
printf("%03d: ", instr->index); printf("%03d (@%6ld): ", instr->index, instr->offset);
int n = prog[0] & 0x1f; int n = prog[0] & 0x1f;
for (int i = 0; i < n; i++) { for (int i = 0; i < n; i++) {
if (i && i % 6 == 0) if (i && i % 6 == 0)
@ -655,6 +692,7 @@ bool ppir_codegen_prog(ppir_compiler *comp)
int size = 0; int size = 0;
list_for_each_entry(ppir_block, block, &comp->block_list, list) { list_for_each_entry(ppir_block, block, &comp->block_list, list) {
list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
instr->offset = size;
size += get_instr_encode_size(instr); size += get_instr_encode_size(instr);
} }
} }

View file

@ -135,6 +135,20 @@ static bool ppir_instr_insert_const(ppir_const *dst, const ppir_const *src,
return true; return true;
} }
/* Redirect a single source operand to a pipeline register.
 *
 * Nothing happens unless src refers to the same target as dest. When it
 * does, src is retyped to ppir_target_pipeline with the given pipeline, and
 * if a swizzle table is supplied each of the four source swizzle entries is
 * remapped through it.
 */
static void ppir_update_src_pipeline(ppir_pipeline pipeline, ppir_src *src,
                                     ppir_dest *dest, uint8_t *swizzle)
{
   if (!ppir_node_target_equal(src, dest))
      return;

   src->type = ppir_target_pipeline;
   src->pipeline = pipeline;

   /* No remapping table: keep the source swizzle as it is. */
   if (!swizzle)
      return;

   for (int chan = 0; chan < 4; chan++)
      src->swizzle[chan] = swizzle[src->swizzle[chan]];
}
/* make alu node src reflact the pipeline reg */ /* make alu node src reflact the pipeline reg */
static void ppir_instr_update_src_pipeline(ppir_instr *instr, ppir_pipeline pipeline, static void ppir_instr_update_src_pipeline(ppir_instr *instr, ppir_pipeline pipeline,
ppir_dest *dest, uint8_t *swizzle) ppir_dest *dest, uint8_t *swizzle)
@ -146,15 +160,16 @@ static void ppir_instr_update_src_pipeline(ppir_instr *instr, ppir_pipeline pipe
ppir_alu_node *alu = ppir_node_to_alu(instr->slots[i]); ppir_alu_node *alu = ppir_node_to_alu(instr->slots[i]);
for (int j = 0; j < alu->num_src; j++) { for (int j = 0; j < alu->num_src; j++) {
ppir_src *src = alu->src + j; ppir_src *src = alu->src + j;
if (ppir_node_target_equal(src, dest)) { ppir_update_src_pipeline(pipeline, src, dest, swizzle);
src->type = ppir_target_pipeline; }
src->pipeline = pipeline; }
if (swizzle) { ppir_node *branch_node = instr->slots[PPIR_INSTR_SLOT_BRANCH];
for (int k = 0; k < 4; k++) if (branch_node && (branch_node->type == ppir_node_type_branch)) {
src->swizzle[k] = swizzle[src->swizzle[k]]; ppir_branch_node *branch = ppir_node_to_branch(branch_node);
} for (int j = 0; j < 2; j++) {
} ppir_src *src = branch->src + j;
ppir_update_src_pipeline(pipeline, src, dest, swizzle);
} }
} }
} }
@ -234,6 +249,7 @@ static struct {
[PPIR_INSTR_SLOT_ALU_SCL_ADD] = { 4, "sadd" }, [PPIR_INSTR_SLOT_ALU_SCL_ADD] = { 4, "sadd" },
[PPIR_INSTR_SLOT_ALU_COMBINE] = { 4, "comb" }, [PPIR_INSTR_SLOT_ALU_COMBINE] = { 4, "comb" },
[PPIR_INSTR_SLOT_STORE_TEMP] = { 4, "stor" }, [PPIR_INSTR_SLOT_STORE_TEMP] = { 4, "stor" },
[PPIR_INSTR_SLOT_BRANCH] = { 4, "brch" },
}; };
void ppir_instr_print_list(ppir_compiler *comp) void ppir_instr_print_list(ppir_compiler *comp)

View file

@ -400,6 +400,40 @@ static bool ppir_lower_trunc(ppir_block *block, ppir_node *node)
return true; return true;
} }
/* Lower a branch node by giving it a second operand to compare against.
 *
 * A single-component SSA constant holding 0 is created, linked into the node
 * list relative to the branch, and wired in as src[1]. The branch condition
 * is then set to "greater than" or "less than" that constant.
 *
 * Returns false if the constant node cannot be created, true otherwise.
 */
static bool ppir_lower_branch(ppir_block *block, ppir_node *node)
{
   ppir_const_node *zero_const = ppir_node_create(block, ppir_op_const, -1, 0);
   if (!zero_const)
      return false;

   ppir_branch_node *br = ppir_node_to_branch(node);

   list_addtail(&zero_const->node.list, &node->list);

   /* Constant payload: one component, value 0. */
   zero_const->constant.num = 1;
   zero_const->constant.value[0].f = 0;

   /* Destination: a one-component SSA value written through mask 0x01. */
   zero_const->dest.type = ppir_target_ssa;
   zero_const->dest.write_mask = 0x01;
   zero_const->dest.ssa.num_components = 1;
   zero_const->dest.ssa.live_in = INT_MAX;
   zero_const->dest.ssa.live_out = 0;

   /* For now the branch condition is simply compared with 0. In the
    * future we should check whether the comparison node can be folded
    * into the branch itself, keeping the current approach as a fallback
    * for complex conditions.
    */
   br->src[1].type = ppir_target_ssa;
   br->src[1].ssa = &zero_const->dest.ssa;

   br->cond_lt = true;
   br->cond_gt = true;

   ppir_node_add_dep(&br->node, &zero_const->node);

   return true;
}
static bool (*ppir_lower_funcs[ppir_op_num])(ppir_block *, ppir_node *) = { static bool (*ppir_lower_funcs[ppir_op_num])(ppir_block *, ppir_node *) = {
[ppir_op_const] = ppir_lower_const, [ppir_op_const] = ppir_lower_const,
[ppir_op_dot2] = ppir_lower_dot, [ppir_op_dot2] = ppir_lower_dot,
@ -417,6 +451,7 @@ static bool (*ppir_lower_funcs[ppir_op_num])(ppir_block *, ppir_node *) = {
[ppir_op_load_texture] = ppir_lower_texture, [ppir_op_load_texture] = ppir_lower_texture,
[ppir_op_select] = ppir_lower_select, [ppir_op_select] = ppir_lower_select,
[ppir_op_trunc] = ppir_lower_trunc, [ppir_op_trunc] = ppir_lower_trunc,
[ppir_op_branch] = ppir_lower_branch,
}; };
bool ppir_lower_prog(ppir_compiler *comp) bool ppir_lower_prog(ppir_compiler *comp)

View file

@ -204,6 +204,57 @@ static ppir_node *ppir_emit_alu(ppir_block *block, nir_instr *ni)
return &node->node; return &node->node;
} }
static ppir_block *ppir_block_create(ppir_compiler *comp);
/* Create the compiler's discard block: a block holding one discard node.
 *
 * The new block is recorded in comp->discard_block as soon as it has been
 * created. Returns false if either the block or its discard node cannot be
 * created, true otherwise.
 */
static bool ppir_emit_discard_block(ppir_compiler *comp)
{
   ppir_block *discard_blk = ppir_block_create(comp);
   if (!discard_blk)
      return false;

   comp->discard_block = discard_blk;
   discard_blk->comp = comp;

   ppir_discard_node *discard_node =
      ppir_node_create(discard_blk, ppir_op_discard, -1, 0);
   if (!discard_node)
      return false;

   list_addtail(&discard_node->node.list, &discard_blk->node_list);
   return true;
}
/* Emit a NIR discard_if intrinsic as a branch to the discard block.
 *
 * The discard block is created on first use. The intrinsic's first source
 * becomes src[0] of the branch; the second source and the condition flags
 * are filled in later, during lowering.
 *
 * Returns the new branch node, or NULL if the discard block or the branch
 * node cannot be created.
 */
static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
{
   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(ni);
   ppir_compiler *comp = block->comp;

   if (!comp->discard_block) {
      if (!ppir_emit_discard_block(comp))
         return NULL;
   }

   ppir_node *branch_node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!branch_node)
      return NULL;

   ppir_branch_node *branch = ppir_node_to_branch(branch_node);

   /* second src and condition will be updated during lowering */
   ppir_node_add_src(comp, branch_node, &branch->src[0], &intrin->src[0],
                     u_bit_consecutive(0, intrin->num_components));

   branch->target = comp->discard_block;

   return branch_node;
}
/* Emit an unconditional discard: a fresh ppir_op_discard node in block.
 * The NIR instruction is not inspected. Returns whatever ppir_node_create()
 * returns.
 */
static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni)
{
   return ppir_node_create(block, ppir_op_discard, -1, 0);
}
static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni) static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
{ {
nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni); nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
@ -264,6 +315,12 @@ static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
return &snode->node; return &snode->node;
case nir_intrinsic_discard:
return ppir_emit_discard(block, ni);
case nir_intrinsic_discard_if:
return ppir_emit_discard_if(block, ni);
default: default:
ppir_error("unsupported nir_intrinsic_instr %s\n", ppir_error("unsupported nir_intrinsic_instr %s\n",
nir_intrinsic_infos[instr->intrinsic].name); nir_intrinsic_infos[instr->intrinsic].name);
@ -452,6 +509,46 @@ static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigne
return comp; return comp;
} }
/* Chain discard, store and branch nodes of each block in program order. */
static void ppir_add_ordering_deps(ppir_compiler *comp)
{
   /* Some intrinsics have no explicit data dependencies and rely purely on
    * instruction order; discard_if and store_output are an example. Without
    * a fake dependency between them the scheduler may place store_output
    * first, and since store_output terminates the shader on Utgard PP the
    * rest of it would never execute. So every discard/branch/store node is
    * made to depend on the previous such node in its block.
    *
    * TODO: the scheduler should schedule discard_if as early as possible,
    * otherwise we may end up with suboptimal code for cases like this:
    *
    *    s3 = s1 < s2
    *    discard_if s3
    *    s4 = s1 + s2
    *    store s4
    *
    * Here store depends on both discard_if and s4, but because dependencies
    * may be scheduled in any order the result can look like this:
    *
    *    instr1: s3 = s1 < s2
    *    instr2: s4 = s1 + s2
    *    instr3: discard_if s3
    *    instr4: store s4
    */
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      /* Most recent order-sensitive node seen in this block, if any. */
      ppir_node *last_ordered = NULL;

      list_for_each_entry(ppir_node, node, &block->node_list, list) {
         bool order_sensitive;

         switch (node->type) {
         case ppir_node_type_discard:
         case ppir_node_type_store:
         case ppir_node_type_branch:
            order_sensitive = true;
            break;
         default:
            order_sensitive = false;
            break;
         }

         if (order_sensitive) {
            if (last_ordered)
               ppir_node_add_dep(node, last_ordered);
            last_ordered = node;
         }
      }
   }
}
bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir, bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
struct ra_regs *ra) struct ra_regs *ra)
{ {
@ -477,6 +574,13 @@ bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
if (!ppir_emit_cf_list(comp, &func->body)) if (!ppir_emit_cf_list(comp, &func->body))
goto err_out0; goto err_out0;
/* If we have discard block add it to the very end */
if (comp->discard_block)
list_addtail(&comp->discard_block->list, &comp->block_list);
ppir_add_ordering_deps(comp);
ppir_node_print_prog(comp); ppir_node_print_prog(comp);
if (!ppir_lower_prog(comp)) if (!ppir_lower_prog(comp))

View file

@ -281,6 +281,20 @@ const ppir_op_info ppir_op_infos[] = {
PPIR_INSTR_SLOT_STORE_TEMP, PPIR_INSTR_SLOT_END PPIR_INSTR_SLOT_STORE_TEMP, PPIR_INSTR_SLOT_END
}, },
}, },
[ppir_op_discard] = {
.name = "discard",
.type = ppir_node_type_discard,
.slots = (int []) {
PPIR_INSTR_SLOT_BRANCH, PPIR_INSTR_SLOT_END
},
},
[ppir_op_branch] = {
.name = "branch",
.type = ppir_node_type_branch,
.slots = (int []) {
PPIR_INSTR_SLOT_BRANCH, PPIR_INSTR_SLOT_END
},
},
}; };
void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask) void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask)
@ -292,6 +306,8 @@ void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask)
[ppir_node_type_load] = sizeof(ppir_load_node), [ppir_node_type_load] = sizeof(ppir_load_node),
[ppir_node_type_store] = sizeof(ppir_store_node), [ppir_node_type_store] = sizeof(ppir_store_node),
[ppir_node_type_load_texture] = sizeof(ppir_load_texture_node), [ppir_node_type_load_texture] = sizeof(ppir_load_texture_node),
[ppir_node_type_discard] = sizeof(ppir_discard_node),
[ppir_node_type_branch] = sizeof(ppir_branch_node),
}; };
ppir_node_type type = ppir_op_infos[op].type; ppir_node_type type = ppir_op_infos[op].type;

View file

@ -93,7 +93,8 @@ static bool insert_to_each_succ_instr(ppir_block *block, ppir_node *node)
ppir_node_foreach_succ_safe(node, dep) { ppir_node_foreach_succ_safe(node, dep) {
ppir_node *succ = dep->succ; ppir_node *succ = dep->succ;
assert(succ->type == ppir_node_type_alu); assert(succ->type == ppir_node_type_alu ||
succ->type == ppir_node_type_branch);
if (!ppir_instr_insert_node(succ->instr, node)) { if (!ppir_instr_insert_node(succ->instr, node)) {
/* create a move node to insert for failed node */ /* create a move node to insert for failed node */
@ -323,6 +324,15 @@ static bool ppir_do_node_to_instr(ppir_block *block, ppir_node *node)
node = move; node = move;
break; break;
} }
case ppir_node_type_discard:
if (!create_new_instr(block, node))
return false;
node->instr->is_end = true;
break;
case ppir_node_type_branch:
if (!create_new_instr(block, node))
return false;
break;
default: default:
return false; return false;
} }

View file

@ -108,6 +108,9 @@ typedef enum {
ppir_op_const, ppir_op_const,
ppir_op_discard,
ppir_op_branch,
ppir_op_num, ppir_op_num,
} ppir_op; } ppir_op;
@ -117,6 +120,8 @@ typedef enum {
ppir_node_type_load, ppir_node_type_load,
ppir_node_type_store, ppir_node_type_store,
ppir_node_type_load_texture, ppir_node_type_load_texture,
ppir_node_type_discard,
ppir_node_type_branch,
} ppir_node_type; } ppir_node_type;
typedef struct { typedef struct {
@ -254,6 +259,10 @@ typedef struct {
int sampler_dim; int sampler_dim;
} ppir_load_texture_node; } ppir_load_texture_node;
/* Node type for ppir_op_discard. It carries no data of its own beyond the
 * common node header.
 */
typedef struct {
ppir_node node;
} ppir_discard_node;
enum ppir_instr_slot { enum ppir_instr_slot {
PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_VARYING,
PPIR_INSTR_SLOT_TEXLD, PPIR_INSTR_SLOT_TEXLD,
@ -264,6 +273,7 @@ enum ppir_instr_slot {
PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_SCL_ADD,
PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_ALU_COMBINE,
PPIR_INSTR_SLOT_STORE_TEMP, PPIR_INSTR_SLOT_STORE_TEMP,
PPIR_INSTR_SLOT_BRANCH,
PPIR_INSTR_SLOT_NUM, PPIR_INSTR_SLOT_NUM,
PPIR_INSTR_SLOT_END, PPIR_INSTR_SLOT_END,
PPIR_INSTR_SLOT_ALU_START = PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_START = PPIR_INSTR_SLOT_ALU_VEC_MUL,
@ -287,6 +297,7 @@ typedef struct ppir_instr {
int est; /* earliest start time */ int est; /* earliest start time */
int parent_index; int parent_index;
bool scheduled; bool scheduled;
off_t offset;
} ppir_instr; } ppir_instr;
typedef struct ppir_block { typedef struct ppir_block {
@ -300,6 +311,15 @@ typedef struct ppir_block {
int sched_instr_base; int sched_instr_base;
} ppir_block; } ppir_block;
/* Node type for ppir_op_branch: a conditional jump to another block. */
typedef struct {
/* common node header */
ppir_node node;
/* the two operands compared by the branch; for discard_if, src[0] is the
 * condition and src[1] is filled in during lowering */
ppir_src src[2];
/* condition flags, copied as-is into the encoded branch field */
bool cond_gt;
bool cond_eq;
bool cond_lt;
/* block to jump to; codegen uses the first instruction of this block */
ppir_block *target;
} ppir_branch_node;
struct ra_regs; struct ra_regs;
struct lima_fs_shader_state; struct lima_fs_shader_state;
@ -322,6 +342,8 @@ typedef struct ppir_compiler {
/* for regalloc spilling debug */ /* for regalloc spilling debug */
int force_spilling; int force_spilling;
ppir_block *discard_block;
} ppir_compiler; } ppir_compiler;
void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask); void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask);
@ -377,6 +399,8 @@ static inline ppir_node *ppir_node_first_pred(ppir_node *node)
#define ppir_node_to_load(node) ((ppir_load_node *)(node)) #define ppir_node_to_load(node) ((ppir_load_node *)(node))
#define ppir_node_to_store(node) ((ppir_store_node *)(node)) #define ppir_node_to_store(node) ((ppir_store_node *)(node))
#define ppir_node_to_load_texture(node) ((ppir_load_texture_node *)(node)) #define ppir_node_to_load_texture(node) ((ppir_load_texture_node *)(node))
/* Unchecked pointer casts from a generic node to the discard/branch node
 * structs; both structs start with a ppir_node member.
 */
#define ppir_node_to_discard(node) ((ppir_discard_node *)(node))
#define ppir_node_to_branch(node) ((ppir_branch_node *)(node))
static inline ppir_dest *ppir_node_get_dest(ppir_node *node) static inline ppir_dest *ppir_node_get_dest(ppir_node *node)
{ {