mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 05:18:08 +02:00
lima/ppir: implement gl_FragDepth support
Mali4x0 supports writing depth and stencil from the fragment shader, and we've been using it for quite a while for depth/stencil buffer reload. The missing part was specifying the output register for depth/stencil. To figure it out, I changed the reload shader to use register $4 as output and poked RSW bits (or rather consecutive 4-bit groups) until tests that rely on reload started to pass again. It turns out that the register number for gl_FragDepth/gl_FragStencil is in rsw->depth_test, and the register number for gl_FragColor is in rsw->multi_sample, where it's repeated 4 times for some reason (likely for MSAA?). With this knowledge we can now modify the ppir compiler to support multiple store_output intrinsics. To do that, just add the destination SSA for each store_output to the register list for regalloc and mark it explicitly as an output. Since it's never read in the shader, we have to take care of it in liveness analysis - basically just mark it alive from the time it's written to the end of the block. If it's live only in the last instruction, mark it as live_internal, so regalloc doesn't clobber it. Then just let regalloc do its job, and then copy the register number to the shader state and program it in the RSW. The tricky part is gl_FragStencil, since it resides in the same register as gl_FragDepth, and with the current design of the compiler it's hard to merge them. However, gl_FragStencil doesn't seem to be part of GL2 or GLES2, so we can just leave it unimplemented. We also need to take care of the stop bit for instructions - now we can't just set it in every instruction that stores an output, since there may be several outputs. So if there are any store_output instructions in the block, just mark the block as having a stop, and set the stop bit in the last instruction of the block. The only exception is discard - we always need to set the stop bit in the discard instruction.
Reviewed-by: Andreas Baierl <ichgeh@imkreisrum.de> Reviewed-by: Erico Nunes <nunes.erico@gmail.com> Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13830>
This commit is contained in:
parent
98a7c4c6f8
commit
3b15fb3575
11 changed files with 144 additions and 37 deletions
|
|
@ -773,7 +773,7 @@ static int encode_instr(ppir_instr *instr, void *code, void *last_code)
|
|||
size = align_to_word(size) + 1;
|
||||
|
||||
ctrl->count = size;
|
||||
if (instr->is_end)
|
||||
if (instr->stop)
|
||||
ctrl->stop = true;
|
||||
|
||||
if (last_code) {
|
||||
|
|
@ -818,6 +818,11 @@ bool ppir_codegen_prog(ppir_compiler *comp)
|
|||
instr->encode_size = get_instr_encode_size(instr);
|
||||
size += instr->encode_size;
|
||||
}
|
||||
/* Set stop flag for the last instruction if block has stop flag */
|
||||
if (block->stop) {
|
||||
ppir_instr *instr = list_last_entry(&block->instr_list, ppir_instr, list);
|
||||
instr->stop = true;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t *prog = rzalloc_size(comp->prog, size * sizeof(uint32_t));
|
||||
|
|
|
|||
|
|
@ -284,7 +284,7 @@ void ppir_instr_print_list(ppir_compiler *comp)
|
|||
list_for_each_entry(ppir_block, block, &comp->block_list, list) {
|
||||
printf("-------block %3d-------\n", block->index);
|
||||
list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
|
||||
printf("%c%03d: ", instr->is_end ? '*' : ' ', instr->index);
|
||||
printf("%c%03d: ", instr->stop ? '*' : ' ', instr->index);
|
||||
for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
|
||||
ppir_node *node = instr->slots[i];
|
||||
if (node)
|
||||
|
|
|
|||
|
|
@ -121,7 +121,7 @@ ppir_liveness_instr_srcs(ppir_compiler *comp, ppir_instr *instr)
|
|||
/* Update the liveness information of the instruction by removing its
|
||||
* dests from the live_in set. */
|
||||
static void
|
||||
ppir_liveness_instr_dest(ppir_compiler *comp, ppir_instr *instr)
|
||||
ppir_liveness_instr_dest(ppir_compiler *comp, ppir_instr *instr, ppir_instr *last)
|
||||
{
|
||||
for (int i = PPIR_INSTR_SLOT_NUM-1; i >= 0; i--) {
|
||||
ppir_node *node = instr->slots[i];
|
||||
|
|
@ -146,9 +146,18 @@ ppir_liveness_instr_dest(ppir_compiler *comp, ppir_instr *instr)
|
|||
unsigned int index = reg->regalloc_index;
|
||||
bool live = BITSET_TEST(instr->live_set, index);
|
||||
|
||||
/* If it's an out reg, it's alive till the end of the block, so add it
|
||||
* to live_set of the last instruction */
|
||||
if (!live && reg->out_reg && (instr != last)) {
|
||||
BITSET_SET(last->live_set, index);
|
||||
BITSET_CLEAR(instr->live_set, index);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* If a register is written but wasn't read in a later instruction, it is
|
||||
* either dead code or a bug. For now, assign an interference to it to
|
||||
* ensure it doesn't get assigned a live register and overwrites it. */
|
||||
* either an output register in last instruction, dead code or a bug.
|
||||
* For now, assign an interference to it to ensure it doesn't get assigned
|
||||
* a live register and overwrites it. */
|
||||
if (!live) {
|
||||
BITSET_SET(instr->live_internal, index);
|
||||
continue;
|
||||
|
|
@ -230,7 +239,7 @@ ppir_liveness_compute_live_sets(ppir_compiler *comp)
|
|||
instr->live_mask, next_instr->live_mask);
|
||||
}
|
||||
|
||||
ppir_liveness_instr_dest(comp, instr);
|
||||
ppir_liveness_instr_dest(comp, instr, last);
|
||||
ppir_liveness_instr_srcs(comp, instr);
|
||||
|
||||
cont |= !ppir_liveness_set_equal(comp,
|
||||
|
|
|
|||
|
|
@ -345,6 +345,18 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
|
|||
* back to inserting a mov at the end.
|
||||
* If the source node will only be able to output to pipeline
|
||||
* registers, fall back to the mov as well. */
|
||||
assert(nir_src_is_const(instr->src[1]) &&
|
||||
"lima doesn't support indirect outputs");
|
||||
|
||||
nir_io_semantics io = nir_intrinsic_io_semantics(instr);
|
||||
unsigned offset = nir_src_as_uint(instr->src[1]);
|
||||
unsigned slot = io.location + offset;
|
||||
ppir_output_type out_type = ppir_nir_output_to_ppir(slot);
|
||||
if (out_type == ppir_output_invalid) {
|
||||
ppir_debug("Unsupported output type: %d\n", slot);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!block->comp->uses_discard && instr->src->is_ssa) {
|
||||
node = block->comp->var_nodes[instr->src->ssa->index];
|
||||
switch (node->op) {
|
||||
|
|
@ -352,9 +364,12 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
|
|||
case ppir_op_load_texture:
|
||||
case ppir_op_const:
|
||||
break;
|
||||
default:
|
||||
node->is_end = 1;
|
||||
default: {
|
||||
ppir_dest *dest = ppir_node_get_dest(node);
|
||||
dest->ssa.out_type = out_type;
|
||||
node->is_out = 1;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -367,6 +382,7 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
|
|||
dest->ssa.num_components = instr->num_components;
|
||||
dest->ssa.index = 0;
|
||||
dest->write_mask = u_bit_consecutive(0, instr->num_components);
|
||||
dest->ssa.out_type = out_type;
|
||||
|
||||
alu_node->num_src = 1;
|
||||
|
||||
|
|
@ -376,7 +392,7 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
|
|||
ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, instr->src,
|
||||
u_bit_consecutive(0, instr->num_components));
|
||||
|
||||
alu_node->node.is_end = 1;
|
||||
alu_node->node.is_out = 1;
|
||||
|
||||
list_addtail(&alu_node->node.list, &block->node_list);
|
||||
return true;
|
||||
|
|
@ -798,6 +814,7 @@ static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigne
|
|||
comp->var_nodes = (ppir_node **)(comp + 1);
|
||||
comp->reg_base = num_ssa;
|
||||
comp->prog = prog;
|
||||
|
||||
return comp;
|
||||
}
|
||||
|
||||
|
|
@ -833,7 +850,7 @@ static void ppir_add_ordering_deps(ppir_compiler *comp)
|
|||
if (prev_node && ppir_node_is_root(node) && node->op != ppir_op_const) {
|
||||
ppir_node_add_dep(prev_node, node, ppir_dep_sequence);
|
||||
}
|
||||
if (node->is_end ||
|
||||
if (node->is_out ||
|
||||
node->op == ppir_op_discard ||
|
||||
node->op == ppir_op_store_temp ||
|
||||
node->op == ppir_op_branch) {
|
||||
|
|
@ -930,18 +947,11 @@ bool ppir_compile_nir(struct lima_fs_compiled_shader *prog, struct nir_shader *n
|
|||
}
|
||||
}
|
||||
|
||||
/* Validate outputs, we support only gl_FragColor */
|
||||
nir_foreach_shader_out_variable(var, nir) {
|
||||
switch (var->data.location) {
|
||||
case FRAG_RESULT_COLOR:
|
||||
case FRAG_RESULT_DATA0:
|
||||
break;
|
||||
default:
|
||||
ppir_error("unsupported output type\n");
|
||||
goto err_out0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
comp->out_type_to_reg = rzalloc_size(comp, sizeof(int) * ppir_output_num);
|
||||
|
||||
/* -1 means reg is not written by the shader */
|
||||
for (int i = 0; i < ppir_output_num; i++)
|
||||
comp->out_type_to_reg[i] = -1;
|
||||
|
||||
foreach_list_typed(nir_register, reg, node, &func->registers) {
|
||||
ppir_reg *r = rzalloc(comp, ppir_reg);
|
||||
|
|
|
|||
|
|
@ -618,9 +618,9 @@ static ppir_node *ppir_node_insert_mov_local(ppir_node *node)
|
|||
ppir_node_add_dep(move, node, ppir_dep_src);
|
||||
list_addtail(&move->list, &node->list);
|
||||
|
||||
if (node->is_end) {
|
||||
node->is_end = false;
|
||||
move->is_end = true;
|
||||
if (node->is_out) {
|
||||
node->is_out = false;
|
||||
move->is_out = true;
|
||||
}
|
||||
|
||||
return move;
|
||||
|
|
|
|||
|
|
@ -203,7 +203,7 @@ static bool ppir_do_one_node_to_instr(ppir_block *block, ppir_node *node)
|
|||
case ppir_node_type_discard:
|
||||
if (!create_new_instr(block, node))
|
||||
return false;
|
||||
node->instr->is_end = true;
|
||||
block->stop = true;
|
||||
break;
|
||||
case ppir_node_type_branch:
|
||||
if (!create_new_instr(block, node))
|
||||
|
|
@ -276,8 +276,13 @@ static bool ppir_do_node_to_instr(ppir_block *block, ppir_node *root)
|
|||
if (!ppir_do_one_node_to_instr(block, node))
|
||||
return false;
|
||||
|
||||
if (node->is_end)
|
||||
node->instr->is_end = true;
|
||||
/* The node writes output register. We can't stop at this exact
|
||||
* instruction because there may be another node that writes another
|
||||
* output, so set stop flag for the block. We will set stop flag on
|
||||
* the last instruction of the block during codegen
|
||||
*/
|
||||
if (node->is_out)
|
||||
block->stop = true;
|
||||
|
||||
ppir_node_foreach_pred(node, dep) {
|
||||
ppir_node *pred = dep->pred;
|
||||
|
|
|
|||
|
|
@ -161,7 +161,7 @@ typedef struct ppir_node {
|
|||
struct ppir_instr *instr;
|
||||
int instr_pos;
|
||||
struct ppir_block *block;
|
||||
bool is_end;
|
||||
bool is_out;
|
||||
bool succ_different_block;
|
||||
|
||||
/* for scheduler */
|
||||
|
|
@ -179,9 +179,42 @@ typedef enum {
|
|||
ppir_pipeline_reg_discard, /* varying load */
|
||||
} ppir_pipeline;
|
||||
|
||||
/* Kinds of fragment shader output tracked by ppir.
 * ppir_output_num follows the valid kinds (color, depth) and so equals their
 * count; ppir_output_invalid (-1) marks an output that is not supported. */
typedef enum {
|
||||
ppir_output_color,
|
||||
ppir_output_depth,
|
||||
ppir_output_num,
|
||||
ppir_output_invalid = -1,
|
||||
} ppir_output_type;
|
||||
|
||||
/* Return a printable name for an output type.
 * Any value other than ppir_output_color or ppir_output_depth yields
 * "INVALID". */
static inline const char *ppir_output_type_to_str(ppir_output_type type)
|
||||
{
|
||||
switch (type) {
|
||||
case ppir_output_color:
|
||||
return "OUTPUT_COLOR";
|
||||
case ppir_output_depth:
|
||||
return "OUTPUT_DEPTH";
|
||||
default:
|
||||
return "INVALID";
|
||||
}
|
||||
}
|
||||
|
||||
/* Map a fragment result slot to a ppir output type.
 * FRAG_RESULT_COLOR and FRAG_RESULT_DATA0 both map to ppir_output_color,
 * FRAG_RESULT_DEPTH maps to ppir_output_depth, and every other slot maps to
 * ppir_output_invalid. */
static inline ppir_output_type ppir_nir_output_to_ppir(gl_frag_result res)
|
||||
{
|
||||
switch (res) {
|
||||
case FRAG_RESULT_COLOR:
|
||||
case FRAG_RESULT_DATA0:
|
||||
return ppir_output_color;
|
||||
case FRAG_RESULT_DEPTH:
|
||||
return ppir_output_depth;
|
||||
default:
|
||||
return ppir_output_invalid;
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct ppir_reg {
|
||||
struct list_head list;
|
||||
int index;
|
||||
ppir_output_type out_type;
|
||||
int regalloc_index;
|
||||
int num_components;
|
||||
|
||||
|
|
@ -191,6 +224,7 @@ typedef struct ppir_reg {
|
|||
bool is_head;
|
||||
bool spilled;
|
||||
bool undef;
|
||||
bool out_reg;
|
||||
} ppir_reg;
|
||||
|
||||
typedef enum {
|
||||
|
|
@ -316,7 +350,7 @@ typedef struct ppir_instr {
|
|||
|
||||
ppir_node *slots[PPIR_INSTR_SLOT_NUM];
|
||||
ppir_const constant[2];
|
||||
bool is_end;
|
||||
bool stop;
|
||||
|
||||
/* for scheduler */
|
||||
struct list_head succ_list;
|
||||
|
|
@ -340,6 +374,7 @@ typedef struct ppir_block {
|
|||
struct list_head list;
|
||||
struct list_head node_list;
|
||||
struct list_head instr_list;
|
||||
bool stop;
|
||||
|
||||
struct ppir_block *successors[2];
|
||||
|
||||
|
|
@ -370,6 +405,7 @@ typedef struct ppir_compiler {
|
|||
struct hash_table_u64 *blocks;
|
||||
int cur_index;
|
||||
int cur_instr_index;
|
||||
int *out_type_to_reg;
|
||||
|
||||
struct list_head reg_list;
|
||||
int reg_num;
|
||||
|
|
|
|||
|
|
@ -82,9 +82,6 @@ static void ppir_regalloc_update_reglist_ssa(ppir_compiler *comp)
|
|||
{
|
||||
list_for_each_entry(ppir_block, block, &comp->block_list, list) {
|
||||
list_for_each_entry(ppir_node, node, &block->node_list, list) {
|
||||
if (node->is_end)
|
||||
continue;
|
||||
|
||||
if (!node->instr || node->op == ppir_op_const)
|
||||
continue;
|
||||
|
||||
|
|
@ -94,6 +91,8 @@ static void ppir_regalloc_update_reglist_ssa(ppir_compiler *comp)
|
|||
|
||||
if (dest->type == ppir_target_ssa) {
|
||||
reg = &dest->ssa;
|
||||
if (node->is_out)
|
||||
reg->out_reg = true;
|
||||
list_addtail(&reg->list, &comp->reg_list);
|
||||
comp->reg_num++;
|
||||
}
|
||||
|
|
@ -133,6 +132,14 @@ static void ppir_regalloc_print_result(ppir_compiler *comp)
|
|||
}
|
||||
}
|
||||
printf("--------------------------\n");
|
||||
|
||||
printf("======ppir output regs======\n");
|
||||
for (int i = 0; i < ppir_output_num; i++) {
|
||||
if (comp->out_type_to_reg[i] != -1)
|
||||
printf("%s: $%d\n", ppir_output_type_to_str(i),
|
||||
(int)comp->out_type_to_reg[i]);
|
||||
}
|
||||
printf("--------------------------\n");
|
||||
}
|
||||
|
||||
static bool create_new_instr_after(ppir_block *block, ppir_instr *ref,
|
||||
|
|
@ -578,6 +585,11 @@ static bool ppir_regalloc_prog_try(ppir_compiler *comp, bool *spilled)
|
|||
n = 0;
|
||||
list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
|
||||
reg->index = ra_get_node_reg(g, n++);
|
||||
if (reg->out_reg) {
|
||||
/* We need actual reg number, we don't have swizzle for output regs */
|
||||
assert(!(reg->index & 0x3) && "ppir: output regs don't have swizzle");
|
||||
comp->out_type_to_reg[reg->out_type] = reg->index / 4;
|
||||
}
|
||||
}
|
||||
|
||||
ralloc_free(g);
|
||||
|
|
@ -604,8 +616,11 @@ bool ppir_regalloc_prog(ppir_compiler *comp)
|
|||
ppir_regalloc_update_reglist_ssa(comp);
|
||||
|
||||
/* No registers? Probably shader consists of discard instruction */
|
||||
if (list_is_empty(&comp->reg_list))
|
||||
if (list_is_empty(&comp->reg_list)) {
|
||||
comp->prog->state.frag_color_reg = 0;
|
||||
comp->prog->state.frag_depth_reg = -1;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* this will most likely succeed in the first
|
||||
* try, except for very complicated shaders */
|
||||
|
|
@ -613,5 +628,10 @@ bool ppir_regalloc_prog(ppir_compiler *comp)
|
|||
if (!spilled)
|
||||
return false;
|
||||
|
||||
comp->prog->state.frag_color_reg =
|
||||
comp->out_type_to_reg[ppir_output_color];
|
||||
comp->prog->state.frag_depth_reg =
|
||||
comp->out_type_to_reg[ppir_output_depth];
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -49,6 +49,8 @@ struct lima_fs_compiled_shader {
|
|||
struct {
|
||||
int shader_size;
|
||||
int stack_size;
|
||||
int frag_color_reg;
|
||||
int frag_depth_reg;
|
||||
bool uses_discard;
|
||||
} state;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -677,6 +677,12 @@ lima_pack_render_state(struct lima_context *ctx, const struct pipe_draw_info *in
|
|||
if (!rst->depth_clip_far || ctx->viewport.far == 1.0f)
|
||||
render->depth_test |= 0x20; /* don't clip depth far */
|
||||
|
||||
if (fs->state.frag_depth_reg != -1) {
|
||||
render->depth_test |= (fs->state.frag_depth_reg << 6);
|
||||
/* Shader writes depth */
|
||||
render->depth_test |= 0x801;
|
||||
}
|
||||
|
||||
ushort far, near;
|
||||
|
||||
near = float_to_ushort(ctx->viewport.near);
|
||||
|
|
@ -729,6 +735,12 @@ lima_pack_render_state(struct lima_context *ctx, const struct pipe_draw_info *in
|
|||
if (ctx->framebuffer.base.samples)
|
||||
render->multi_sample |= 0x68;
|
||||
|
||||
/* Set gl_FragColor register, need to specify it 4 times */
|
||||
render->multi_sample |= (fs->state.frag_color_reg << 28) |
|
||||
(fs->state.frag_color_reg << 24) |
|
||||
(fs->state.frag_color_reg << 20) |
|
||||
(fs->state.frag_color_reg << 16);
|
||||
|
||||
/* alpha test */
|
||||
if (ctx->zsa->base.alpha_enabled) {
|
||||
render->multi_sample |= ctx->zsa->base.alpha_func;
|
||||
|
|
@ -755,7 +767,8 @@ lima_pack_render_state(struct lima_context *ctx, const struct pipe_draw_info *in
|
|||
render->aux1 |= 0x00002000;
|
||||
|
||||
if (fs->state.uses_discard ||
|
||||
ctx->zsa->base.alpha_enabled) {
|
||||
ctx->zsa->base.alpha_enabled ||
|
||||
fs->state.frag_depth_reg != -1) {
|
||||
early_z = false;
|
||||
pixel_kill = false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -525,7 +525,7 @@ parse_rsw(FILE *fp, uint32_t *value, int i, uint32_t *helper)
|
|||
fprintf(fp, ": ignore depth clip near");
|
||||
if ((*value & 0x00000020) == 0x00000020)
|
||||
fprintf(fp, ", ignore depth clip far");
|
||||
fprintf(fp, ", unknown bits 6-9: 0x%08x", *value & 0x000003c0);
|
||||
fprintf(fp, ", register for gl_FragDepth: $%d", (*value & 0x000003c0) >> 6);
|
||||
fprintf(fp, ", unknown bits 13-15: 0x%08x */\n", *value & 0x00000e000);
|
||||
break;
|
||||
case 4: /* DEPTH RANGE */
|
||||
|
|
@ -594,7 +594,14 @@ parse_rsw(FILE *fp, uint32_t *value, int i, uint32_t *helper)
|
|||
fprintf(fp, " */\n");
|
||||
else
|
||||
fprintf(fp, ", UNKNOWN\n");
|
||||
fprintf(fp, "\t\t\t\t\t\t/* %s(2)", render_state_infos[i].info);
|
||||
|
||||
fprintf(fp, "\t\t\t\t\t\t/* %s(3)", render_state_infos[i].info);
|
||||
fprintf(fp, ", register for gl_FragColor: $%d $%d $%d $%d */\n",
|
||||
(*value & 0xf0000000) >> 28,
|
||||
(*value & 0x0f000000) >> 24,
|
||||
(*value & 0x00f00000) >> 20,
|
||||
(*value & 0x000f0000) >> 16);
|
||||
fprintf(fp, "\t\t\t\t\t\t/* %s(3)", render_state_infos[i].info);
|
||||
fprintf(fp, ": alpha_test_func: %d (%s) */\n",
|
||||
(*value & 0x00000007),
|
||||
lima_get_compare_func_string((*value & 0x00000007))); /* alpha_test_func */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue