mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 05:08:08 +02:00
pan/midgard: Handle fragment writeout in RA
Rather than using a pile of hacks and awkward constructs in MIR to ensure the writeout parameter gets written into r0, let's add a dedicated shadow register class for writeout (interfering with work register r0) so we can express the writeout condition succinctly and directly. Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
This commit is contained in:
parent
116b17d2d1
commit
5e06d90c45
6 changed files with 49 additions and 24 deletions
|
|
@ -587,13 +587,14 @@ struct ra_graph;
|
|||
/* Broad types of register classes so we can handle special
|
||||
* registers */
|
||||
|
||||
#define NR_REG_CLASSES 5
|
||||
#define NR_REG_CLASSES 6
|
||||
|
||||
#define REG_CLASS_WORK 0
|
||||
#define REG_CLASS_LDST 1
|
||||
#define REG_CLASS_LDST27 2
|
||||
#define REG_CLASS_TEXR 3
|
||||
#define REG_CLASS_TEXW 4
|
||||
#define REG_CLASS_FRAGC 5
|
||||
|
||||
void mir_lower_special_reads(compiler_context *ctx);
|
||||
struct ra_graph* allocate_registers(compiler_context *ctx, bool *spilled);
|
||||
|
|
|
|||
|
|
@ -1379,16 +1379,7 @@ compute_builtin_arg(nir_op op)
|
|||
static void
|
||||
emit_fragment_store(compiler_context *ctx, unsigned src, unsigned rt)
|
||||
{
|
||||
/* First, move in whatever we're outputting */
|
||||
midgard_instruction move = v_mov(src, blank_alu_src, SSA_FIXED_REGISTER(0));
|
||||
if (rt != 0) {
|
||||
/* Force a tight schedule. TODO: Make the scheduler MRT aware */
|
||||
move.unit = UNIT_VMUL;
|
||||
move.precede_break = true;
|
||||
move.dont_eliminate = true;
|
||||
}
|
||||
|
||||
emit_mir_instruction(ctx, move);
|
||||
emit_explicit_constant(ctx, src, src);
|
||||
|
||||
/* If we're doing MRT, we need to specify the render target */
|
||||
|
||||
|
|
@ -1974,6 +1965,7 @@ inline_alu_constants(compiler_context *ctx, midgard_block *block)
|
|||
mir_foreach_instr_in_block(block, alu) {
|
||||
/* Other instructions cannot inline constants */
|
||||
if (alu->type != TAG_ALU_4) continue;
|
||||
if (alu->compact_branch) continue;
|
||||
|
||||
/* If there is already a constant here, we can do nothing */
|
||||
if (alu->has_constants) continue;
|
||||
|
|
|
|||
|
|
@ -42,10 +42,10 @@ struct midgard_screen {
|
|||
struct ra_regs *regs[9];
|
||||
|
||||
/* Work register classes corresponds to the above register sets. 20 per
|
||||
* set for 4 classes per work/ldst/ldst27/texr/texw. TODO: Unify with
|
||||
* set for 5 classes per work/ldst/ldst27/texr/texw/fragc. TODO: Unify with
|
||||
* compiler.h */
|
||||
|
||||
unsigned reg_classes[9][4 * 5];
|
||||
unsigned reg_classes[9][5 * 5];
|
||||
};
|
||||
|
||||
/* Define the general compiler entry point */
|
||||
|
|
|
|||
|
|
@ -49,6 +49,7 @@
|
|||
/* We have overlapping register classes for special registers, handled via
|
||||
* shadows */
|
||||
|
||||
#define SHADOW_R0 17
|
||||
#define SHADOW_R28 18
|
||||
#define SHADOW_R29 19
|
||||
|
||||
|
|
@ -159,6 +160,8 @@ index_to_reg(compiler_context *ctx, struct ra_graph *g, unsigned reg)
|
|||
|
||||
if (phys >= SHADOW_R28 && phys <= SHADOW_R29)
|
||||
phys += 28 - SHADOW_R28;
|
||||
else if (phys == SHADOW_R0)
|
||||
phys = 0;
|
||||
|
||||
struct phys_reg r = {
|
||||
.reg = phys,
|
||||
|
|
@ -180,12 +183,12 @@ index_to_reg(compiler_context *ctx, struct ra_graph *g, unsigned reg)
|
|||
* special register allocation */
|
||||
|
||||
static void
|
||||
add_shadow_conflicts (struct ra_regs *regs, unsigned base, unsigned shadow)
|
||||
add_shadow_conflicts (struct ra_regs *regs, unsigned base, unsigned shadow, unsigned shadow_count)
|
||||
{
|
||||
for (unsigned a = 0; a < WORK_STRIDE; ++a) {
|
||||
unsigned reg_a = (WORK_STRIDE * base) + a;
|
||||
|
||||
for (unsigned b = 0; b < WORK_STRIDE; ++b) {
|
||||
for (unsigned b = 0; b < shadow_count; ++b) {
|
||||
unsigned reg_b = (WORK_STRIDE * shadow) + b;
|
||||
|
||||
ra_add_reg_conflict(regs, reg_a, reg_b);
|
||||
|
|
@ -202,7 +205,7 @@ create_register_set(unsigned work_count, unsigned *classes)
|
|||
/* First, initialize the RA */
|
||||
struct ra_regs *regs = ra_alloc_reg_set(NULL, virtual_count, true);
|
||||
|
||||
for (unsigned c = 0; c < NR_REG_CLASSES; ++c) {
|
||||
for (unsigned c = 0; c < (NR_REG_CLASSES - 1); ++c) {
|
||||
int work_vec4 = ra_alloc_reg_class(regs);
|
||||
int work_vec3 = ra_alloc_reg_class(regs);
|
||||
int work_vec2 = ra_alloc_reg_class(regs);
|
||||
|
|
@ -253,10 +256,18 @@ create_register_set(unsigned work_count, unsigned *classes)
|
|||
}
|
||||
}
|
||||
|
||||
int fragc = ra_alloc_reg_class(regs);
|
||||
|
||||
classes[4*REG_CLASS_FRAGC + 0] = fragc;
|
||||
classes[4*REG_CLASS_FRAGC + 1] = fragc;
|
||||
classes[4*REG_CLASS_FRAGC + 2] = fragc;
|
||||
classes[4*REG_CLASS_FRAGC + 3] = fragc;
|
||||
ra_class_add_reg(regs, fragc, WORK_STRIDE * SHADOW_R0);
|
||||
|
||||
/* We have duplicate classes */
|
||||
add_shadow_conflicts(regs, 28, SHADOW_R28);
|
||||
add_shadow_conflicts(regs, 29, SHADOW_R29);
|
||||
add_shadow_conflicts(regs, 0, SHADOW_R0, 1);
|
||||
add_shadow_conflicts(regs, 28, SHADOW_R28, WORK_STRIDE);
|
||||
add_shadow_conflicts(regs, 29, SHADOW_R29, WORK_STRIDE);
|
||||
|
||||
/* We're done setting up */
|
||||
ra_set_finalize(regs, NULL);
|
||||
|
|
@ -399,6 +410,7 @@ mir_lower_special_reads(compiler_context *ctx)
|
|||
|
||||
unsigned *alur = calloc(sz, 1);
|
||||
unsigned *aluw = calloc(sz, 1);
|
||||
unsigned *brar = calloc(sz, 1);
|
||||
unsigned *ldst = calloc(sz, 1);
|
||||
unsigned *texr = calloc(sz, 1);
|
||||
unsigned *texw = calloc(sz, 1);
|
||||
|
|
@ -412,6 +424,10 @@ mir_lower_special_reads(compiler_context *ctx)
|
|||
mark_node_class(alur, ins->src[0]);
|
||||
mark_node_class(alur, ins->src[1]);
|
||||
mark_node_class(alur, ins->src[2]);
|
||||
|
||||
if (ins->compact_branch && ins->writeout)
|
||||
mark_node_class(brar, ins->src[0]);
|
||||
|
||||
break;
|
||||
|
||||
case TAG_LOAD_STORE_4:
|
||||
|
|
@ -443,6 +459,7 @@ mir_lower_special_reads(compiler_context *ctx)
|
|||
for (unsigned i = 0; i < ctx->temp_count; ++i) {
|
||||
bool is_alur = BITSET_TEST(alur, i);
|
||||
bool is_aluw = BITSET_TEST(aluw, i);
|
||||
bool is_brar = BITSET_TEST(brar, i);
|
||||
bool is_ldst = BITSET_TEST(ldst, i);
|
||||
bool is_texr = BITSET_TEST(texr, i);
|
||||
bool is_texw = BITSET_TEST(texw, i);
|
||||
|
|
@ -457,7 +474,8 @@ mir_lower_special_reads(compiler_context *ctx)
|
|||
(is_alur && (is_ldst || is_texr)) ||
|
||||
(is_ldst && (is_alur || is_texr || is_texw)) ||
|
||||
(is_texr && (is_alur || is_ldst || is_texw)) ||
|
||||
(is_texw && (is_aluw || is_ldst || is_texr));
|
||||
(is_texw && (is_aluw || is_ldst || is_texr)) ||
|
||||
(is_brar && is_texw);
|
||||
|
||||
if (!collision)
|
||||
continue;
|
||||
|
|
@ -465,8 +483,8 @@ mir_lower_special_reads(compiler_context *ctx)
|
|||
/* Use the index as-is as the work copy. Emit copies for
|
||||
* special uses */
|
||||
|
||||
unsigned classes[] = { TAG_LOAD_STORE_4, TAG_TEXTURE_4, TAG_TEXTURE_4 };
|
||||
bool collisions[] = { is_ldst, is_texr, is_texw && is_aluw };
|
||||
unsigned classes[] = { TAG_LOAD_STORE_4, TAG_TEXTURE_4, TAG_TEXTURE_4, TAG_ALU_4};
|
||||
bool collisions[] = { is_ldst, is_texr, is_texw && is_aluw, is_brar };
|
||||
|
||||
for (unsigned j = 0; j < ARRAY_SIZE(collisions); ++j) {
|
||||
if (!collisions[j]) continue;
|
||||
|
|
@ -517,6 +535,7 @@ mir_lower_special_reads(compiler_context *ctx)
|
|||
|
||||
free(alur);
|
||||
free(aluw);
|
||||
free(brar);
|
||||
free(ldst);
|
||||
free(texr);
|
||||
free(texw);
|
||||
|
|
@ -766,6 +785,12 @@ allocate_registers(compiler_context *ctx, bool *spilled)
|
|||
assert(check_read_class(found_class, ins->type, ins->src[2]));
|
||||
}
|
||||
|
||||
/* Mark writeout to r0 */
|
||||
mir_foreach_instr_global(ctx, ins) {
|
||||
if (ins->compact_branch && ins->writeout)
|
||||
set_class(found_class, ins->src[0], REG_CLASS_FRAGC);
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < ctx->temp_count; ++i) {
|
||||
unsigned class = found_class[i];
|
||||
ra_set_node_class(g, i, classes[class]);
|
||||
|
|
|
|||
|
|
@ -60,6 +60,13 @@ mir_pipeline_ins(
|
|||
for (unsigned i = 0; i < bundle->instruction_count; ++i) {
|
||||
midgard_instruction *q = bundle->instructions[i];
|
||||
read_mask |= mir_mask_of_read_components(q, node);
|
||||
|
||||
/* The fragment colour can't be pipelined (well, it is
|
||||
* pipelined in r0, but this is a delicate dance with
|
||||
* scheduling and RA, not for us to worry about) */
|
||||
|
||||
if (q->compact_branch && q->writeout && mir_has_arg(q, node))
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Now analyze for a write mask */
|
||||
|
|
|
|||
|
|
@ -160,7 +160,7 @@ midgard_has_hazard(
|
|||
*/
|
||||
|
||||
static bool
|
||||
can_writeout_fragment(compiler_context *ctx, midgard_instruction **bundle, unsigned count, unsigned node_count)
|
||||
can_writeout_fragment(compiler_context *ctx, midgard_instruction **bundle, unsigned count, unsigned node_count, unsigned r0)
|
||||
{
|
||||
/* First scan for which components of r0 are written out. Initially
|
||||
* none are written */
|
||||
|
|
@ -176,7 +176,7 @@ can_writeout_fragment(compiler_context *ctx, midgard_instruction **bundle, unsig
|
|||
for (unsigned i = 0; i < count; ++i) {
|
||||
midgard_instruction *ins = bundle[i];
|
||||
|
||||
if (ins->dest != SSA_FIXED_REGISTER(0))
|
||||
if (ins->dest != r0)
|
||||
continue;
|
||||
|
||||
/* Record written out mask */
|
||||
|
|
@ -516,7 +516,7 @@ schedule_bundle(compiler_context *ctx, midgard_block *block, midgard_instruction
|
|||
/* All of r0 has to be written out along with
|
||||
* the branch writeout */
|
||||
|
||||
if (ains->writeout && !can_writeout_fragment(ctx, scheduled, index, ctx->temp_count)) {
|
||||
if (ains->writeout && !can_writeout_fragment(ctx, scheduled, index, ctx->temp_count, ains->src[0])) {
|
||||
/* We only work on full moves
|
||||
* at the beginning. We could
|
||||
* probably do better */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue