intel/brw: Always have a (non-DO) block after a DO in the CFG

Make the "block after DO" more stable so that adding instructions after
a DO doesn't require repairing the CFG.  Use a new SHADER_OPCODE_FLOW
instruction that is a placeholder representing "go to the next block"
and disappears at code generation.

For some context, there are a few facts about how CFG currently works

- Blocks are assumed to not be empty;
- DO is always by itself in a block, i.e. starts and ends a block;
- There are no empty blocks;
- Predicated WHILE and CONTINUE will link to the "block after DO";
- When nesting loops, it is possible that the "block after DO" is
  another "DO".

Reasons and further explanations for those are in the brw_cfg.c comments.

What makes this new change useful is that a pass might want to add
instructions between two DO instructions.  When that happens, a new
block must be created and any predicated WHILE and CONTINUE must be
repaired.

So, instead of requiring a repair (which has proven to be tricky in
the past), this change adds a block that can be "virtually" empty but
allow instructions to be added without further changes.

One alternative design would be allowing empty blocks, that would be
a deeper change since the blocks are currently assumed to be not empty
in various places.  We'll save that for when other changes are made to
the CFG.

The problem described happens in brw_opt_combine_constants, and a
different patch will clean that up.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33536>
This commit is contained in:
Caio Oliveira 2025-02-13 18:38:00 -08:00 committed by Marge Bot
parent d32a5ab0e4
commit d2c39b1779
11 changed files with 68 additions and 8 deletions

View file

@ -713,6 +713,7 @@ namespace {
case SHADER_OPCODE_UNDEF:
case SHADER_OPCODE_HALT_TARGET:
case FS_OPCODE_SCHEDULING_FENCE:
case SHADER_OPCODE_FLOW:
return calculate_desc(info, EU_UNIT_NULL, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0);

View file

@ -847,12 +847,22 @@ public:
brw_shader *shader;
brw_inst *BREAK() const { return emit(BRW_OPCODE_BREAK); }
brw_inst *DO() const { return emit(BRW_OPCODE_DO); }
brw_inst *ENDIF() const { return emit(BRW_OPCODE_ENDIF); }
brw_inst *NOP() const { return emit(BRW_OPCODE_NOP); }
brw_inst *WHILE() const { return emit(BRW_OPCODE_WHILE); }
brw_inst *CONTINUE() const { return emit(BRW_OPCODE_CONTINUE); }
void DO() const {
emit(BRW_OPCODE_DO);
/* Ensure that there'll always be a block after DO to add
* instructions and serve as sucessor for predicated WHILE
* and CONTINUE.
*
* See more details in brw_cfg::validate().
*/
emit(SHADER_OPCODE_FLOW);
}
bool has_writemask_all() const {
return force_writemask_all;
}

View file

@ -215,6 +215,13 @@ cfg_t::cfg_t(const brw_shader *s, exec_list *instructions) :
inst->exec_node::remove();
switch (inst->opcode) {
case SHADER_OPCODE_FLOW:
cur->instructions.push_tail(inst);
next = new_block();
cur->add_successor(mem_ctx, next, bblock_link_logical);
set_next_block(&cur, next, ip);
break;
case BRW_OPCODE_IF:
cur->instructions.push_tail(inst);
@ -736,6 +743,8 @@ cfg_t::validate(const char *stage_abbrev)
}
}
cfgv_assert(!block->instructions.is_empty());
brw_inst *first_inst = block->start();
if (first_inst->opcode == BRW_OPCODE_DO) {
/* DO instructions both begin and end a block, so the DO instruction
@ -763,6 +772,24 @@ cfg_t::validate(const char *stage_abbrev)
cfgv_assert(logical_block != nullptr);
cfgv_assert(physical_block != nullptr);
/* A flow block (block ending with SHADER_OPCODE_FLOW) is
* used to ensure that the block right after DO is always
* present even if it doesn't have actual instructions.
*
* This way predicated WHILE and CONTINUE don't need to be
* repaired when adding instructions right after the DO.
* They will point to the flow block whether is empty or not.
*/
cfgv_assert(logical_block->end()->opcode == SHADER_OPCODE_FLOW);
}
brw_inst *last_inst = block->end();
if (last_inst->opcode == SHADER_OPCODE_FLOW) {
/* A flow block only has one successor -- the instruction disappears
* when generating code.
*/
cfgv_assert(block->children.length() == 1);
}
}
}

View file

@ -214,7 +214,8 @@ bblock_ends_with_control_flow(const struct bblock_t *block)
op == BRW_OPCODE_ELSE ||
op == BRW_OPCODE_WHILE ||
op == BRW_OPCODE_BREAK ||
op == BRW_OPCODE_CONTINUE;
op == BRW_OPCODE_CONTINUE ||
op == SHADER_OPCODE_FLOW;
}
static inline brw_inst *

View file

@ -553,6 +553,9 @@ enum opcode {
SHADER_OPCODE_MEMORY_LOAD_LOGICAL,
SHADER_OPCODE_MEMORY_STORE_LOGICAL,
SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL,
/* Ends a block moving to the next one. See brw_cfg for details. */
SHADER_OPCODE_FLOW,
};
enum fb_write_logical_srcs {

View file

@ -471,6 +471,8 @@ brw_from_nir_emit_loop(nir_to_brw_state &ntb, nir_loop *loop)
assert(!nir_loop_has_continue_construct(loop));
bld.DO();
bld.emit(SHADER_OPCODE_FLOW);
brw_from_nir_emit_cf_list(ntb, &loop->body);
brw_inst *peep_while = bld.emit(BRW_OPCODE_WHILE);

View file

@ -1083,6 +1083,10 @@ brw_generator::generate_code(const cfg_t *cfg, int dispatch_width,
brw_DO(p, brw_get_default_exec_size(p));
break;
case SHADER_OPCODE_FLOW:
/* Do nothing. */
break;
case BRW_OPCODE_BREAK:
brw_BREAK(p);
break;

View file

@ -808,6 +808,7 @@ brw_inst::is_control_flow_end() const
case BRW_OPCODE_ELSE:
case BRW_OPCODE_WHILE:
case BRW_OPCODE_ENDIF:
case SHADER_OPCODE_FLOW:
return true;
default:
return false;
@ -825,6 +826,7 @@ brw_inst::is_control_flow() const
case BRW_OPCODE_ENDIF:
case BRW_OPCODE_BREAK:
case BRW_OPCODE_CONTINUE:
case SHADER_OPCODE_FLOW:
return true;
default:
return false;

View file

@ -185,6 +185,7 @@ namespace {
case SHADER_OPCODE_UNDEF:
case SHADER_OPCODE_HALT_TARGET:
case FS_OPCODE_SCHEDULING_FENCE:
case SHADER_OPCODE_FLOW:
return 0;
default:
/* Note that the following is inaccurate for virtual instructions

View file

@ -294,6 +294,9 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
return "read_from_live_channel";
case SHADER_OPCODE_READ_FROM_CHANNEL:
return "read_from_channel";
case SHADER_OPCODE_FLOW:
return "flow";
}
unreachable("not reached");

View file

@ -515,12 +515,14 @@ TEST_F(scoreboard_test, loop1)
brw_calculate_cfg(*v);
lower_scoreboard(v);
bblock_t *body = v->cfg->blocks[2];
const int num_blocks = v->cfg->num_blocks;
bblock_t *body = v->cfg->blocks[num_blocks - 2];
brw_inst *add = instruction(body, 0);
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
EXPECT_EQ(add->sched, regdist(TGL_PIPE_FLOAT, 1));
bblock_t *last_block = v->cfg->blocks[3];
bblock_t *last_block = v->cfg->blocks[num_blocks - 1];
brw_inst *mul = instruction(last_block, 0);
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
EXPECT_EQ(mul->sched, regdist(TGL_PIPE_FLOAT, 1));
@ -550,12 +552,14 @@ TEST_F(scoreboard_test, loop2)
/* Now the write in ADD has the tightest RegDist for both ADD and MUL. */
bblock_t *body = v->cfg->blocks[2];
const int num_blocks = v->cfg->num_blocks;
bblock_t *body = v->cfg->blocks[num_blocks - 2];
brw_inst *add = instruction(body, 0);
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
EXPECT_EQ(add->sched, regdist(TGL_PIPE_FLOAT, 2));
bblock_t *last_block = v->cfg->blocks[3];
bblock_t *last_block = v->cfg->blocks[num_blocks - 1];
brw_inst *mul = instruction(last_block, 0);
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
EXPECT_EQ(mul->sched, regdist(TGL_PIPE_FLOAT, 2));
@ -586,12 +590,14 @@ TEST_F(scoreboard_test, loop3)
brw_calculate_cfg(*v);
lower_scoreboard(v);
bblock_t *body = v->cfg->blocks[2];
const int num_blocks = v->cfg->num_blocks;
bblock_t *body = v->cfg->blocks[num_blocks - 2];
brw_inst *add = instruction(body, 4);
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
EXPECT_EQ(add->sched, regdist(TGL_PIPE_FLOAT, 5));
bblock_t *last_block = v->cfg->blocks[3];
bblock_t *last_block = v->cfg->blocks[num_blocks - 1];
brw_inst *mul = instruction(last_block, 0);
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
EXPECT_EQ(mul->sched, regdist(TGL_PIPE_FLOAT, 1));