mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 13:50:11 +01:00
ir3/legalize: schedule (eq) more accurately
Instructions that calculate derivatives (whether implicitly or explicitly) don't actually need helpers enabled as long as helpers were enabled while their coordinates were calculated. We currently don't track this and leave helpers enabled until the derivative instructions themselves. Improve this by adding a backwards data-flow analysis which tracks the last instruction that wrote the coordinates so that helpers can be disabled after that.

Totals from 38306 (23.26% of 164705) affected shaders:
Instrs: 19635952 -> 19647753 (+0.06%); split: -0.03%, +0.09%
CodeSize: 40465212 -> 40489860 (+0.06%); split: -0.03%, +0.09%
NOPs: 3493898 -> 3505699 (+0.34%); split: -0.16%, +0.49%
(ss)-stall: 1755983 -> 1755365 (-0.04%); split: -0.04%, +0.01%
(sy)-stall: 5345890 -> 5350570 (+0.09%); split: -0.03%, +0.12%
Last helper: 8754510 -> 6313744 (-27.88%); split: -27.89%, +0.01%
Cat0: 3821218 -> 3833019 (+0.31%); split: -0.14%, +0.45%

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36410>
This commit is contained in:
parent
54cde77163
commit
29f8277952
2 changed files with 226 additions and 85 deletions
|
|
@ -1299,58 +1299,6 @@ is_input(struct ir3_instruction *instr)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Whether non-helper invocations can read the value of helper invocations. We
|
|
||||||
* cannot insert (eq) before these instructions.
|
|
||||||
*/
|
|
||||||
static inline bool
|
|
||||||
uses_helpers(struct ir3_instruction *instr)
|
|
||||||
{
|
|
||||||
switch (instr->opc) {
|
|
||||||
/* These require helper invocations to be present */
|
|
||||||
case OPC_SAMB:
|
|
||||||
case OPC_GETLOD:
|
|
||||||
case OPC_DSX:
|
|
||||||
case OPC_DSY:
|
|
||||||
case OPC_DSXPP_1:
|
|
||||||
case OPC_DSYPP_1:
|
|
||||||
case OPC_DSXPP_MACRO:
|
|
||||||
case OPC_DSYPP_MACRO:
|
|
||||||
case OPC_QUAD_SHUFFLE_BRCST:
|
|
||||||
case OPC_QUAD_SHUFFLE_HORIZ:
|
|
||||||
case OPC_QUAD_SHUFFLE_VERT:
|
|
||||||
case OPC_QUAD_SHUFFLE_DIAG:
|
|
||||||
case OPC_META_TEX_PREFETCH:
|
|
||||||
return true;
|
|
||||||
|
|
||||||
/* sam requires helper invocations except for dummy prefetch instructions */
|
|
||||||
case OPC_SAM:
|
|
||||||
return !has_dummy_dst(instr);
|
|
||||||
|
|
||||||
/* Subgroup operations don't require helper invocations to be present, but
|
|
||||||
* will use helper invocations if they are present.
|
|
||||||
*/
|
|
||||||
case OPC_BALLOT_MACRO:
|
|
||||||
case OPC_ANY_MACRO:
|
|
||||||
case OPC_ALL_MACRO:
|
|
||||||
case OPC_READ_FIRST_MACRO:
|
|
||||||
case OPC_READ_COND_MACRO:
|
|
||||||
case OPC_MOVMSK:
|
|
||||||
case OPC_BRCST_ACTIVE:
|
|
||||||
return true;
|
|
||||||
|
|
||||||
/* Catch lowered READ_FIRST/READ_COND. For elect, don't include the getone
|
|
||||||
* in the preamble because it doesn't actually matter which fiber is
|
|
||||||
* selected.
|
|
||||||
*/
|
|
||||||
case OPC_MOV:
|
|
||||||
case OPC_ELECT_MACRO:
|
|
||||||
return instr->flags & IR3_INSTR_NEEDS_HELPERS;
|
|
||||||
|
|
||||||
default:
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline bool
|
static inline bool
|
||||||
is_bool(struct ir3_instruction *instr)
|
is_bool(struct ir3_instruction *instr)
|
||||||
{
|
{
|
||||||
|
|
@ -3302,7 +3250,8 @@ regmask_or_shared(regmask_t *dst, regmask_t *a, regmask_t *b)
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
regmask_set(regmask_t *regmask, struct ir3_register *reg)
|
regmask_set_masked(regmask_t *regmask, struct ir3_register *reg,
|
||||||
|
unsigned wrmask)
|
||||||
{
|
{
|
||||||
unsigned size = reg_elem_size(reg);
|
unsigned size = reg_elem_size(reg);
|
||||||
enum ir3_reg_file file;
|
enum ir3_reg_file file;
|
||||||
|
|
@ -3311,12 +3260,18 @@ regmask_set(regmask_t *regmask, struct ir3_register *reg)
|
||||||
if (reg->flags & IR3_REG_RELATIV) {
|
if (reg->flags & IR3_REG_RELATIV) {
|
||||||
__regmask_set(regmask, file, n, size * reg->size);
|
__regmask_set(regmask, file, n, size * reg->size);
|
||||||
} else {
|
} else {
|
||||||
for (unsigned mask = reg->wrmask; mask; mask >>= 1, n += size)
|
for (unsigned mask = reg->wrmask & wrmask; mask; mask >>= 1, n += size)
|
||||||
if (mask & 1)
|
if (mask & 1)
|
||||||
__regmask_set(regmask, file, n, size);
|
__regmask_set(regmask, file, n, size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
regmask_set(regmask_t *regmask, struct ir3_register *reg)
|
||||||
|
{
|
||||||
|
regmask_set_masked(regmask, reg, ~0);
|
||||||
|
}
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
regmask_clear(regmask_t *regmask, struct ir3_register *reg)
|
regmask_clear(regmask_t *regmask, struct ir3_register *reg)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -1594,6 +1594,208 @@ dbg_expand_rpt(struct ir3 *ir)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct ir3_mark_helpers_data {
|
||||||
|
bool valid;
|
||||||
|
regmask_t needs_helpers;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void
|
||||||
|
instr_mark_helpers(struct ir3_mark_helpers_data *bd,
|
||||||
|
struct ir3_instruction *instr)
|
||||||
|
{
|
||||||
|
if (instr->flags & IR3_INSTR_NEEDS_HELPERS) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
foreach_dst (dst, instr) {
|
||||||
|
if (dst->flags & (IR3_REG_RT | IR3_REG_DUMMY)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (regmask_get(&bd->needs_helpers, dst)) {
|
||||||
|
instr->flags |= IR3_INSTR_NEEDS_HELPERS;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (instr->opc) {
|
||||||
|
case OPC_MOVMSK:
|
||||||
|
case OPC_BRCST_ACTIVE:
|
||||||
|
case OPC_QUAD_SHUFFLE_BRCST:
|
||||||
|
case OPC_QUAD_SHUFFLE_HORIZ:
|
||||||
|
case OPC_QUAD_SHUFFLE_VERT:
|
||||||
|
case OPC_QUAD_SHUFFLE_DIAG:
|
||||||
|
case OPC_BALL:
|
||||||
|
case OPC_BANY:
|
||||||
|
/* Subgroup operations don't require helper invocations to be present, but
|
||||||
|
* will use helper invocations if they are present.
|
||||||
|
*/
|
||||||
|
instr->flags |= IR3_INSTR_NEEDS_HELPERS;
|
||||||
|
return;
|
||||||
|
|
||||||
|
case OPC_SAM:
|
||||||
|
case OPC_SAMB:
|
||||||
|
case OPC_GETLOD:
|
||||||
|
case OPC_DSX:
|
||||||
|
case OPC_DSY:
|
||||||
|
case OPC_DSXPP_1:
|
||||||
|
case OPC_DSYPP_1: {
|
||||||
|
if (instr->opc == OPC_SAM && has_dummy_dst(instr)) {
|
||||||
|
/* sam requires helper invocations except for dummy prefetch
|
||||||
|
* instructions.
|
||||||
|
*/
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* These instructions don't use helpers themselves but have a src that
|
||||||
|
* needs to be calculated using helpers (e.g., the coordinates used to
|
||||||
|
* calculate derivatives). Mark the src register as needing helpers so
|
||||||
|
* that we can keep them enabled until it is written.
|
||||||
|
*/
|
||||||
|
unsigned nsrcs;
|
||||||
|
|
||||||
|
if (instr->opc == OPC_SAM || instr->opc == OPC_SAMB ||
|
||||||
|
instr->opc == OPC_GETLOD) {
|
||||||
|
nsrcs = (instr->flags & IR3_INSTR_3D) ? 3 : 2;
|
||||||
|
} else {
|
||||||
|
/* dsx/dsy: derive the number of sources from the dst wrmask since the
|
||||||
|
* src itself may use aliases.
|
||||||
|
*/
|
||||||
|
nsrcs = util_last_bit(instr->dsts[0]->wrmask);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (instr->srcs[0]->flags & IR3_REG_FIRST_ALIAS) {
|
||||||
|
assert(nsrcs <= instr->srcs_count);
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < nsrcs; i++) {
|
||||||
|
struct ir3_register *src = instr->srcs[i];
|
||||||
|
|
||||||
|
if (is_reg_gpr(src)) {
|
||||||
|
regmask_set(&bd->needs_helpers, src);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
regmask_set_masked(&bd->needs_helpers, instr->srcs[0], MASK(nsrcs));
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Apply IR3_INSTR_NEEDS_HELPERS to instructions that need helper invocations to
|
||||||
|
* be active. Note that we don't necessarily apply it to all instructions that
|
||||||
|
* need helpers, just to the last one in each block, as that gives us enough
|
||||||
|
* information for inserting (eq) to kill helpers.
|
||||||
|
*
|
||||||
|
* We use a backwards data-flow analysis because we cannot always know whether
|
||||||
|
* an instruction needs helpers by just looking at the opcode. For example,
|
||||||
|
* instructions that calculate (implicit) derivatives don't need helpers to be
|
||||||
|
* active but the calculation of their src needs to be done with active helpers.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
mark_helpers(struct ir3_legalize_ctx *ctx, struct ir3 *ir,
|
||||||
|
struct ir3_shader_variant *so)
|
||||||
|
{
|
||||||
|
foreach_block (block, &ir->block_list) {
|
||||||
|
struct ir3_mark_helpers_data *bd =
|
||||||
|
ralloc(ctx, struct ir3_mark_helpers_data);
|
||||||
|
bd->valid = false;
|
||||||
|
regmask_init(&bd->needs_helpers, ctx->compiler->mergedregs);
|
||||||
|
block->data = bd;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool uses_helpers = false;
|
||||||
|
bool progress;
|
||||||
|
|
||||||
|
do {
|
||||||
|
progress = false;
|
||||||
|
|
||||||
|
foreach_block_rev (block, &ir->block_list) {
|
||||||
|
struct ir3_mark_helpers_data *bd = block->data;
|
||||||
|
|
||||||
|
if (bd->valid) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ir3_mark_helpers_data prev_bd = *bd;
|
||||||
|
regmask_init(&bd->needs_helpers, ctx->compiler->mergedregs);
|
||||||
|
bool may_have_needs_helpers_at_entry = true;
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) {
|
||||||
|
struct ir3_block *succ = block->successors[i];
|
||||||
|
if (!succ) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ir3_mark_helpers_data *succ_bd = succ->data;
|
||||||
|
regmask_or(&bd->needs_helpers, &bd->needs_helpers,
|
||||||
|
&succ_bd->needs_helpers);
|
||||||
|
}
|
||||||
|
|
||||||
|
foreach_instr_rev (instr, &block->instr_list) {
|
||||||
|
instr_mark_helpers(bd, instr);
|
||||||
|
|
||||||
|
/* We only care about the last instruction needing helpers. */
|
||||||
|
if (instr->flags & IR3_INSTR_NEEDS_HELPERS) {
|
||||||
|
uses_helpers = true;
|
||||||
|
|
||||||
|
/* This also means we can stop tracking needs_helpers. This saves
|
||||||
|
* us from unnecessarily invalidating predecessors. Making sure
|
||||||
|
* loops are handled correctly is done in helper_sched.
|
||||||
|
*/
|
||||||
|
regmask_init(&bd->needs_helpers, ctx->compiler->mergedregs);
|
||||||
|
may_have_needs_helpers_at_entry = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bd->valid = true;
|
||||||
|
|
||||||
|
/* We have to invalidate the block's predecessors whenever it has more
|
||||||
|
* needs_helpers registers than the previous time around because this may
|
||||||
|
* cause more instructions to be marked as needing helpers in its
|
||||||
|
* predecessors. We don't have to do this when it has fewer
|
||||||
|
* needs_helpers registers as this won't change anything. This is
|
||||||
|
* checked using may_have_needs_helpers_at_entry which will be false
|
||||||
|
* whenever we cleared needs_helpers.
|
||||||
|
*/
|
||||||
|
if (may_have_needs_helpers_at_entry &&
|
||||||
|
memcmp(&prev_bd.needs_helpers, &bd->needs_helpers,
|
||||||
|
sizeof(prev_bd.needs_helpers)) != 0) {
|
||||||
|
progress = true;
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < block->predecessors_count; i++) {
|
||||||
|
struct ir3_mark_helpers_data *pred_bd =
|
||||||
|
block->predecessors[i]->data;
|
||||||
|
pred_bd->valid = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} while (progress);
|
||||||
|
|
||||||
|
struct ir3_block *start_block = ir3_start_block(ir);
|
||||||
|
struct ir3_mark_helpers_data *start_bd = start_block->data;
|
||||||
|
|
||||||
|
foreach_input (input, ir) {
|
||||||
|
if (regmask_get(&start_bd->needs_helpers, input->dsts[0])) {
|
||||||
|
/* If we need helpers for an input reg, we have to make sure helpers
|
||||||
|
* are enabled when we enter the shader. Just mark the first
|
||||||
|
* instruction as needing helpers.
|
||||||
|
*/
|
||||||
|
struct ir3_instruction *first = ir3_block_get_first_instr(start_block);
|
||||||
|
first->flags |= IR3_INSTR_NEEDS_HELPERS;
|
||||||
|
uses_helpers = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return uses_helpers;
|
||||||
|
}
|
||||||
|
|
||||||
struct ir3_helper_block_data {
|
struct ir3_helper_block_data {
|
||||||
/* Whether helper invocations may be used on any path starting at the
|
/* Whether helper invocations may be used on any path starting at the
|
||||||
* beginning of the block.
|
* beginning of the block.
|
||||||
|
|
@ -1618,16 +1820,15 @@ static void
|
||||||
helper_sched(struct ir3_legalize_ctx *ctx, struct ir3 *ir,
|
helper_sched(struct ir3_legalize_ctx *ctx, struct ir3 *ir,
|
||||||
struct ir3_shader_variant *so)
|
struct ir3_shader_variant *so)
|
||||||
{
|
{
|
||||||
bool non_prefetch_helpers = false;
|
|
||||||
|
|
||||||
foreach_block (block, &ir->block_list) {
|
foreach_block (block, &ir->block_list) {
|
||||||
struct ir3_helper_block_data *bd =
|
struct ir3_helper_block_data *bd =
|
||||||
rzalloc(ctx, struct ir3_helper_block_data);
|
rzalloc(ctx, struct ir3_helper_block_data);
|
||||||
foreach_instr (instr, &block->instr_list) {
|
foreach_instr (instr, &block->instr_list) {
|
||||||
if (uses_helpers(instr)) {
|
if (instr->flags & IR3_INSTR_NEEDS_HELPERS) {
|
||||||
bd->uses_helpers_beginning = true;
|
bd->uses_helpers_beginning = true;
|
||||||
if (instr->opc != OPC_META_TEX_PREFETCH) {
|
|
||||||
non_prefetch_helpers = true;
|
if (is_terminator(instr)) {
|
||||||
|
bd->uses_helpers_end = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1640,28 +1841,9 @@ helper_sched(struct ir3_legalize_ctx *ctx, struct ir3 *ir,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ir3_instruction *terminator = ir3_block_get_terminator(block);
|
|
||||||
if (terminator) {
|
|
||||||
if (terminator->opc == OPC_BALL || terminator->opc == OPC_BANY ||
|
|
||||||
(terminator->opc == OPC_GETONE &&
|
|
||||||
(terminator->flags & IR3_INSTR_NEEDS_HELPERS))) {
|
|
||||||
bd->uses_helpers_beginning = true;
|
|
||||||
bd->uses_helpers_end = true;
|
|
||||||
non_prefetch_helpers = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
block->data = bd;
|
block->data = bd;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If only prefetches use helpers then we can disable them in the shader via
|
|
||||||
* a register setting.
|
|
||||||
*/
|
|
||||||
if (!non_prefetch_helpers) {
|
|
||||||
so->prefetch_end_of_quad = true;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool progress;
|
bool progress;
|
||||||
do {
|
do {
|
||||||
progress = false;
|
progress = false;
|
||||||
|
|
@ -1757,11 +1939,7 @@ helper_sched(struct ir3_legalize_ctx *ctx, struct ir3 *ir,
|
||||||
*/
|
*/
|
||||||
struct ir3_instruction *first_instr = NULL;
|
struct ir3_instruction *first_instr = NULL;
|
||||||
foreach_instr_rev (instr, &block->instr_list) {
|
foreach_instr_rev (instr, &block->instr_list) {
|
||||||
/* Skip prefetches because they actually execute before the block
|
if (instr->flags & IR3_INSTR_NEEDS_HELPERS)
|
||||||
* starts and at this stage they aren't guaranteed to be at the start
|
|
||||||
* of the block.
|
|
||||||
*/
|
|
||||||
if (uses_helpers(instr) && instr->opc != OPC_META_TEX_PREFETCH)
|
|
||||||
break;
|
break;
|
||||||
first_instr = instr;
|
first_instr = instr;
|
||||||
}
|
}
|
||||||
|
|
@ -2286,8 +2464,16 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
|
||||||
|
|
||||||
/* TODO: does (eq) exist before a6xx? */
|
/* TODO: does (eq) exist before a6xx? */
|
||||||
if (so->type == MESA_SHADER_FRAGMENT && so->need_pixlod &&
|
if (so->type == MESA_SHADER_FRAGMENT && so->need_pixlod &&
|
||||||
so->compiler->gen >= 6)
|
so->compiler->gen >= 6) {
|
||||||
helper_sched(ctx, ir, so);
|
if (mark_helpers(ctx, ir, so)) {
|
||||||
|
helper_sched(ctx, ir, so);
|
||||||
|
} else {
|
||||||
|
/* If no instructions use helpers, we can disable them in the shader
|
||||||
|
* via a register setting.
|
||||||
|
*/
|
||||||
|
so->prefetch_end_of_quad = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (ir3_shader_debug & IR3_DBG_FULLSYNC) {
|
if (ir3_shader_debug & IR3_DBG_FULLSYNC) {
|
||||||
dbg_sync_sched(ir, so);
|
dbg_sync_sched(ir, so);
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue