ir3: enable scalar predicates

Enable the use of scalar predicates by marking predicate dsts as uniform
when possible during instruction emission and in opt_predicates.

Totals:
Instrs: 48207402 -> 47967272 (-0.50%); split: -0.54%, +0.05%
CodeSize: 101907026 -> 101768626 (-0.14%); split: -0.15%, +0.01%
NOPs: 8386320 -> 8165410 (-2.63%); split: -2.88%, +0.25%
MOVs: 1468853 -> 1470546 (+0.12%); split: -0.17%, +0.28%
COVs: 823724 -> 823746 (+0.00%); split: -0.01%, +0.01%
Full: 1716708 -> 1716767 (+0.00%); split: -0.00%, +0.01%
(ss): 1113167 -> 1168194 (+4.94%); split: -0.15%, +5.09%
(sy): 552317 -> 552288 (-0.01%); split: -0.10%, +0.09%
(ss)-stall: 4013046 -> 4261336 (+6.19%); split: -0.11%, +6.30%
(sy)-stall: 16741190 -> 16748983 (+0.05%); split: -0.17%, +0.22%
STPs: 18895 -> 18901 (+0.03%); split: -0.02%, +0.05%
LDPs: 23853 -> 23762 (-0.38%); split: -0.39%, +0.01%
Preamble Instrs: 11506988 -> 11493425 (-0.12%); split: -0.12%, +0.01%
Early Preamble: 121339 -> 121695 (+0.29%)
Last helper: 11686328 -> 11628618 (-0.49%); split: -0.72%, +0.23%
Cat0: 9241457 -> 9020508 (-2.39%); split: -2.62%, +0.22%
Cat1: 2353411 -> 2354860 (+0.06%); split: -0.17%, +0.23%
Cat2: 17468471 -> 17447932 (-0.12%); split: -0.12%, +0.00%
Cat6: 515728 -> 515643 (-0.02%); split: -0.02%, +0.00%
Cat7: 1637795 -> 1637789 (-0.00%); split: -0.05%, +0.05%

Totals from 33275 (20.20% of 164705) affected shaders:
Instrs: 30329487 -> 30089357 (-0.79%); split: -0.86%, +0.07%
CodeSize: 59715922 -> 59577522 (-0.23%); split: -0.26%, +0.03%
NOPs: 6265422 -> 6044512 (-3.53%); split: -3.86%, +0.33%
MOVs: 1058197 -> 1059890 (+0.16%); split: -0.23%, +0.39%
COVs: 427513 -> 427535 (+0.01%); split: -0.02%, +0.03%
Full: 548495 -> 548554 (+0.01%); split: -0.01%, +0.02%
(ss): 769340 -> 824367 (+7.15%); split: -0.21%, +7.36%
(sy): 368276 -> 368247 (-0.01%); split: -0.14%, +0.13%
(ss)-stall: 3076333 -> 3324623 (+8.07%); split: -0.15%, +8.22%
(sy)-stall: 10740547 -> 10748340 (+0.07%); split: -0.27%, +0.34%
STPs: 12872 -> 12878 (+0.05%); split: -0.02%, +0.07%
LDPs: 20808 -> 20717 (-0.44%); split: -0.45%, +0.01%
Preamble Instrs: 6354490 -> 6340927 (-0.21%); split: -0.22%, +0.01%
Early Preamble: 15233 -> 15589 (+2.34%)
Last helper: 8106631 -> 8048921 (-0.71%); split: -1.04%, +0.32%
Cat0: 6888653 -> 6667704 (-3.21%); split: -3.51%, +0.30%
Cat1: 1541452 -> 1542901 (+0.09%); split: -0.25%, +0.35%
Cat2: 10963398 -> 10942859 (-0.19%); split: -0.19%, +0.00%
Cat6: 265945 -> 265860 (-0.03%); split: -0.03%, +0.00%
Cat7: 1164800 -> 1164794 (-0.00%); split: -0.07%, +0.07%

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36614>
This commit is contained in:
Job Noorman 2025-08-19 08:35:38 +02:00 committed by Marge Bot
parent cccb3ecc6a
commit 2a8c5ebc77
2 changed files with 33 additions and 7 deletions

View file

@ -437,8 +437,19 @@ ir3_get_predicate(struct ir3_context *ctx, struct ir3_instruction *src)
/* condition always goes in predicate register: */
cond->dsts[0]->flags |= IR3_REG_PREDICATE;
/* The builders will mark the dst as shared when both srcs are shared.
* Predicates can't be shared but do support the scalar ALU when marked as
* uniform.
*/
if (cond->dsts[0]->flags & IR3_REG_SHARED) {
cond->dsts[0]->flags &= ~IR3_REG_SHARED;
if (ctx->compiler->has_scalar_predicates) {
cond->dsts[0]->flags |= IR3_REG_UNIFORM;
}
}
_mesa_hash_table_insert(ctx->predicate_conversions, src, cond);
return cond;
}

View file

@ -31,16 +31,23 @@ struct opt_predicates_ctx {
};
static bool
is_shared_or_const(struct ir3_register *reg)
cat2_all_srcs_have_flag(struct ir3_instruction *instr, unsigned flags)
{
return reg->flags & (IR3_REG_CONST | IR3_REG_SHARED);
return (instr->srcs[0]->flags & flags) &&
(instr->srcs_count == 1 || (instr->srcs[1]->flags & flags));
}
static bool
cat2_needs_scalar_alu(struct ir3_instruction *instr)
{
return is_shared_or_const(instr->srcs[0]) &&
(instr->srcs_count == 1 || is_shared_or_const(instr->srcs[1]));
return cat2_all_srcs_have_flag(instr, IR3_REG_CONST | IR3_REG_SHARED);
}
/* Check whether every source of a cat2 instruction carries at least one of the
 * CONST, SHARED or IMMED register flags. Compared to cat2_needs_scalar_alu(),
 * this additionally accepts immediate sources.
 */
static bool
cat2_may_use_scalar_alu(struct ir3_instruction *instr)
{
   /* Source flags accepted by this check. */
   const unsigned scalar_src_flags =
      IR3_REG_CONST | IR3_REG_SHARED | IR3_REG_IMMED;

   return cat2_all_srcs_have_flag(instr, scalar_src_flags);
}
static struct ir3_instruction *
@ -58,6 +65,12 @@ clone_with_predicate_dst(struct opt_predicates_ctx *ctx,
ir3_instr_move_after(clone, instr);
clone->dsts[0]->flags |= IR3_REG_PREDICATE;
clone->dsts[0]->flags &= ~(IR3_REG_HALF | IR3_REG_SHARED);
if (ctx->ir->compiler->has_scalar_predicates && opc_cat(instr->opc) == 2 &&
cat2_may_use_scalar_alu(instr)) {
clone->dsts[0]->flags |= IR3_REG_UNIFORM;
}
_mesa_hash_table_insert(ctx->predicate_clones, instr, clone);
return clone;
}
@ -70,14 +83,16 @@ can_write_predicate(struct opt_predicates_ctx *ctx,
case OPC_CMPS_S:
case OPC_CMPS_U:
case OPC_CMPS_F:
return !cat2_needs_scalar_alu(instr);
return !cat2_needs_scalar_alu(instr) ||
ctx->ir->compiler->has_scalar_predicates;
case OPC_AND_B:
case OPC_OR_B:
case OPC_NOT_B:
case OPC_XOR_B:
case OPC_GETBIT_B:
return ctx->ir->compiler->bitops_can_write_predicates &&
!cat2_needs_scalar_alu(instr);
(!cat2_needs_scalar_alu(instr) ||
ctx->ir->compiler->has_scalar_predicates);
default:
return false;
}