mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 06:50:11 +01:00
ir3: add subreg move optimization
Certain instructions essentially behave as a move of half of their full src to their half dst. More specifically:
- `cov.u32u16 hdst, src`: moves lower half of src to hdst.
- `[a]shr.b hdst, src, 16`: moves upper half of src to hdst.

In mergedregs mode, if the src and dst of these instructions are assigned overlapping registers, they can be removed.

Implement this by 1) merging the src and dst merge sets of such instructions before RA, and 2) removing them if RA assigned overlapping registers.

Totals from 7483 (4.55% of 164575) affected shaders:
Instrs: 8913039 -> 8859209 (-0.60%); split: -0.62%, +0.01%
CodeSize: 16588988 -> 16489082 (-0.60%); split: -0.61%, +0.00%
NOPs: 2020848 -> 2013070 (-0.38%); split: -0.71%, +0.33%
MOVs: 352179 -> 352146 (-0.01%); split: -0.06%, +0.05%
COVs: 256946 -> 242972 (-5.44%)
Full: 145737 -> 145738 (+0.00%)
(ss): 224816 -> 222102 (-1.21%); split: -1.24%, +0.03%
(sy): 109208 -> 109222 (+0.01%); split: -0.01%, +0.02%
(ss)-stall: 842387 -> 831457 (-1.30%); split: -1.63%, +0.33%
(sy)-stall: 3353188 -> 3337732 (-0.46%); split: -0.62%, +0.16%
Preamble Instrs: 1403333 -> 1401362 (-0.14%)
Cat0: 2219312 -> 2211530 (-0.35%); split: -0.65%, +0.30%
Cat1: 690367 -> 677240 (-1.90%); split: -1.99%, +0.09%
Cat2: 3279215 -> 3246293 (-1.00%)
Cat7: 412865 -> 412866 (+0.00%)

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35858>
This commit is contained in:
parent
28e810eb74
commit
c757b22c5f
4 changed files with 103 additions and 0 deletions
|
|
@ -1905,3 +1905,44 @@ ir3_supports_rpt(struct ir3_compiler *compiler, unsigned opc)
|
|||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
is_unmodified_full_gpr(struct ir3_register *src)
|
||||
{
|
||||
return !(src->flags & (IR3_REG_HALF | IR3_REG_CONST | IR3_REG_IMMED |
|
||||
IR3_REG_RELATIV | IR3_REG_FNEG | IR3_REG_FABS |
|
||||
IR3_REG_SNEG | IR3_REG_SABS | IR3_REG_BNOT));
|
||||
}
|
||||
|
||||
/* Does `instr` move half of its full GPR src to its half dst? If this is the
|
||||
* case, and RA assigns overlapping registers to src and dst, the instruction
|
||||
* can be removed in mergedregs mode.
|
||||
*/
|
||||
enum ir3_subreg_move
|
||||
ir3_is_subreg_move(struct ir3_instruction *instr)
|
||||
{
|
||||
if (instr->opc == OPC_MOV) {
|
||||
/* `cov.u32u16 hdst, src`: moves lower half of src to hdst. */
|
||||
struct ir3_register *src = instr->srcs[0];
|
||||
struct ir3_register *dst = instr->dsts[0];
|
||||
|
||||
if (instr->cat1.src_type == TYPE_U32 &&
|
||||
instr->cat1.dst_type == TYPE_U16 && is_unmodified_full_gpr(src) &&
|
||||
(src->flags & IR3_REG_SHARED) == (dst->flags & IR3_REG_SHARED)) {
|
||||
return IR3_SUBREG_MOVE_LOWER;
|
||||
}
|
||||
} else if (instr->opc == OPC_SHR_B || instr->opc == OPC_ASHR_B) {
|
||||
/* `[a]shr.b hdst, src, 16`: moves upper half of src to hdst. */
|
||||
struct ir3_register *src = instr->srcs[0];
|
||||
struct ir3_register *shamt = instr->srcs[1];
|
||||
struct ir3_register *dst = instr->dsts[0];
|
||||
|
||||
if ((dst->flags & IR3_REG_HALF) && is_unmodified_full_gpr(src) &&
|
||||
((src->flags & IR3_REG_SHARED) == (dst->flags & IR3_REG_SHARED)) &&
|
||||
(shamt->flags & IR3_REG_IMMED) && shamt->uim_val == 16) {
|
||||
return IR3_SUBREG_MOVE_UPPER;
|
||||
}
|
||||
}
|
||||
|
||||
return IR3_SUBREG_MOVE_NONE;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1143,6 +1143,14 @@ is_subgroup_cond_mov_macro(struct ir3_instruction *instr)
|
|||
}
|
||||
}
|
||||
|
||||
/* Result of ir3_is_subreg_move(): which half of a full src, if any, an
 * instruction moves to its half dst.
 */
enum ir3_subreg_move {
   /* The instruction is not a subreg move. */
   IR3_SUBREG_MOVE_NONE = 0,
   /* The lower half of the src is moved to the dst. */
   IR3_SUBREG_MOVE_LOWER,
   /* The upper half of the src is moved to the dst. */
   IR3_SUBREG_MOVE_UPPER,
};
|
||||
|
||||
/* Reports whether `instr` moves half of its full GPR src to its half dst, and
 * if so which half (lower or upper); IR3_SUBREG_MOVE_NONE otherwise.
 */
enum ir3_subreg_move ir3_is_subreg_move(struct ir3_instruction *instr);
|
||||
|
||||
static inline bool
|
||||
is_alu(struct ir3_instruction *instr)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@
|
|||
#include "instr-a3xx.h"
|
||||
#include "ir3.h"
|
||||
#include "ir3_context.h"
|
||||
#include "ir3_ra.h"
|
||||
|
||||
static struct ir3_instruction_rpt
|
||||
rpt_instr(struct ir3_instruction *instr, unsigned nrpt)
|
||||
|
|
@ -5515,6 +5516,44 @@ collect_tex_prefetches(struct ir3_context *ctx, struct ir3 *ir)
|
|||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
is_noop_subreg_move(struct ir3_instruction *instr)
|
||||
{
|
||||
enum ir3_subreg_move subreg_move = ir3_is_subreg_move(instr);
|
||||
|
||||
if (subreg_move == IR3_SUBREG_MOVE_NONE) {
|
||||
return false;
|
||||
}
|
||||
|
||||
struct ir3_register *src = instr->srcs[0];
|
||||
struct ir3_register *dst = instr->dsts[0];
|
||||
unsigned offset = subreg_move == IR3_SUBREG_MOVE_LOWER ? 0 : 1;
|
||||
|
||||
return ra_num_to_physreg(dst->num, dst->flags) ==
|
||||
ra_num_to_physreg(src->num, src->flags) + offset;
|
||||
}
|
||||
|
||||
static bool
|
||||
ir3_remove_noop_subreg_moves(struct ir3 *ir)
|
||||
{
|
||||
if (!ir->compiler->mergedregs) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool progress = false;
|
||||
|
||||
foreach_block (block, &ir->block_list) {
|
||||
foreach_instr_safe (instr, &block->instr_list) {
|
||||
if (is_noop_subreg_move(instr)) {
|
||||
ir3_instr_remove(instr);
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
int
|
||||
ir3_compile_shader_nir(struct ir3_compiler *compiler,
|
||||
struct ir3_shader *shader,
|
||||
|
|
@ -5846,6 +5885,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
|
|||
goto out;
|
||||
}
|
||||
|
||||
IR3_PASS(ir, ir3_remove_noop_subreg_moves);
|
||||
IR3_PASS(ir, ir3_merge_rpt, so);
|
||||
IR3_PASS(ir, ir3_postsched, so);
|
||||
|
||||
|
|
|
|||
|
|
@ -384,6 +384,19 @@ aggressive_coalesce_collect(struct ir3_liveness *live,
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
aggressive_coalesce_subreg_move(struct ir3_liveness *live,
|
||||
struct ir3_instruction *instr)
|
||||
{
|
||||
enum ir3_subreg_move subreg_move = ir3_is_subreg_move(instr);
|
||||
|
||||
if (subreg_move != IR3_SUBREG_MOVE_NONE &&
|
||||
(instr->dsts[0]->flags & IR3_REG_SSA)) {
|
||||
unsigned offset = subreg_move == IR3_SUBREG_MOVE_LOWER ? 0 : 1;
|
||||
try_merge_defs(live, instr->srcs[0]->def, instr->dsts[0], offset);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
aggressive_coalesce_rpt(struct ir3_liveness *live,
|
||||
struct ir3_instruction *instr)
|
||||
|
|
@ -605,6 +618,7 @@ ir3_aggressive_coalesce(struct ir3_liveness *live,
|
|||
aggressive_coalesce_parallel_copy(live, instr);
|
||||
break;
|
||||
default:
|
||||
aggressive_coalesce_subreg_move(live, instr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue