mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-07 15:10:12 +01:00
lima: ppir: handle ffma in the backend
ppir doesn't do a good job at fusing ffma, so allow NIR to do it and handle ffma in the backend.

shader-db:

total instructions in shared programs: 29485 -> 29066 (-1.42%)
instructions in affected programs: 10362 -> 9943 (-4.04%)
helped: 114
HURT: 5
helped stats (abs) min: 1 max: 30 x̄: 3.72 x̃: 2
helped stats (rel) min: 0.78% max: 20.00% x̄: 5.66% x̃: 4.31%
HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
HURT stats (rel) min: 0.52% max: 1.09% x̄: 0.85% x̃: 0.98%
95% mean confidence interval for instructions value: -4.37 -2.67
95% mean confidence interval for instructions %-change: -6.10% -4.68%
Instructions are helped.

total loops in shared programs: 2 -> 2 (0.00%)
loops in affected programs: 0 -> 0
helped: 0
HURT: 0

total spills in shared programs: 369 -> 367 (-0.54%)
spills in affected programs: 199 -> 197 (-1.01%)
helped: 8
HURT: 9

total fills in shared programs: 1265 -> 1208 (-4.51%)
fills in affected programs: 758 -> 701 (-7.52%)
helped: 11
HURT: 9

Reviewed-by: Erico Nunes <nunes.erico@gmail.com>
Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33313>
This commit is contained in:
parent
a4b1924b22
commit
3983e88c27
5 changed files with 74 additions and 6 deletions
|
|
@ -402,7 +402,6 @@ spec@glsl-1.20@execution@built-in-functions@fs-op-div-mat2x4-mat2x4,Fail
|
|||
spec@glsl-1.20@execution@built-in-functions@fs-op-div-mat3x2-mat3x2,Fail
|
||||
spec@glsl-1.20@execution@built-in-functions@fs-op-div-mat3x4-mat3x4,Fail
|
||||
spec@glsl-1.20@execution@built-in-functions@fs-op-div-mat4x3-mat4x3,Fail
|
||||
spec@glsl-1.20@execution@built-in-functions@fs-op-mult-mat4x3-mat3x4,Fail
|
||||
spec@glsl-1.20@execution@clipping@fixed-clip-enables,Fail
|
||||
spec@glsl-1.20@execution@clipping@vs-clip-vertex-const-reject,Fail
|
||||
spec@glsl-1.20@execution@clipping@vs-clip-vertex-different-from-position,Fail
|
||||
|
|
|
|||
|
|
@ -33,10 +33,19 @@ lima_nir_duplicate_load_const(nir_builder *b, nir_load_const_instr *load)
|
|||
|
||||
nir_foreach_use_safe(use_src, &load->def) {
|
||||
nir_load_const_instr *dupl;
|
||||
nir_instr *instr = nir_src_parent_instr(use_src);
|
||||
nir_alu_instr *alu = NULL;
|
||||
if (instr->type == nir_instr_type_alu)
|
||||
alu = nir_instr_as_alu(instr);
|
||||
|
||||
if (last_parent_instr != nir_src_parent_instr(use_src)) {
|
||||
/* Always clone consts for FFMA sources as well, since it will translate
|
||||
* into 2 PPIR ops and each may need its own const. Redundant consts
|
||||
* will be dropped by PPIR later
|
||||
*/
|
||||
if (last_parent_instr != instr ||
|
||||
(alu && alu->op == nir_op_ffma)) {
|
||||
/* if ssa use, clone for the target block */
|
||||
b->cursor = nir_before_instr(nir_src_parent_instr(use_src));
|
||||
b->cursor = nir_before_instr(instr);
|
||||
|
||||
dupl = nir_load_const_instr_create(b->shader, load->def.num_components,
|
||||
load->def.bit_size);
|
||||
|
|
|
|||
|
|
@ -123,6 +123,57 @@ static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
|
|||
ppir_node_target_assign(ps, child);
|
||||
}
|
||||
|
||||
/*
 * Emit a NIR ffma (src0 * src1 + src2) as a pair of PPIR ALU nodes:
 *
 *   - a ppir_op_mul node taking ffma sources 0 and 1, whose destination is
 *     a pipeline register rather than a regular register/SSA value;
 *   - a ppir_op_add node that owns the NIR def, reads the mul node as its
 *     first source and ffma source 2 as its second.
 *
 * Returns false if either node could not be created, true otherwise.
 */
static bool ppir_emit_ffma(ppir_block *block, nir_instr *ni)
{
   nir_alu_instr *alu = nir_instr_as_alu(ni);
   nir_def *def = &alu->def;
   unsigned mask = nir_component_mask(def->num_components);
   const uint8_t identity_swizzle[4] = {
      PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W
   };

   /* The add is created first and carries the NIR destination; the mul is
    * created with index -1.
    */
   ppir_alu_node *add = ppir_node_create_dest(block, ppir_op_add, def, mask);
   if (!add)
      return false;

   ppir_alu_node *mul = ppir_node_create(block, ppir_op_mul, -1, mask);
   if (!mul)
      return false;

   /* Route the product through a pipeline register: the fmul one with a
    * single-bit mask when the add writes exactly one component, otherwise
    * the vmul one with all four components enabled.
    */
   bool single_component = util_bitcount(add->dest.write_mask) == 1;

   mul->dest.type = ppir_target_pipeline;
   if (single_component) {
      mul->dest.write_mask = 1;
      mul->dest.pipeline = ppir_pipeline_reg_fmul;
   } else {
      mul->dest.write_mask = u_bit_consecutive(0, 4);
      mul->dest.pipeline = ppir_pipeline_reg_vmul;
   }

   mul->num_src = 2;
   add->num_src = 2;

   /* Multiplicands: ffma sources 0 and 1, keeping their NIR swizzles. */
   for (int idx = 0; idx < 2; idx++) {
      nir_alu_src *factor = &alu->src[idx];
      ppir_src *mul_src = &mul->src[idx];

      memcpy(mul_src->swizzle, factor->swizzle, sizeof(mul_src->swizzle));
      ppir_node_add_src(block->comp, &mul->node, mul_src, &factor->src, mask);
   }

   /* Addend: ffma source 2 becomes the second source of the add. */
   nir_alu_src *addend = &alu->src[2];
   ppir_src *add_rhs = &add->src[1];

   memcpy(add_rhs->swizzle, addend->swizzle, sizeof(add_rhs->swizzle));
   ppir_node_add_src(block->comp, &add->node, add_rhs, &addend->src, mask);

   /* First source of the add is the mul result, read unswizzled; record the
    * add -> mul source dependency explicitly.
    */
   ppir_src *add_lhs = &add->src[0];

   memcpy(add_lhs->swizzle, identity_swizzle, sizeof(add_lhs->swizzle));
   ppir_node_target_assign(add_lhs, &mul->node);
   ppir_node_add_dep(&add->node, &mul->node, ppir_dep_src);

   /* Append order (add, then mul) is kept as in the original. */
   list_addtail(&add->node.list, &block->node_list);
   list_addtail(&mul->node.list, &block->node_list);

   return true;
}
|
||||
|
||||
static int nir_to_ppir_opcodes[nir_num_opcodes] = {
|
||||
[nir_op_mov] = ppir_op_mov,
|
||||
[nir_op_fmul] = ppir_op_mul,
|
||||
|
|
@ -152,6 +203,7 @@ static int nir_to_ppir_opcodes[nir_num_opcodes] = {
|
|||
[nir_op_ftrunc] = ppir_op_trunc,
|
||||
[nir_op_fsat] = ppir_op_sat,
|
||||
[nir_op_fclamp_pos] = ppir_op_clamp_pos,
|
||||
[nir_op_ffma] = ppir_op_ffma,
|
||||
};
|
||||
|
||||
static bool ppir_emit_alu(ppir_block *block, nir_instr *ni)
|
||||
|
|
@ -164,6 +216,11 @@ static bool ppir_emit_alu(ppir_block *block, nir_instr *ni)
|
|||
ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (op == ppir_op_ffma) {
|
||||
return ppir_emit_ffma(block, ni);
|
||||
}
|
||||
|
||||
unsigned mask = nir_component_mask(def->num_components);
|
||||
ppir_alu_node *node = ppir_node_create_dest(block, op, def, mask);
|
||||
if (!node)
|
||||
|
|
|
|||
|
|
@ -118,6 +118,8 @@ typedef enum {
|
|||
ppir_op_undef,
|
||||
ppir_op_dummy,
|
||||
|
||||
ppir_op_ffma,
|
||||
|
||||
ppir_op_num,
|
||||
} ppir_op;
|
||||
|
||||
|
|
|
|||
|
|
@ -69,9 +69,9 @@ static const nir_shader_compiler_options vs_nir_options = {
|
|||
};
|
||||
|
||||
static const nir_shader_compiler_options fs_nir_options = {
|
||||
.lower_ffma16 = true,
|
||||
.lower_ffma32 = true,
|
||||
.lower_ffma64 = true,
|
||||
.fuse_ffma16 = true,
|
||||
.fuse_ffma32 = true,
|
||||
.fuse_ffma64 = true,
|
||||
.lower_fpow = true,
|
||||
.lower_fdiv = true,
|
||||
.lower_fmod = true,
|
||||
|
|
@ -266,6 +266,7 @@ lima_program_optimize_fs_nir(struct nir_shader *s,
|
|||
|
||||
/* Must be run after optimization loop */
|
||||
NIR_PASS_V(s, lima_nir_scale_trig);
|
||||
NIR_PASS_V(s, nir_opt_algebraic_late);
|
||||
NIR_PASS_V(s, lima_nir_ppir_algebraic_late);
|
||||
|
||||
NIR_PASS_V(s, nir_copy_prop);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue