diff --git a/src/gallium/drivers/lima/ci/lima-fails.txt b/src/gallium/drivers/lima/ci/lima-fails.txt index c0a2f1ac25f..e07775e7334 100644 --- a/src/gallium/drivers/lima/ci/lima-fails.txt +++ b/src/gallium/drivers/lima/ci/lima-fails.txt @@ -402,7 +402,6 @@ spec@glsl-1.20@execution@built-in-functions@fs-op-div-mat2x4-mat2x4,Fail spec@glsl-1.20@execution@built-in-functions@fs-op-div-mat3x2-mat3x2,Fail spec@glsl-1.20@execution@built-in-functions@fs-op-div-mat3x4-mat3x4,Fail spec@glsl-1.20@execution@built-in-functions@fs-op-div-mat4x3-mat4x3,Fail -spec@glsl-1.20@execution@built-in-functions@fs-op-mult-mat4x3-mat3x4,Fail spec@glsl-1.20@execution@clipping@fixed-clip-enables,Fail spec@glsl-1.20@execution@clipping@vs-clip-vertex-const-reject,Fail spec@glsl-1.20@execution@clipping@vs-clip-vertex-different-from-position,Fail diff --git a/src/gallium/drivers/lima/ir/lima_nir_duplicate_consts.c b/src/gallium/drivers/lima/ir/lima_nir_duplicate_consts.c index 23ed6f5cdbe..a8af9bc5709 100644 --- a/src/gallium/drivers/lima/ir/lima_nir_duplicate_consts.c +++ b/src/gallium/drivers/lima/ir/lima_nir_duplicate_consts.c @@ -33,10 +33,19 @@ lima_nir_duplicate_load_const(nir_builder *b, nir_load_const_instr *load) nir_foreach_use_safe(use_src, &load->def) { nir_load_const_instr *dupl; + nir_instr *instr = nir_src_parent_instr(use_src); + nir_alu_instr *alu = NULL; + if (instr->type == nir_instr_type_alu) + alu = nir_instr_as_alu(instr); - if (last_parent_instr != nir_src_parent_instr(use_src)) { + /* Always clone consts for FFMA sources as well, since it will translate + * into 2 PPIR ops and each may need its own const. Redundant consts + * will be dropped by PPIR later + */ + if (last_parent_instr != instr || + (alu && alu->op == nir_op_ffma)) { /* if ssa use, clone for the target block */ - b->cursor = nir_before_instr(nir_src_parent_instr(use_src)); + b->cursor = nir_before_instr(instr); dupl = nir_load_const_instr_create(b->shader, load->def.num_components, load->def.bit_size); diff --git a/src/gallium/drivers/lima/ir/pp/nir.c b/src/gallium/drivers/lima/ir/pp/nir.c index 11525409a20..1e29aa2f933 100644 --- a/src/gallium/drivers/lima/ir/pp/nir.c +++ b/src/gallium/drivers/lima/ir/pp/nir.c @@ -123,6 +123,57 @@ static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node, ppir_node_target_assign(ps, child); } +static bool ppir_emit_ffma(ppir_block *block, nir_instr *ni) +{ + nir_alu_instr *instr = nir_instr_as_alu(ni); + nir_def *def = &instr->def; + unsigned mask = nir_component_mask(def->num_components); + uint8_t identity[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, + PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W }; + + ppir_alu_node *add = ppir_node_create_dest(block, ppir_op_add, def, mask); + if (!add) + return false; + ppir_alu_node *mul = ppir_node_create(block, ppir_op_mul, -1, mask); + if (!mul) + return false; + + ppir_dest *mul_dest = &mul->dest; + ppir_dest *add_dest = &add->dest; + + mul_dest->type = ppir_target_pipeline; + if (util_bitcount(add_dest->write_mask) == 1) { + mul_dest->write_mask = 1; + mul_dest->pipeline = ppir_pipeline_reg_fmul; + } else { + mul_dest->write_mask = u_bit_consecutive(0, 4); + mul_dest->pipeline = ppir_pipeline_reg_vmul; + } + + add->num_src = 2; + mul->num_src = 2; + + for (int i = 0; i < 2; i++) { + nir_alu_src *alu_src = instr->src + i; + ppir_src *ps = mul->src + i; + memcpy(ps->swizzle, alu_src->swizzle, sizeof(ps->swizzle)); + ppir_node_add_src(block->comp, &mul->node, ps, &alu_src->src, mask); + } + + nir_alu_src *alu_src = instr->src + 2; + ppir_src *ps = add->src; + memcpy(ps[1].swizzle, alu_src->swizzle, sizeof(ps[1].swizzle)); + ppir_node_add_src(block->comp, &add->node, ps + 1, &alu_src->src, mask); + + memcpy(ps[0].swizzle, identity, sizeof(ps[0].swizzle)); + ppir_node_target_assign(&ps[0], &mul->node); + ppir_node_add_dep(&add->node, &mul->node, ppir_dep_src); + + list_addtail(&add->node.list, &block->node_list); + list_addtail(&mul->node.list, &block->node_list); + return true; +} + static int nir_to_ppir_opcodes[nir_num_opcodes] = { [nir_op_mov] = ppir_op_mov, [nir_op_fmul] = ppir_op_mul, @@ -152,6 +203,7 @@ static int nir_to_ppir_opcodes[nir_num_opcodes] = { [nir_op_ftrunc] = ppir_op_trunc, [nir_op_fsat] = ppir_op_sat, [nir_op_fclamp_pos] = ppir_op_clamp_pos, + [nir_op_ffma] = ppir_op_ffma, }; static bool ppir_emit_alu(ppir_block *block, nir_instr *ni) @@ -164,6 +216,11 @@ static bool ppir_emit_alu(ppir_block *block, nir_instr *ni) ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name); return false; } + + if (op == ppir_op_ffma) { + return ppir_emit_ffma(block, ni); + } + unsigned mask = nir_component_mask(def->num_components); ppir_alu_node *node = ppir_node_create_dest(block, op, def, mask); if (!node) diff --git a/src/gallium/drivers/lima/ir/pp/ppir.h b/src/gallium/drivers/lima/ir/pp/ppir.h index 3c45d27898a..a349c294005 100644 --- a/src/gallium/drivers/lima/ir/pp/ppir.h +++ b/src/gallium/drivers/lima/ir/pp/ppir.h @@ -118,6 +118,8 @@ typedef enum { ppir_op_undef, ppir_op_dummy, + ppir_op_ffma, + ppir_op_num, } ppir_op; diff --git a/src/gallium/drivers/lima/lima_program.c b/src/gallium/drivers/lima/lima_program.c index 5f623b2e7d9..952c1151a5d 100644 --- a/src/gallium/drivers/lima/lima_program.c +++ b/src/gallium/drivers/lima/lima_program.c @@ -69,9 +69,9 @@ static const nir_shader_compiler_options vs_nir_options = { }; static const nir_shader_compiler_options fs_nir_options = { - .lower_ffma16 = true, - .lower_ffma32 = true, - .lower_ffma64 = true, + .fuse_ffma16 = true, + .fuse_ffma32 = true, + .fuse_ffma64 = true, .lower_fpow = true, .lower_fdiv = true, .lower_fmod = true, @@ -266,6 +266,7 @@ lima_program_optimize_fs_nir(struct nir_shader *s, /* Must be run after optimization loop */ NIR_PASS_V(s, lima_nir_scale_trig); + NIR_PASS_V(s, nir_opt_algebraic_late); NIR_PASS_V(s, lima_nir_ppir_algebraic_late); NIR_PASS_V(s, nir_copy_prop);