nir: Allow the _replicates opcodes to have num_components != 4.

This required relaxing a core NIR assertion which I don't think is doing any important validation. The shader-db effects here are small, but they're important for avoiding a regression when we start doing per-component DCE in opt_shrink_vectors (https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12468).

softpipe shader-db:
total instructions in shared programs: 2859777 -> 2859454 (-0.01%)
instructions in affected programs: 18881 -> 18558 (-1.71%)
total temps in shared programs: 293994 -> 293914 (-0.03%)
temps in affected programs: 418 -> 338 (-19.14%)

i915g:
total instructions in shared programs: 407562 -> 407544 (<.01%)
instructions in affected programs: 570 -> 552 (-3.16%)

r300:
total instructions in shared programs: 1414450 -> 1414459 (<.01%)
instructions in affected programs: 44494 -> 44503 (0.02%)
total vinst in shared programs: 473782 -> 473727 (-0.01%)
vinst in affected programs: 1102 -> 1047 (-4.99%)
total sinst in shared programs: 231224 -> 231216 (<.01%)
sinst in affected programs: 432 -> 424 (-1.85%)
total temps in shared programs: 197605 -> 197607 (<.01%)
temps in affected programs: 103 -> 105 (1.94%)

crocus hsw:
total instructions in shared programs: 8158185 -> 8158134 (<.01%)
instructions in affected programs: 10927 -> 10876 (-0.47%)

Acked-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15178>
2026-05-05 05:18:08 +02:00 · 2021-12-14 19:26:21 -08:00 · 2021-12-14 19:26:21 -08:00 · b1f349dff4
commit b1f349dff4
parent f030b75b7d
2 changed files with 2 additions and 7 deletions
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -78,8 +78,6 @@ class Opcode(object):
      assert 0 <= output_size <= 5 or (output_size == 8) or (output_size == 16)
      for size in input_sizes:
         assert 0 <= size <= 5 or (size == 8) or (size == 16)
-         if output_size == 0:
-            assert size == 0
         if output_size != 0:
            assert size != 0
      self.name = name
@@ -884,13 +882,13 @@ binop("ixor", tuint, _2src_commutative + associative, "src0 ^ src1")
 binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
             "{src}")

-binop_reduce("fdot", 4, tfloat, tfloat,
+binop_reduce("fdot", 0, tfloat, tfloat,
             "{src0} * {src1}", "{src0} + {src1}", "{src}",
             suffix="_replicated")

 opcode("fdph", 1, tfloat, [3, 4], [tfloat, tfloat], False, "",
       "src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w")
-opcode("fdph_replicated", 4, tfloat, [3, 4], [tfloat, tfloat], False, "",
+opcode("fdph_replicated", 0, tfloat, [3, 4], [tfloat, tfloat], False, "",
       "src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w")

 binop("fmin", tfloat, _2src_commutative + associative, "fmin(src0, src1)")
--- a/src/gallium/drivers/i915/ci/i915-g33-fails.txt
+++ b/src/gallium/drivers/i915/ci/i915-g33-fails.txt
@@ -27,13 +27,10 @@ dEQP-GLES2.functional.shaders.functions.control_flow.mixed_return_break_continue
 dEQP-GLES2.functional.shaders.functions.control_flow.return_after_continue_fragment,Fail
 dEQP-GLES2.functional.shaders.functions.control_flow.return_in_loop_if_fragment,Fail
 dEQP-GLES2.functional.shaders.functions.control_flow.return_in_nested_loop_fragment,Fail
-dEQP-GLES2.functional.shaders.indexing.matrix_subscript.mat4_dynamic_write_dynamic_read_fragment,Fail
 dEQP-GLES2.functional.shaders.indexing.tmp_array.float_const_write_dynamic_read_fragment,Fail
 dEQP-GLES2.functional.shaders.indexing.tmp_array.vec2_const_write_dynamic_read_fragment,Fail
 dEQP-GLES2.functional.shaders.indexing.tmp_array.vec3_const_write_dynamic_read_fragment,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec3_dynamic_write_dynamic_read_fragment,Fail
 dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_const_write_dynamic_read_fragment,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_dynamic_write_dynamic_read_fragment,Fail

 dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec4_dynamic_subscript_write_static_loop_subscript_read_fragment,Fail
 dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec4_dynamic_subscript_write_component_read_fragment,Fail