nir: Allow the _replicates opcodes to have num_components != 4.

This required relaxing a core NIR assertion which I don't think is doing
any important validation.

The shader-db effects here are small, but they're important for avoiding a
regression when we start doing per-component DCE in opt_shrink_vectors
(https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12468)

softpipe shader-db:
total instructions in shared programs: 2859777 -> 2859454 (-0.01%)
instructions in affected programs: 18881 -> 18558 (-1.71%)
total temps in shared programs: 293994 -> 293914 (-0.03%)
temps in affected programs: 418 -> 338 (-19.14%)

i915g:
total instructions in shared programs: 407562 -> 407544 (<.01%)
instructions in affected programs: 570 -> 552 (-3.16%)

r300:
total instructions in shared programs: 1414450 -> 1414459 (<.01%)
instructions in affected programs: 44494 -> 44503 (0.02%)
total vinst in shared programs: 473782 -> 473727 (-0.01%)
vinst in affected programs: 1102 -> 1047 (-4.99%)
total sinst in shared programs: 231224 -> 231216 (<.01%)
sinst in affected programs: 432 -> 424 (-1.85%)
total temps in shared programs: 197605 -> 197607 (<.01%)
temps in affected programs: 103 -> 105 (1.94%)

crocus hsw:
total instructions in shared programs: 8158185 -> 8158134 (<.01%)
instructions in affected programs: 10927 -> 10876 (-0.47%)

Acked-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15178>
This commit is contained in:
Emma Anholt 2021-12-14 19:26:21 -08:00
parent f030b75b7d
commit b1f349dff4
2 changed files with 2 additions and 7 deletions

View file

@ -78,8 +78,6 @@ class Opcode(object):
assert 0 <= output_size <= 5 or (output_size == 8) or (output_size == 16)
for size in input_sizes:
assert 0 <= size <= 5 or (size == 8) or (size == 16)
if output_size == 0:
assert size == 0
if output_size != 0:
assert size != 0
self.name = name
@ -884,13 +882,13 @@ binop("ixor", tuint, _2src_commutative + associative, "src0 ^ src1")
binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
"{src}")
binop_reduce("fdot", 4, tfloat, tfloat,
binop_reduce("fdot", 0, tfloat, tfloat,
"{src0} * {src1}", "{src0} + {src1}", "{src}",
suffix="_replicated")
opcode("fdph", 1, tfloat, [3, 4], [tfloat, tfloat], False, "",
"src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w")
opcode("fdph_replicated", 4, tfloat, [3, 4], [tfloat, tfloat], False, "",
opcode("fdph_replicated", 0, tfloat, [3, 4], [tfloat, tfloat], False, "",
"src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w")
binop("fmin", tfloat, _2src_commutative + associative, "fmin(src0, src1)")

View file

@ -27,13 +27,10 @@ dEQP-GLES2.functional.shaders.functions.control_flow.mixed_return_break_continue
dEQP-GLES2.functional.shaders.functions.control_flow.return_after_continue_fragment,Fail
dEQP-GLES2.functional.shaders.functions.control_flow.return_in_loop_if_fragment,Fail
dEQP-GLES2.functional.shaders.functions.control_flow.return_in_nested_loop_fragment,Fail
dEQP-GLES2.functional.shaders.indexing.matrix_subscript.mat4_dynamic_write_dynamic_read_fragment,Fail
dEQP-GLES2.functional.shaders.indexing.tmp_array.float_const_write_dynamic_read_fragment,Fail
dEQP-GLES2.functional.shaders.indexing.tmp_array.vec2_const_write_dynamic_read_fragment,Fail
dEQP-GLES2.functional.shaders.indexing.tmp_array.vec3_const_write_dynamic_read_fragment,Fail
dEQP-GLES2.functional.shaders.indexing.tmp_array.vec3_dynamic_write_dynamic_read_fragment,Fail
dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_const_write_dynamic_read_fragment,Fail
dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_dynamic_write_dynamic_read_fragment,Fail
dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec4_dynamic_subscript_write_static_loop_subscript_read_fragment,Fail
dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec4_dynamic_subscript_write_component_read_fragment,Fail