mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 02:20:11 +01:00
intel/compiler: Vectorize gl_TessLevelInner/Outer[] writes
Setting the NIR options takes care of iris thanks to the common st/mesa linking code, and updating brw_nir_link_shaders should handle anv.

The main effort here is updating remap_tess_levels, which needs to handle vector stores, writemasking, and swizzling. Unfortunately, we also need to continue handling the existing single-component access because it's used for TES inputs, which we don't vectorize.

We could try to vectorize TES inputs too, but they're all pushed anyway, so it wouldn't buy us much other than deleting this code. Also, we do have opt_combine_stores, but not one for loads.

One limitation of using nir_vectorize_tess_levels is that it works on variables, and so isn't able to combine outer/inner writes that happen to live in the same vec4 slot (for triangle domains). That said, it's still better than before.

For writes, we allow the intrinsics to supply up to the full size of the variable (vec4 for outer, vec2 for inner) even if the domain only requires a subset of those components (e.g. triangles need 3).

shader-db results on Icelake:

total instructions in shared programs: 19605070 -> 19602284 (-0.01%)
instructions in affected programs: 65338 -> 62552 (-4.26%)
helped: 271 / HURT: 0
helped stats (abs) min: 6 max: 24 x̄: 10.28 x̃: 12
helped stats (rel) min: 1.30% max: 18.18% x̄: 5.80% x̃: 7.59%
95% mean confidence interval for instructions value: -10.71 -9.85
95% mean confidence interval for instructions %-change: -6.17% -5.43%
Instructions are helped.

total cycles in shared programs: 851854659 -> 851820320 (<.01%)
cycles in affected programs: 618749 -> 584410 (-5.55%)
helped: 271 / HURT: 0
helped stats (abs) min: 69 max: 540 x̄: 126.71 x̃: 108
helped stats (rel) min: 2.57% max: 37.97% x̄: 6.17% x̃: 5.06%
95% mean confidence interval for cycles value: -135.89 -117.54
95% mean confidence interval for cycles %-change: -6.72% -5.63%
Cycles are helped.

total sends in shared programs: 1025285 -> 1024355 (-0.09%)
sends in affected programs: 6454 -> 5524 (-14.41%)
helped: 271 / HURT: 0
helped stats (abs) min: 2 max: 8 x̄: 3.43 x̃: 4
helped stats (rel) min: 5.71% max: 25.00% x̄: 14.98% x̃: 17.39%
95% mean confidence interval for sends value: -3.57 -3.29
95% mean confidence interval for sends %-change: -15.42% -14.54%
Sends are helped.

According to Felix DeGrood, this results in a 10% improvement in the draw call time for certain draw calls from Strange Brigade.

Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17944>
This commit is contained in:
parent
c9d399604e
commit
abba55382f
2 changed files with 128 additions and 17 deletions
|
|
@ -44,6 +44,7 @@
|
|||
.lower_ldexp = true, \
|
||||
.lower_device_index_to_zero = true, \
|
||||
.vectorize_io = true, \
|
||||
.vectorize_tess_levels = true, \
|
||||
.use_interpolated_input_intrinsics = true, \
|
||||
.lower_insert_byte = true, \
|
||||
.lower_insert_word = true, \
|
||||
|
|
|
|||
|
|
@ -35,19 +35,54 @@ remap_tess_levels(nir_builder *b, nir_intrinsic_instr *intr,
|
|||
{
|
||||
const int location = nir_intrinsic_base(intr);
|
||||
const unsigned component = nir_intrinsic_component(intr);
|
||||
bool out_of_bounds;
|
||||
bool out_of_bounds = false;
|
||||
bool write = !nir_intrinsic_infos[intr->intrinsic].has_dest;
|
||||
unsigned mask = write ? nir_intrinsic_write_mask(intr) : 0;
|
||||
nir_ssa_def *src = NULL, *dest = NULL;
|
||||
|
||||
if (write) {
|
||||
assert(intr->src[0].is_ssa);
|
||||
assert(intr->num_components == intr->src[0].ssa->num_components);
|
||||
} else {
|
||||
assert(intr->dest.is_ssa);
|
||||
assert(intr->num_components == intr->dest.ssa.num_components);
|
||||
}
|
||||
|
||||
if (location == VARYING_SLOT_TESS_LEVEL_INNER) {
|
||||
b->cursor = write ? nir_before_instr(&intr->instr)
|
||||
: nir_after_instr(&intr->instr);
|
||||
|
||||
switch (_primitive_mode) {
|
||||
case TESS_PRIMITIVE_QUADS:
|
||||
/* gl_TessLevelInner[0..1] lives at DWords 3-2 (reversed). */
|
||||
nir_intrinsic_set_base(intr, 0);
|
||||
nir_intrinsic_set_component(intr, 3 - component);
|
||||
out_of_bounds = false;
|
||||
|
||||
if (write) {
|
||||
assert(intr->src[0].ssa->num_components >= 2);
|
||||
|
||||
intr->num_components = 4;
|
||||
|
||||
nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
|
||||
nir_ssa_def *x = nir_channel(b, intr->src[0].ssa, 0);
|
||||
nir_ssa_def *y = nir_channel(b, intr->src[0].ssa, 1);
|
||||
src = nir_vec4(b, undef, undef, y, x);
|
||||
mask = !!(mask & WRITEMASK_X) << 3 | !!(mask & WRITEMASK_Y) << 2;
|
||||
} else if (intr->dest.ssa.num_components > 1) {
|
||||
assert(intr->dest.ssa.num_components == 2);
|
||||
|
||||
intr->num_components = 4;
|
||||
intr->dest.ssa.num_components = 4;
|
||||
|
||||
unsigned wz[2] = { 3, 2 };
|
||||
dest = nir_swizzle(b, &intr->dest.ssa, wz, 2);
|
||||
} else {
|
||||
nir_intrinsic_set_component(intr, 3 - component);
|
||||
}
|
||||
break;
|
||||
case TESS_PRIMITIVE_TRIANGLES:
|
||||
/* gl_TessLevelInner[0] lives at DWord 4. */
|
||||
nir_intrinsic_set_base(intr, 1);
|
||||
mask &= WRITEMASK_X;
|
||||
out_of_bounds = component > 0;
|
||||
break;
|
||||
case TESS_PRIMITIVE_ISOLINES:
|
||||
|
|
@ -57,28 +92,98 @@ remap_tess_levels(nir_builder *b, nir_intrinsic_instr *intr,
|
|||
unreachable("Bogus tessellation domain");
|
||||
}
|
||||
} else if (location == VARYING_SLOT_TESS_LEVEL_OUTER) {
|
||||
if (_primitive_mode == TESS_PRIMITIVE_ISOLINES) {
|
||||
b->cursor = write ? nir_before_instr(&intr->instr)
|
||||
: nir_after_instr(&intr->instr);
|
||||
|
||||
nir_intrinsic_set_base(intr, 1);
|
||||
|
||||
switch (_primitive_mode) {
|
||||
case TESS_PRIMITIVE_QUADS:
|
||||
/* gl_TessLevelOuter[0..3] lives at DWords 7-4 (reversed). */
|
||||
if (write) {
|
||||
assert(intr->src[0].ssa->num_components == 4);
|
||||
|
||||
unsigned wzyx[4] = { 3, 2, 1, 0 };
|
||||
src = nir_swizzle(b, intr->src[0].ssa, wzyx, 4);
|
||||
mask = !!(mask & WRITEMASK_X) << 3 | !!(mask & WRITEMASK_Y) << 2 |
|
||||
!!(mask & WRITEMASK_Z) << 1 | !!(mask & WRITEMASK_W) << 0;
|
||||
} else if (intr->dest.ssa.num_components > 1) {
|
||||
assert(intr->dest.ssa.num_components == 4);
|
||||
|
||||
unsigned wzyx[4] = { 3, 2, 1, 0 };
|
||||
dest = nir_swizzle(b, &intr->dest.ssa, wzyx, 4);
|
||||
} else {
|
||||
nir_intrinsic_set_component(intr, 3 - component);
|
||||
}
|
||||
break;
|
||||
case TESS_PRIMITIVE_TRIANGLES:
|
||||
/* gl_TessLevelOuter[0..2] lives at DWords 7-5 (reversed). */
|
||||
if (write) {
|
||||
assert(intr->src[0].ssa->num_components >= 3);
|
||||
|
||||
intr->num_components = 4;
|
||||
|
||||
nir_ssa_def *x = nir_channel(b, intr->src[0].ssa, 0);
|
||||
nir_ssa_def *y = nir_channel(b, intr->src[0].ssa, 1);
|
||||
nir_ssa_def *z = nir_channel(b, intr->src[0].ssa, 2);
|
||||
nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
|
||||
src = nir_vec4(b, undef, z, y, x);
|
||||
mask = !!(mask & WRITEMASK_X) << 3 | !!(mask & WRITEMASK_Y) << 2 |
|
||||
!!(mask & WRITEMASK_Z) << 1;
|
||||
} else if (intr->dest.ssa.num_components > 1) {
|
||||
assert(intr->dest.ssa.num_components == 3);
|
||||
|
||||
intr->num_components = 4;
|
||||
intr->dest.ssa.num_components = 4;
|
||||
|
||||
unsigned wzy[3] = { 3, 2, 1 };
|
||||
dest = nir_swizzle(b, &intr->dest.ssa, wzy, 3);
|
||||
} else {
|
||||
nir_intrinsic_set_component(intr, 3 - component);
|
||||
out_of_bounds = component == 3;
|
||||
}
|
||||
break;
|
||||
case TESS_PRIMITIVE_ISOLINES:
|
||||
/* gl_TessLevelOuter[0..1] lives at DWords 6-7 (in order). */
|
||||
nir_intrinsic_set_base(intr, 1);
|
||||
nir_intrinsic_set_component(intr, 2 + nir_intrinsic_component(intr));
|
||||
out_of_bounds = component > 1;
|
||||
} else {
|
||||
/* Triangles use DWords 7-5 (reversed); Quads use 7-4 (reversed) */
|
||||
nir_intrinsic_set_base(intr, 1);
|
||||
nir_intrinsic_set_component(intr, 3 - nir_intrinsic_component(intr));
|
||||
out_of_bounds = component == 3 && _primitive_mode == TESS_PRIMITIVE_TRIANGLES;
|
||||
if (write) {
|
||||
assert(intr->src[0].ssa->num_components >= 2);
|
||||
|
||||
nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
|
||||
nir_ssa_def *x = nir_channel(b, intr->src[0].ssa, 0);
|
||||
nir_ssa_def *y = nir_channel(b, intr->src[0].ssa, 1);
|
||||
src = nir_vec4(b, undef, undef, x, y);
|
||||
mask = !!(mask & WRITEMASK_X) << 2 | !!(mask & WRITEMASK_Y) << 3;
|
||||
} else if (intr->dest.ssa.num_components > 1) {
|
||||
assert(intr->dest.ssa.num_components == 2);
|
||||
|
||||
unsigned zw[2] = { 2, 3 };
|
||||
dest = nir_swizzle(b, &intr->dest.ssa, zw, 2);
|
||||
} else {
|
||||
nir_intrinsic_set_component(intr, 2 + component);
|
||||
out_of_bounds = component > 1;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
unreachable("Bogus tessellation domain");
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (out_of_bounds) {
|
||||
if (nir_intrinsic_infos[intr->intrinsic].has_dest) {
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
|
||||
nir_ssa_def_rewrite_uses(&intr->dest.ssa, undef);
|
||||
}
|
||||
if (!write)
|
||||
nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_ssa_undef(b, 1, 32));
|
||||
nir_instr_remove(&intr->instr);
|
||||
} else if (write) {
|
||||
nir_intrinsic_set_write_mask(intr, mask);
|
||||
|
||||
if (src) {
|
||||
nir_instr_rewrite_src(&intr->instr, &intr->src[0],
|
||||
nir_src_for_ssa(src));
|
||||
}
|
||||
} else if (dest) {
|
||||
nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, dest,
|
||||
dest->parent_instr);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
@ -1001,6 +1106,11 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
|
|||
}
|
||||
|
||||
NIR_PASS(_, producer, nir_lower_io_to_vector, nir_var_shader_out);
|
||||
|
||||
if (producer->info.stage == MESA_SHADER_TESS_CTRL &&
|
||||
producer->options->vectorize_tess_levels)
|
||||
NIR_PASS_V(producer, nir_vectorize_tess_levels);
|
||||
|
||||
NIR_PASS(_, producer, nir_opt_combine_stores, nir_var_shader_out);
|
||||
NIR_PASS(_, consumer, nir_lower_io_to_vector, nir_var_shader_in);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue