mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 20:28:04 +02:00
nir/opt_varyings: add inter-shader code motion for uniform/UBO indexing
If input_value, index, index1 or index2 is an input, here are examples of code that this commit moves from consumers to producers:
- input_value * uniform_array[index]
- uniform_array[index]
- ubo[0].array[index]
- ubo[index].var
- ubo[index1].array[index2]

If the array index is computed from an input, it must be flat or convergent within a primitive to be moved. If the array index is not an input, it must be a uniform expression.

dEQP-GLES31.functional.shaders.opaque_type_indexing.ubo.dynamically_uniform_fragment has UBO indexing that is moved to the producer by this.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32424>
This commit is contained in:
parent
f52ae35d73
commit
dcc679ab3a
1 changed files with 150 additions and 34 deletions
|
|
@ -3236,26 +3236,86 @@ update_movable_flags(struct linkage_info *linkage, nir_instr *instr)
|
||||||
if (NEED_UPDATE_MOVABLE_FLAGS(deref))
|
if (NEED_UPDATE_MOVABLE_FLAGS(deref))
|
||||||
update_movable_flags(linkage, deref);
|
update_movable_flags(linkage, deref);
|
||||||
|
|
||||||
if (deref->pass_flags & FLAG_MOVABLE) {
|
instr->pass_flags |= deref->pass_flags;
|
||||||
/* Treat uniforms as convergent, which means compatible with both
|
return;
|
||||||
* flat and non-flat inputs.
|
|
||||||
*/
|
|
||||||
instr->pass_flags |= FLAG_MOVABLE | FLAG_INTERP_CONVERGENT;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
instr->pass_flags |= FLAG_UNMOVABLE;
|
instr->pass_flags |= FLAG_UNMOVABLE;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
case nir_instr_type_deref:
|
case nir_instr_type_deref: {
|
||||||
if (can_move_deref_between_shaders(linkage, instr) &&
|
if (!can_move_deref_between_shaders(linkage, instr)) {
|
||||||
!nir_deref_instr_has_indirect(nir_instr_as_deref(instr)))
|
|
||||||
instr->pass_flags |= FLAG_MOVABLE;
|
|
||||||
else
|
|
||||||
instr->pass_flags |= FLAG_UNMOVABLE;
|
instr->pass_flags |= FLAG_UNMOVABLE;
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
nir_deref_instr *deref = nir_instr_as_deref(instr);
|
||||||
|
nir_deref_instr *parent = nir_deref_instr_parent(deref);
|
||||||
|
|
||||||
|
if (parent) {
|
||||||
|
if (NEED_UPDATE_MOVABLE_FLAGS(&parent->instr))
|
||||||
|
update_movable_flags(linkage, &parent->instr);
|
||||||
|
|
||||||
|
if (parent->instr.pass_flags & FLAG_UNMOVABLE) {
|
||||||
|
instr->pass_flags |= FLAG_UNMOVABLE;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (deref->deref_type) {
|
||||||
|
case nir_deref_type_var:
|
||||||
|
instr->pass_flags |= FLAG_MOVABLE;
|
||||||
|
return;
|
||||||
|
|
||||||
|
case nir_deref_type_struct:
|
||||||
|
assert(parent->instr.pass_flags & FLAG_MOVABLE);
|
||||||
|
instr->pass_flags |= parent->instr.pass_flags;
|
||||||
|
return;
|
||||||
|
|
||||||
|
case nir_deref_type_array: {
|
||||||
|
nir_instr *index = deref->arr.index.ssa->parent_instr;
|
||||||
|
|
||||||
|
if (NEED_UPDATE_MOVABLE_FLAGS(index))
|
||||||
|
update_movable_flags(linkage, index);
|
||||||
|
|
||||||
|
/* Integer array indices should be movable only if they are
|
||||||
|
* convergent or flat.
|
||||||
|
*/
|
||||||
|
ASSERTED unsigned index_interp = index->pass_flags & FLAG_INTERP_MASK;
|
||||||
|
assert(index->pass_flags & FLAG_UNMOVABLE ||
|
||||||
|
(index_interp == FLAG_INTERP_CONVERGENT ||
|
||||||
|
index_interp == FLAG_INTERP_FLAT));
|
||||||
|
|
||||||
|
if (parent) {
|
||||||
|
unsigned parent_interp = parent->instr.pass_flags & FLAG_INTERP_MASK;
|
||||||
|
|
||||||
|
/* Check if the interpolation flags are compatible. */
|
||||||
|
if (parent_interp != FLAG_INTERP_CONVERGENT &&
|
||||||
|
index_interp != FLAG_INTERP_CONVERGENT &&
|
||||||
|
parent_interp != index_interp) {
|
||||||
|
instr->pass_flags |= FLAG_UNMOVABLE;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Pick the one that isn't convergent because convergent inputs
|
||||||
|
* can be in expressions with any other qualifier.
|
||||||
|
*/
|
||||||
|
if (parent_interp == FLAG_INTERP_CONVERGENT)
|
||||||
|
instr->pass_flags |= index->pass_flags;
|
||||||
|
else
|
||||||
|
instr->pass_flags |= parent->instr.pass_flags;
|
||||||
|
} else {
|
||||||
|
instr->pass_flags |= index->pass_flags;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
default:
|
||||||
|
instr->pass_flags |= FLAG_UNMOVABLE;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
instr->pass_flags |= FLAG_UNMOVABLE;
|
instr->pass_flags |= FLAG_UNMOVABLE;
|
||||||
|
|
@ -3289,10 +3349,14 @@ gather_used_input_loads(nir_instr *instr,
|
||||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||||
|
|
||||||
switch (intr->intrinsic) {
|
switch (intr->intrinsic) {
|
||||||
case nir_intrinsic_load_deref:
|
|
||||||
case nir_intrinsic_load_tess_coord:
|
case nir_intrinsic_load_tess_coord:
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
case nir_intrinsic_load_deref:
|
||||||
|
gather_used_input_loads(intr->src[0].ssa->parent_instr,
|
||||||
|
loads, num_loads);
|
||||||
|
return;
|
||||||
|
|
||||||
case nir_intrinsic_load_input:
|
case nir_intrinsic_load_input:
|
||||||
case nir_intrinsic_load_per_vertex_input:
|
case nir_intrinsic_load_per_vertex_input:
|
||||||
case nir_intrinsic_load_interpolated_input:
|
case nir_intrinsic_load_interpolated_input:
|
||||||
|
|
@ -3309,6 +3373,28 @@ gather_used_input_loads(nir_instr *instr,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case nir_instr_type_deref: {
|
||||||
|
nir_deref_instr *deref = nir_instr_as_deref(instr);
|
||||||
|
nir_deref_instr *parent = nir_deref_instr_parent(deref);
|
||||||
|
|
||||||
|
if (parent)
|
||||||
|
gather_used_input_loads(&parent->instr, loads, num_loads);
|
||||||
|
|
||||||
|
switch (deref->deref_type) {
|
||||||
|
case nir_deref_type_var:
|
||||||
|
case nir_deref_type_struct:
|
||||||
|
return;
|
||||||
|
|
||||||
|
case nir_deref_type_array:
|
||||||
|
gather_used_input_loads(deref->arr.index.ssa->parent_instr,
|
||||||
|
loads, num_loads);
|
||||||
|
return;
|
||||||
|
|
||||||
|
default:
|
||||||
|
unreachable("unexpected deref type");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
unreachable("unexpected instr type");
|
unreachable("unexpected instr type");
|
||||||
}
|
}
|
||||||
|
|
@ -3336,6 +3422,7 @@ try_move_postdominator(struct linkage_info *linkage,
|
||||||
nir_intrinsic_instr *loads[NUM_SCALAR_SLOTS*8];
|
nir_intrinsic_instr *loads[NUM_SCALAR_SLOTS*8];
|
||||||
unsigned num_loads = 0;
|
unsigned num_loads = 0;
|
||||||
gather_used_input_loads(postdom, loads, &num_loads);
|
gather_used_input_loads(postdom, loads, &num_loads);
|
||||||
|
assert(num_loads && "no loads were gathered");
|
||||||
|
|
||||||
/* Clear the flag set by gather_used_input_loads. */
|
/* Clear the flag set by gather_used_input_loads. */
|
||||||
for (unsigned i = 0; i < num_loads; i++)
|
for (unsigned i = 0; i < num_loads; i++)
|
||||||
|
|
@ -3909,28 +3996,57 @@ backward_inter_shader_code_motion(struct linkage_info *linkage,
|
||||||
if (iter->pass_flags & FLAG_UNMOVABLE)
|
if (iter->pass_flags & FLAG_UNMOVABLE)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* This can only be an ALU instruction. */
|
/* We can't move derefs into the previous shader, but we can move
|
||||||
nir_alu_instr *alu = nir_instr_as_alu(iter);
|
* instructions that use derefs.
|
||||||
|
*/
|
||||||
/* Skip unsupported bit sizes and keep searching. */
|
if (iter->type == nir_instr_type_deref)
|
||||||
if (!(alu->def.bit_size & supported_io_types))
|
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/* Skip comparison opcodes that directly source the first load
|
unsigned bit_size;
|
||||||
* and a constant because any 1-bit values would have to be
|
|
||||||
* converted to 32 bits in the producer and then converted back
|
if (iter->type == nir_instr_type_alu) {
|
||||||
* to 1 bit using nir_op_ine in the consumer, achieving nothing.
|
nir_alu_instr *alu = nir_instr_as_alu(iter);
|
||||||
*/
|
|
||||||
if (alu->def.bit_size == 1 &&
|
/* Skip comparison opcodes that directly source the first load
|
||||||
((nir_op_infos[alu->op].num_inputs == 1 &&
|
* and a constant because any 1-bit values would have to be
|
||||||
alu->src[0].src.ssa == load_def) ||
|
* converted to 32 bits in the producer and then converted back
|
||||||
(nir_op_infos[alu->op].num_inputs == 2 &&
|
* to 1 bit using nir_op_ine in the consumer, achieving nothing.
|
||||||
((alu->src[0].src.ssa == load_def &&
|
*/
|
||||||
alu->src[1].src.ssa->parent_instr->type ==
|
if (alu->def.bit_size == 1 &&
|
||||||
nir_instr_type_load_const) ||
|
((nir_op_infos[alu->op].num_inputs == 1 &&
|
||||||
(alu->src[0].src.ssa->parent_instr->type ==
|
alu->src[0].src.ssa == load_def) ||
|
||||||
nir_instr_type_load_const &&
|
(nir_op_infos[alu->op].num_inputs == 2 &&
|
||||||
alu->src[1].src.ssa == load_def)))))
|
((alu->src[0].src.ssa == load_def &&
|
||||||
|
alu->src[1].src.ssa->parent_instr->type ==
|
||||||
|
nir_instr_type_load_const) ||
|
||||||
|
(alu->src[0].src.ssa->parent_instr->type ==
|
||||||
|
nir_instr_type_load_const &&
|
||||||
|
alu->src[1].src.ssa == load_def)))))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
bit_size = alu->def.bit_size;
|
||||||
|
} else if (iter->type == nir_instr_type_intrinsic) {
|
||||||
|
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(iter);
|
||||||
|
|
||||||
|
/* This is a uniform load with a non-constant index because
|
||||||
|
* only a non-constant index can be post-dominated by a load.
|
||||||
|
*/
|
||||||
|
assert(intr->intrinsic == nir_intrinsic_load_deref);
|
||||||
|
|
||||||
|
/* Uniform loads must be scalar if their result is immediately
|
||||||
|
* stored into an output because this pass only works with
|
||||||
|
* scalar outputs.
|
||||||
|
*/
|
||||||
|
if (intr->num_components > 1)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
bit_size = intr->def.bit_size;
|
||||||
|
} else {
|
||||||
|
unreachable("unexpected instr type");
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Skip unsupported bit sizes and keep searching. */
|
||||||
|
if (!(bit_size & supported_io_types))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
movable_postdom = iter;
|
movable_postdom = iter;
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue