mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 04:58:05 +02:00
i965/fs: Don't set exec_all on instructions wider than the original in lower_simd_width.
This could have led to somewhat increased bandwidth usage for lowered texturing instructions on Gen4 (which is the only case in which lower_width may be greater than inst->exec_size). After the previous patches, the invariant mentioned in the comment should no longer be assumed by any of the other optimization and lowering passes, so the exec_all() call shouldn't be necessary anymore.

Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
This commit is contained in:
parent
eaba922582
commit
4529916dfd
1 changed file with 11 additions and 9 deletions
|
|
@ -4163,10 +4163,15 @@ fs_visitor::lower_simd_width()
|
||||||
const unsigned lower_width = get_lowered_simd_width(devinfo, inst);
|
const unsigned lower_width = get_lowered_simd_width(devinfo, inst);
|
||||||
|
|
||||||
if (lower_width != inst->exec_size) {
|
if (lower_width != inst->exec_size) {
|
||||||
/* Builder matching the original instruction. */
|
/* Builder matching the original instruction. We may also need to
|
||||||
|
* emit an instruction of width larger than the original, set the
|
||||||
|
* execution size of the builder to the highest of both for now so
|
||||||
|
* we're sure that both cases can be handled.
|
||||||
|
*/
|
||||||
const fs_builder ibld = bld.at(block, inst)
|
const fs_builder ibld = bld.at(block, inst)
|
||||||
.exec_all(inst->force_writemask_all)
|
.exec_all(inst->force_writemask_all)
|
||||||
.group(inst->exec_size, inst->force_sechalf);
|
.group(MAX2(inst->exec_size, lower_width),
|
||||||
|
inst->force_sechalf);
|
||||||
|
|
||||||
/* Split the copies in chunks of the execution width of either the
|
/* Split the copies in chunks of the execution width of either the
|
||||||
* original or the lowered instruction, whichever is lower.
|
* original or the lowered instruction, whichever is lower.
|
||||||
|
|
@ -4189,14 +4194,11 @@ fs_visitor::lower_simd_width()
|
||||||
split_inst.exec_size = lower_width;
|
split_inst.exec_size = lower_width;
|
||||||
split_inst.eot = inst->eot && i == n - 1;
|
split_inst.eot = inst->eot && i == n - 1;
|
||||||
|
|
||||||
/* Set exec_all if the lowered width is higher than the original
|
/* Select the correct channel enables for the i-th group, then
|
||||||
* to avoid breaking the compiler invariant that no control
|
* transform the sources and destination and emit the lowered
|
||||||
* flow-masked instruction is wider than the shader's
|
* instruction.
|
||||||
* dispatch_width. Then transform the sources and destination and
|
|
||||||
* emit the lowered instruction.
|
|
||||||
*/
|
*/
|
||||||
const fs_builder lbld = ibld.exec_all(lower_width > inst->exec_size)
|
const fs_builder lbld = ibld.group(lower_width, i);
|
||||||
.group(lower_width, i);
|
|
||||||
|
|
||||||
for (unsigned j = 0; j < inst->sources; j++) {
|
for (unsigned j = 0; j < inst->sources; j++) {
|
||||||
if (inst->src[j].file != BAD_FILE &&
|
if (inst->src[j].file != BAD_FILE &&
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue