mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-27 11:00:37 +02:00
i965/vec4: Don't cmod propagate from CMP to ADD if the writemask isn't compatible
Otherwise we can incorrectly cmod propagate in situations like
add(8) g10<1>.xD g2<0>.xD -16D
...
cmp.ge.f0(8) null<1>D g2<0>.xD 16D
...
(+f0) sel(8) g21<1>.xyUD g14<4>.xyyyUD g18<4>.xyyyUD
Sadly, this change hurts quite a few shaders.
v2: Refactor writemask compatibility check into a separate function.
Suggested by Caio.
Ivy Bridge and Haswell had similar results. (Haswell shown)
total instructions in shared programs: 12968489 -> 12968738 (<.01%)
instructions in affected programs: 60679 -> 60928 (0.41%)
helped: 0
HURT: 249
HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
HURT stats (rel) min: 0.22% max: 0.81% x̄: 0.46% x̃: 0.44%
95% mean confidence interval for instructions value: 1.00 1.00
95% mean confidence interval for instructions %-change: 0.44% 0.48%
Instructions are HURT.
total cycles in shared programs: 409171965 -> 409172317 (<.01%)
cycles in affected programs: 260056 -> 260408 (0.14%)
helped: 0
HURT: 176
HURT stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2
HURT stats (rel) min: 0.04% max: 0.34% x̄: 0.17% x̃: 0.17%
95% mean confidence interval for cycles value: 2.00 2.00
95% mean confidence interval for cycles %-change: 0.16% 0.18%
Cycles are HURT.
Sandy Bridge
total instructions in shared programs: 10423577 -> 10423753 (<.01%)
instructions in affected programs: 40667 -> 40843 (0.43%)
helped: 0
HURT: 176
HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
HURT stats (rel) min: 0.29% max: 0.79% x̄: 0.48% x̃: 0.42%
95% mean confidence interval for instructions value: 1.00 1.00
95% mean confidence interval for instructions %-change: 0.46% 0.51%
Instructions are HURT.
total cycles in shared programs: 146097503 -> 146097855 (<.01%)
cycles in affected programs: 503990 -> 504342 (0.07%)
helped: 0
HURT: 176
HURT stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2
HURT stats (rel) min: 0.02% max: 0.36% x̄: 0.12% x̃: 0.11%
95% mean confidence interval for cycles value: 2.00 2.00
95% mean confidence interval for cycles %-change: 0.11% 0.13%
Cycles are HURT.
No changes on any other platforms.
Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Fixes: cd635d149b i965/vec4: Propagate conditional modifiers from compares to adds
Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
This commit is contained in:
parent
fb6dc8e894
commit
995d993710
2 changed files with 87 additions and 5 deletions
|
|
@ -35,6 +35,17 @@
|
|||
|
||||
namespace brw {
|
||||
|
||||
static bool
|
||||
writemasks_incompatible(const vec4_instruction *earlier,
|
||||
const vec4_instruction *later)
|
||||
{
|
||||
return (earlier->dst.writemask != WRITEMASK_X &&
|
||||
earlier->dst.writemask != WRITEMASK_XYZW) ||
|
||||
(earlier->dst.writemask == WRITEMASK_XYZW &&
|
||||
later->src[0].swizzle != BRW_SWIZZLE_XYZW) ||
|
||||
(later->dst.writemask & ~earlier->dst.writemask) != 0;
|
||||
}
|
||||
|
||||
static bool
|
||||
opt_cmod_propagation_local(bblock_t *block)
|
||||
{
|
||||
|
|
@ -82,6 +93,9 @@ opt_cmod_propagation_local(bblock_t *block)
|
|||
if (scan_inst->opcode != BRW_OPCODE_ADD)
|
||||
goto not_match;
|
||||
|
||||
if (writemasks_incompatible(scan_inst, inst))
|
||||
goto not_match;
|
||||
|
||||
/* A CMP is basically a subtraction. The result of the
|
||||
* subtraction must be the same as the result of the addition.
|
||||
* This means that one of the operands must be negated. So (a +
|
||||
|
|
@ -132,11 +146,7 @@ opt_cmod_propagation_local(bblock_t *block)
|
|||
scan_inst->dst, scan_inst->size_written)) {
|
||||
if ((scan_inst->predicate && scan_inst->opcode != BRW_OPCODE_SEL) ||
|
||||
scan_inst->dst.offset != inst->src[0].offset ||
|
||||
(scan_inst->dst.writemask != WRITEMASK_X &&
|
||||
scan_inst->dst.writemask != WRITEMASK_XYZW) ||
|
||||
(scan_inst->dst.writemask == WRITEMASK_XYZW &&
|
||||
inst->src[0].swizzle != BRW_SWIZZLE_XYZW) ||
|
||||
(inst->dst.writemask & ~scan_inst->dst.writemask) != 0 ||
|
||||
writemasks_incompatible(scan_inst, inst) ||
|
||||
scan_inst->exec_size != inst->exec_size ||
|
||||
scan_inst->group != inst->group) {
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -821,3 +821,75 @@ TEST_F(cmod_propagation_test, mul_cmp_different_channels_vec4)
|
|||
EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
|
||||
EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 1)->conditional_mod);
|
||||
}
|
||||
|
||||
TEST_F(cmod_propagation_test, add_cmp_same_dst_writemask)
|
||||
{
|
||||
const vec4_builder bld = vec4_builder(v).at_end();
|
||||
dst_reg dest = dst_reg(v, glsl_type::vec4_type);
|
||||
src_reg src0 = src_reg(v, glsl_type::vec4_type);
|
||||
src_reg src1 = src_reg(v, glsl_type::vec4_type);
|
||||
dst_reg dest_null = bld.null_reg_f();
|
||||
|
||||
bld.ADD(dest, src0, src1);
|
||||
vec4_instruction *inst = bld.CMP(dest_null, src0, src1, BRW_CONDITIONAL_GE);
|
||||
inst->src[1].negate = true;
|
||||
|
||||
/* = Before =
|
||||
*
|
||||
* 0: add dest.xyzw src0 src1
|
||||
* 1: cmp.ge.f0 null.xyzw src0 -src1
|
||||
*
|
||||
* = After =
|
||||
* 0: add.ge.f0 dest.xyzw src0 src1
|
||||
*/
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
|
||||
EXPECT_EQ(0, block0->start_ip);
|
||||
EXPECT_EQ(1, block0->end_ip);
|
||||
|
||||
EXPECT_TRUE(cmod_propagation(v));
|
||||
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(0, block0->end_ip);
|
||||
EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
|
||||
EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod);
|
||||
}
|
||||
|
||||
TEST_F(cmod_propagation_test, add_cmp_different_dst_writemask)
|
||||
{
|
||||
const vec4_builder bld = vec4_builder(v).at_end();
|
||||
dst_reg dest = dst_reg(v, glsl_type::float_type);
|
||||
src_reg src0 = src_reg(v, glsl_type::vec4_type);
|
||||
src_reg src1 = src_reg(v, glsl_type::vec4_type);
|
||||
dst_reg dest_null = bld.null_reg_f();
|
||||
|
||||
bld.ADD(dest, src0, src1);
|
||||
vec4_instruction *inst = bld.CMP(dest_null, src0, src1, BRW_CONDITIONAL_GE);
|
||||
inst->src[1].negate = true;
|
||||
|
||||
/* = Before =
|
||||
*
|
||||
* 0: add dest.x src0 src1
|
||||
* 1: cmp.ge.f0 null.xyzw src0 -src1
|
||||
*
|
||||
* = After =
|
||||
* (no changes)
|
||||
*/
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
|
||||
EXPECT_EQ(0, block0->start_ip);
|
||||
EXPECT_EQ(1, block0->end_ip);
|
||||
|
||||
EXPECT_FALSE(cmod_propagation(v));
|
||||
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(1, block0->end_ip);
|
||||
EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
|
||||
EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
|
||||
EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
|
||||
EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue