diff --git a/src/intel/compiler/brw_ir_performance.cpp b/src/intel/compiler/brw_ir_performance.cpp index 182779657d1..43555bd40a9 100644 --- a/src/intel/compiler/brw_ir_performance.cpp +++ b/src/intel/compiler/brw_ir_performance.cpp @@ -1493,7 +1493,7 @@ namespace { st, reg_dependency_id(devinfo, brw_acc_reg(8), j)); } - if (inst->writes_flag()) + if (inst->writes_flag(devinfo)) stall_on_dependency(st, dependency_id_flag0); } @@ -1531,7 +1531,7 @@ namespace { reg_dependency_id(devinfo, brw_acc_reg(8), j)); } - if (inst->writes_flag()) + if (inst->writes_flag(devinfo)) mark_write_dependency(st, perf, dependency_id_flag0); } diff --git a/src/intel/compiler/brw_ir_vec4.h b/src/intel/compiler/brw_ir_vec4.h index 6ee6bf922d1..ec0952b4c04 100644 --- a/src/intel/compiler/brw_ir_vec4.h +++ b/src/intel/compiler/brw_ir_vec4.h @@ -330,9 +330,9 @@ public: } } - bool writes_flag() const + bool writes_flag(const intel_device_info *devinfo) const { - return (conditional_mod && (opcode != BRW_OPCODE_SEL && + return (conditional_mod && ((opcode != BRW_OPCODE_SEL || devinfo->ver <= 5) && opcode != BRW_OPCODE_CSEL && opcode != BRW_OPCODE_IF && opcode != BRW_OPCODE_WHILE)); diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index 7c6d459e00d..ae24bff2271 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -1489,7 +1489,7 @@ vec4_instruction_scheduler::calculate_deps() } } - if (inst->writes_flag()) { + if (inst->writes_flag(v->devinfo)) { add_dep(last_conditional_mod, n, 0); last_conditional_mod = n; } @@ -1565,7 +1565,7 @@ vec4_instruction_scheduler::calculate_deps() } } - if (inst->writes_flag()) { + if (inst->writes_flag(v->devinfo)) { last_conditional_mod = n; } diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp index 2053be3579d..347500a36f7 100644 --- a/src/intel/compiler/brw_vec4.cpp +++ b/src/intel/compiler/brw_vec4.cpp @@ -1141,7 +1141,7 @@ vec4_instruction::can_reswizzle(const struct intel_device_info *devinfo, /* If we write to the flag register changing the swizzle would change * what channels are written to the flag register. */ - if (writes_flag()) + if (writes_flag(devinfo)) return false; /* We can't swizzle implicit accumulator access. We'd have to diff --git a/src/intel/compiler/brw_vec4_cmod_propagation.cpp b/src/intel/compiler/brw_vec4_cmod_propagation.cpp index be0f957aeba..acea9e3ced3 100644 --- a/src/intel/compiler/brw_vec4_cmod_propagation.cpp +++ b/src/intel/compiler/brw_vec4_cmod_propagation.cpp @@ -278,7 +278,7 @@ opt_cmod_propagation_local(bblock_t *block, vec4_visitor *v) */ if (inst->conditional_mod == BRW_CONDITIONAL_NZ && !inst->src[0].negate && - scan_inst->writes_flag()) { + scan_inst->writes_flag(v->devinfo)) { inst->remove(block); progress = true; break; @@ -337,7 +337,7 @@ opt_cmod_propagation_local(bblock_t *block, vec4_visitor *v) } not_match: - if (scan_inst->writes_flag()) + if (scan_inst->writes_flag(v->devinfo)) break; read_flag = read_flag || scan_inst->reads_flag(); diff --git a/src/intel/compiler/brw_vec4_cse.cpp b/src/intel/compiler/brw_vec4_cse.cpp index accc037af8d..45be33721dd 100644 --- a/src/intel/compiler/brw_vec4_cse.cpp +++ b/src/intel/compiler/brw_vec4_cse.cpp @@ -261,9 +261,9 @@ vec4_visitor::opt_cse_local(bblock_t *block, const vec4_live_variables &live) /* Kill all AEB entries that write a different value to or read from * the flag register if we just wrote it. */ - if (inst->writes_flag()) { + if (inst->writes_flag(devinfo)) { if (entry->generator->reads_flag() || - (entry->generator->writes_flag() && + (entry->generator->writes_flag(devinfo) && !instructions_match(inst, entry->generator))) { entry->remove(); ralloc_free(entry); diff --git a/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp b/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp index 39fd3f513ff..10a64a56143 100644 --- a/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp +++ b/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp @@ -54,7 +54,7 @@ vec4_visitor::dead_code_eliminate() foreach_inst_in_block_reverse_safe(vec4_instruction, inst, block) { if ((inst->dst.file == VGRF && !inst->has_side_effects()) || - (inst->dst.is_null() && inst->writes_flag())){ + (inst->dst.is_null() && inst->writes_flag(devinfo))){ bool result_live[4] = { false }; if (inst->dst.file == VGRF) { for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); i++) { @@ -80,7 +80,7 @@ vec4_visitor::dead_code_eliminate() result_live[3] = result; } - if (inst->writes_flag()) { + if (inst->writes_flag(devinfo)) { /* Independently calculate the usage of the flag components and * the destination value components. */ @@ -126,7 +126,7 @@ vec4_visitor::dead_code_eliminate() } } - if (inst->dst.is_null() && inst->writes_flag()) { + if (inst->dst.is_null() && inst->writes_flag(devinfo)) { bool combined_live = false; for (unsigned c = 0; c < 4; c++) combined_live |= BITSET_TEST(flag_live, c); @@ -149,7 +149,7 @@ vec4_visitor::dead_code_eliminate() } } - if (inst->writes_flag() && !inst->predicate && inst->exec_size == 8) { + if (inst->writes_flag(devinfo) && !inst->predicate && inst->exec_size == 8) { for (unsigned c = 0; c < 4; c++) BITSET_CLEAR(flag_live, c); } diff --git a/src/intel/compiler/brw_vec4_live_variables.cpp b/src/intel/compiler/brw_vec4_live_variables.cpp index cc3f0a5a3c4..88fa179d0f5 100644 --- a/src/intel/compiler/brw_vec4_live_variables.cpp +++ b/src/intel/compiler/brw_vec4_live_variables.cpp @@ -119,7 +119,7 @@ vec4_live_variables::setup_def_use() } } } - if (inst->writes_flag()) { + if (inst->writes_flag(devinfo)) { for (unsigned c = 0; c < 4; c++) { if ((inst->dst.writemask & (1 << c)) && !BITSET_TEST(bd->flag_use, c)) { @@ -229,6 +229,8 @@ vec4_live_variables::vec4_live_variables(const backend_shader *s) end[i] = -1; } + devinfo = s->compiler->devinfo; + block_data = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks); bitset_words = BITSET_WORDS(num_vars); diff --git a/src/intel/compiler/brw_vec4_live_variables.h b/src/intel/compiler/brw_vec4_live_variables.h index 06281432955..39d97c8a521 100644 --- a/src/intel/compiler/brw_vec4_live_variables.h +++ b/src/intel/compiler/brw_vec4_live_variables.h @@ -81,6 +81,8 @@ public: int num_vars; int bitset_words; + const struct intel_device_info *devinfo; + /** Per-basic-block information on live variables */ struct block_data *block_data; diff --git a/src/intel/compiler/test_vec4_cmod_propagation.cpp b/src/intel/compiler/test_vec4_cmod_propagation.cpp index 70539d4b875..21cee5d64ac 100644 --- a/src/intel/compiler/test_vec4_cmod_propagation.cpp +++ b/src/intel/compiler/test_vec4_cmod_propagation.cpp @@ -109,7 +109,7 @@ void cmod_propagation_test::SetUp() v = new cmod_propagation_vec4_visitor(compiler, ctx, shader, prog_data); - devinfo->ver = 4; + devinfo->ver = 7; devinfo->verx10 = devinfo->ver * 10; } @@ -905,3 +905,149 @@ TEST_F(cmod_propagation_test, add_cmp_different_dst_writemask) EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod); } + +TEST_F(cmod_propagation_test, prop_across_sel_gfx7) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest1 = dst_reg(v, glsl_type::float_type); + dst_reg dest2 = dst_reg(v, glsl_type::float_type); + src_reg src0 = src_reg(v, glsl_type::float_type); + src_reg src1 = src_reg(v, glsl_type::float_type); + src_reg src2 = src_reg(v, glsl_type::float_type); + src_reg src3 = src_reg(v, glsl_type::float_type); + src_reg zero(brw_imm_f(0.0f)); + dst_reg dest_null = bld.null_reg_f(); + dest_null.writemask = WRITEMASK_X; + + bld.ADD(dest1, src0, src1); + bld.SEL(dest2, src2, src3) + ->conditional_mod = BRW_CONDITIONAL_GE; + bld.CMP(dest_null, src_reg(dest1), zero, BRW_CONDITIONAL_GE); + + /* = Before = + * + * 0: add dest1.x src0.xxxx src1.xxxx + * 1: sel.ge.f0 dest2.x src2.xxxx src3.xxxx + * 2: cmp.ge.f0 null.x dest.xxxx 0.0f + * + * = After = + * 0: add.ge.f0 dest.x src0.xxxx src1.xxxx + * 1: sel.ge.f0 dest2.x src2.xxxx src3.xxxx + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod); +} + +TEST_F(cmod_propagation_test, prop_across_sel_gfx5) +{ + devinfo->ver = 5; + devinfo->verx10 = devinfo->ver * 10; + + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest1 = dst_reg(v, glsl_type::float_type); + dst_reg dest2 = dst_reg(v, glsl_type::float_type); + src_reg src0 = src_reg(v, glsl_type::float_type); + src_reg src1 = src_reg(v, glsl_type::float_type); + src_reg src2 = src_reg(v, glsl_type::float_type); + src_reg src3 = src_reg(v, glsl_type::float_type); + src_reg zero(brw_imm_f(0.0f)); + dst_reg dest_null = bld.null_reg_f(); + dest_null.writemask = WRITEMASK_X; + + bld.ADD(dest1, src0, src1); + bld.SEL(dest2, src2, src3) + ->conditional_mod = BRW_CONDITIONAL_GE; + bld.CMP(dest_null, src_reg(dest1), zero, BRW_CONDITIONAL_GE); + + /* = Before = + * + * 0: add dest1.x src0.xxxx src1.xxxx + * 1: sel.ge.f0 dest2.x src2.xxxx src3.xxxx + * 2: cmp.ge.f0 null.x dest.xxxx 0.0f + * + * = After = + * (no changes) + * + * On Gfx4 and Gfx5, sel.l (for min) and sel.ge (for max) are implemented + * using a separate cmpn and sel instruction. This lowering occurs in + * fs_vistor::lower_minmax which is called a long time after the first + * calls to cmod_propagation. + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(2, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 2)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 2)->conditional_mod); +} + +TEST_F(cmod_propagation_test, prop_into_sel_gfx5) +{ + devinfo->ver = 5; + devinfo->verx10 = devinfo->ver * 10; + + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::float_type); + src_reg src0 = src_reg(v, glsl_type::float_type); + src_reg src1 = src_reg(v, glsl_type::float_type); + src_reg zero(brw_imm_f(0.0f)); + dst_reg dest_null = bld.null_reg_f(); + dest_null.writemask = WRITEMASK_X; + + bld.SEL(dest, src0, src1) + ->conditional_mod = BRW_CONDITIONAL_GE; + bld.CMP(dest_null, src_reg(dest), zero, BRW_CONDITIONAL_GE); + + /* = Before = + * + * 0: sel.ge.f0 dest.x src2.xxxx src3.xxxx + * 1: cmp.ge.f0 null.x dest.xxxx 0.0f + * + * = After = + * (no changes) + * + * Do not copy propagate into a sel.cond instruction. While it does modify + * the flags, the flags are not based on the result compared with zero (as + * with most other instructions). The result is based on the sources + * compared with each other (like cmp.cond). + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod); +}