# Explanatory preamble. Everything ahead of the "diff --git" header is
# commentary and is not part of any hunk.
#
# Target file: src/intel/compiler/brw/brw_schedule_instructions.cpp
#
# Hunk 1 (hunk context "public:"):
#   Declares the new member add_memory_deps(schedule_node *n) directly after
#   add_barrier_deps().
#
# Hunk 2 (file-static predicates):
#   - is_scheduling_barrier(): additionally returns true for
#     SHADER_OPCODE_RND_MODE and SHADER_OPCODE_FLOAT_CONTROL_MODE, and its
#     last term changes from inst->has_side_effects() to inst->eot.
#   - has_memory_side_effects() [new]: true for BRW_OPCODE_SYNC,
#     SHADER_OPCODE_BARRIER, FS_OPCODE_SCHEDULING_FENCE, or for a send whose
#     as_send()->has_side_effects is set. It asserts that the opcode is not
#     SHADER_OPCODE_LSC_SPILL before evaluating anything else.
#   - is_memory_volatile() [new]: true whenever has_memory_side_effects() is
#     true, or for a send whose as_send()->is_volatile is set.
#
# Hunk 3:
#   Defines brw_instruction_scheduler::add_memory_deps(n).
#   Backward walk, from n - 1 down to current.start: every instruction for
#   which is_memory_volatile() holds gets add_dep(prev, n, 0); the walk stops
#   right after the first instruction for which has_memory_side_effects()
#   holds. The forward walk, from n + 1 up to but excluding current.end, does
#   the same with add_dep(n, next, 0). The third argument is the "latency"
#   parameter of the three-argument add_dep() overload visible in hunk 1.
#
# Hunk 4 (calculate_deps()):
#   Calls add_memory_deps(n) when has_memory_side_effects(inst) is true,
#   placed between the existing is_scheduling_barrier() check and the
#   HALT / HALT_TARGET cross-lane check.
#
# NOTE(review): with this patch an instruction whose has_side_effects()
#   is true is no longer a full scheduling barrier; it is only ordered via
#   add_memory_deps() when has_memory_side_effects() matches. Confirm that
#   has_side_effects() did not cover further cases that still need ordering.
# NOTE(review): the assert presumably relies on SHADER_OPCODE_LSC_SPILL never
#   reaching these predicates -- confirm against the callers.
# NOTE(review): line breaks and indentation inside the hunks were restored
#   from a whitespace-collapsed copy of this patch; verify the context lines
#   against the target file (or apply with whitespace-tolerant matching).
#
diff --git a/src/intel/compiler/brw/brw_schedule_instructions.cpp b/src/intel/compiler/brw/brw_schedule_instructions.cpp
index a2a837eaea5..f856eaa0995 100644
--- a/src/intel/compiler/brw/brw_schedule_instructions.cpp
+++ b/src/intel/compiler/brw/brw_schedule_instructions.cpp
@@ -571,6 +571,7 @@ public:
                              int block_count, bool post_reg_alloc, bool need_latencies);
 
    void add_barrier_deps(schedule_node *n);
+   void add_memory_deps(schedule_node *n);
    void add_cross_lane_deps(schedule_node *n);
    void add_dep(schedule_node *before, schedule_node *after, int latency);
    void add_dep(schedule_node *before, schedule_node *after);
@@ -1080,8 +1081,27 @@ static bool
 is_scheduling_barrier(const brw_inst *inst)
 {
    return inst->opcode == SHADER_OPCODE_HALT_TARGET ||
+          inst->opcode == SHADER_OPCODE_RND_MODE ||
+          inst->opcode == SHADER_OPCODE_FLOAT_CONTROL_MODE ||
           (inst->is_control_flow() && inst->opcode != BRW_OPCODE_HALT) ||
-          inst->has_side_effects();
+          inst->eot;
+}
+
+static bool
+has_memory_side_effects(const brw_inst *inst)
+{
+   assert(inst->opcode != SHADER_OPCODE_LSC_SPILL);
+   return inst->opcode == BRW_OPCODE_SYNC ||
+          inst->opcode == SHADER_OPCODE_BARRIER ||
+          inst->opcode == FS_OPCODE_SCHEDULING_FENCE ||
+          (inst->is_send() && inst->as_send()->has_side_effects);
+}
+
+static bool
+is_memory_volatile(const brw_inst *inst)
+{
+   return has_memory_side_effects(inst) ||
+          (inst->is_send() && inst->as_send()->is_volatile);
 }
 
 static bool
@@ -1186,6 +1206,30 @@ brw_instruction_scheduler::add_barrier_deps(schedule_node *n)
    }
 }
 
+void
+brw_instruction_scheduler::add_memory_deps(schedule_node *n)
+{
+   for (schedule_node *prev = n - 1; prev >= current.start; prev--) {
+      if (has_memory_side_effects(prev->inst)) {
+         add_dep(prev, n, 0);
+         break;
+      }
+      if (is_memory_volatile(prev->inst)) {
+         add_dep(prev, n, 0);
+      }
+   }
+
+   for (schedule_node *next = n + 1; next < current.end; next++) {
+      if (has_memory_side_effects(next->inst)) {
+         add_dep(n, next, 0);
+         break;
+      }
+      if (is_memory_volatile(next->inst)) {
+         add_dep(n, next, 0);
+      }
+   }
+}
+
 /**
  * Because some instructions like HALT can disable lanes, scheduling prior to
  * a cross lane access should not be allowed, otherwise we could end up with
@@ -1338,6 +1382,9 @@ brw_instruction_scheduler::calculate_deps()
       if (is_scheduling_barrier(inst))
          add_barrier_deps(n);
 
+      if (has_memory_side_effects(inst))
+         add_memory_deps(n);
+
       if (inst->opcode == BRW_OPCODE_HALT ||
           inst->opcode == SHADER_OPCODE_HALT_TARGET)
          add_cross_lane_deps(n);