mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 02:38:04 +02:00
Merge branch 'review/brw-scheduler-fence-improvements' into 'main'
brw: Allow instruction reordering around memory writes See merge request mesa/mesa!41008
This commit is contained in:
commit
b532eda0c0
5 changed files with 60 additions and 5 deletions
|
|
@ -669,6 +669,10 @@ enum memory_flags {
|
|||
* fusion (Gfx12.x only).
|
||||
*/
|
||||
MEMORY_FLAG_FUSED_EU_DISABLE = 1 << 4,
|
||||
/** Whether this memory load can be arbitrarily reordered or CSE'd
|
||||
* with other loads.
|
||||
*/
|
||||
MEMORY_FLAG_CAN_REORDER = 1 << 5,
|
||||
};
|
||||
|
||||
enum rt_logical_srcs {
|
||||
|
|
|
|||
|
|
@ -5930,13 +5930,15 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
|
|||
(nir_intrinsic_access(instr) & ACCESS_COHERENT);
|
||||
const bool fused_eu_disable = nir_intrinsic_has_access(instr) &&
|
||||
(nir_intrinsic_access(instr) & ACCESS_FUSED_EU_DISABLE_INTEL);
|
||||
const bool can_reorder = nir_intrinsic_can_reorder(instr);
|
||||
const unsigned alignment =
|
||||
nir_intrinsic_has_align(instr) ? nir_intrinsic_align(instr) : 0;
|
||||
uint8_t flags =
|
||||
(include_helpers ? MEMORY_FLAG_INCLUDE_HELPERS : 0) |
|
||||
(volatile_access ? MEMORY_FLAG_VOLATILE_ACCESS : 0) |
|
||||
(coherent_access ? MEMORY_FLAG_COHERENT_ACCESS : 0) |
|
||||
(fused_eu_disable ? MEMORY_FLAG_FUSED_EU_DISABLE : 0);
|
||||
(fused_eu_disable ? MEMORY_FLAG_FUSED_EU_DISABLE : 0) |
|
||||
(can_reorder ? MEMORY_FLAG_CAN_REORDER : 0);
|
||||
bool no_mask_handle = false;
|
||||
|
||||
uint8_t coord_components = 1;
|
||||
|
|
|
|||
|
|
@ -1192,6 +1192,7 @@ lower_lsc_memory_logical_send(const brw_builder &bld, brw_mem_inst *mem)
|
|||
const bool coherent_access = mem->flags & MEMORY_FLAG_COHERENT_ACCESS;
|
||||
const bool has_side_effects = mem->has_side_effects();
|
||||
const bool fused_eu_disable = mem->flags & MEMORY_FLAG_FUSED_EU_DISABLE;
|
||||
const bool can_reorder = mem->flags & MEMORY_FLAG_CAN_REORDER;
|
||||
|
||||
const uint32_t data_size_B = lsc_data_size_bytes(data_size);
|
||||
const enum brw_reg_type data_type =
|
||||
|
|
@ -1344,7 +1345,7 @@ lower_lsc_memory_logical_send(const brw_builder &bld, brw_mem_inst *mem)
|
|||
send->ex_mlen = ex_mlen;
|
||||
send->header_size = 0;
|
||||
send->has_side_effects = has_side_effects;
|
||||
send->is_volatile = !has_side_effects || volatile_access;
|
||||
send->is_volatile = (!has_side_effects && !can_reorder) || volatile_access;
|
||||
send->fused_eu_disable = fused_eu_disable;
|
||||
|
||||
/* Finally, the payload */
|
||||
|
|
@ -1405,6 +1406,7 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_mem_inst *mem)
|
|||
const bool volatile_access = mem->flags & MEMORY_FLAG_VOLATILE_ACCESS;
|
||||
const bool fused_eu_disable = mem->flags & MEMORY_FLAG_FUSED_EU_DISABLE;
|
||||
const bool has_side_effects = mem->has_side_effects();
|
||||
const bool can_reorder = mem->flags & MEMORY_FLAG_CAN_REORDER;
|
||||
const bool has_dest = mem->dst.file != BAD_FILE && !mem->dst.is_null();
|
||||
assert(mem->address_offset == 0);
|
||||
|
||||
|
|
@ -1610,7 +1612,7 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_mem_inst *mem)
|
|||
send->ex_mlen = ex_mlen;
|
||||
send->header_size = header.file != BAD_FILE ? 1 : 0;
|
||||
send->has_side_effects = has_side_effects;
|
||||
send->is_volatile = !has_side_effects || volatile_access;
|
||||
send->is_volatile = (!has_side_effects && !can_reorder) || volatile_access;
|
||||
send->fused_eu_disable = fused_eu_disable;
|
||||
|
||||
if (block) {
|
||||
|
|
|
|||
|
|
@ -94,7 +94,7 @@ is_expression(const brw_shader *v, const brw_inst *const inst)
|
|||
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
|
||||
return true;
|
||||
case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
|
||||
return inst->as_mem()->mode == MEMORY_MODE_CONSTANT;
|
||||
return inst->as_mem()->flags & MEMORY_FLAG_CAN_REORDER;
|
||||
case SHADER_OPCODE_LOAD_PAYLOAD:
|
||||
return !is_coalescing_payload(*v, inst);
|
||||
case SHADER_OPCODE_SEND:
|
||||
|
|
|
|||
|
|
@ -571,6 +571,7 @@ public:
|
|||
int block_count, bool post_reg_alloc, bool need_latencies);
|
||||
|
||||
void add_barrier_deps(schedule_node *n);
|
||||
void add_memory_deps(schedule_node *n);
|
||||
void add_cross_lane_deps(schedule_node *n);
|
||||
void add_dep(schedule_node *before, schedule_node *after, int latency);
|
||||
void add_dep(schedule_node *before, schedule_node *after);
|
||||
|
|
@ -1080,8 +1081,27 @@ static bool
|
|||
is_scheduling_barrier(const brw_inst *inst)
|
||||
{
/* Returns true when the instruction matches any of the opcode,
 * control-flow or flag checks chained together below.
 *
 * NOTE(review): this listing is a rendered diff with no +/- markers, so the
 * two final operands (inst->has_side_effects() and inst->eot) are
 * presumably the removed and the added version of the same line. Confirm
 * against the real file before relying on either of them.
 */
|
||||
return inst->opcode == SHADER_OPCODE_HALT_TARGET ||
|
||||
inst->opcode == SHADER_OPCODE_RND_MODE ||
|
||||
inst->opcode == SHADER_OPCODE_FLOAT_CONTROL_MODE ||
|
||||
(inst->is_control_flow() && inst->opcode != BRW_OPCODE_HALT) ||
|
||||
inst->has_side_effects();
|
||||
inst->eot;
|
||||
}
|
||||
|
||||
static bool
|
||||
has_memory_side_effects(const brw_inst *inst)
|
||||
{
|
||||
assert(inst->opcode != SHADER_OPCODE_LSC_SPILL);
|
||||
return inst->opcode == BRW_OPCODE_SYNC ||
|
||||
inst->opcode == SHADER_OPCODE_BARRIER ||
|
||||
inst->opcode == FS_OPCODE_SCHEDULING_FENCE ||
|
||||
(inst->is_send() && inst->as_send()->has_side_effects);
|
||||
}
|
||||
|
||||
static bool
|
||||
is_memory_volatile(const brw_inst *inst)
|
||||
{
|
||||
return has_memory_side_effects(inst) ||
|
||||
(inst->is_send() && inst->as_send()->is_volatile);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
@ -1186,6 +1206,30 @@ brw_instruction_scheduler::add_barrier_deps(schedule_node *n)
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
brw_instruction_scheduler::add_memory_deps(schedule_node *n)
|
||||
{
|
||||
for (schedule_node *prev = n - 1; prev >= current.start; prev--) {
|
||||
if (has_memory_side_effects(prev->inst)) {
|
||||
add_dep(prev, n, 0);
|
||||
break;
|
||||
}
|
||||
if (is_memory_volatile(prev->inst)) {
|
||||
add_dep(prev, n, 0);
|
||||
}
|
||||
}
|
||||
|
||||
for (schedule_node *next = n + 1; next < current.end; next++) {
|
||||
if (has_memory_side_effects(next->inst)) {
|
||||
add_dep(n, next, 0);
|
||||
break;
|
||||
}
|
||||
if (is_memory_volatile(next->inst)) {
|
||||
add_dep(n, next, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Because some instructions like HALT can disable lanes, scheduling prior to
|
||||
* a cross lane access should not be allowed, otherwise we could end up with
|
||||
|
|
@ -1338,6 +1382,9 @@ brw_instruction_scheduler::calculate_deps()
|
|||
if (is_scheduling_barrier(inst))
|
||||
add_barrier_deps(n);
|
||||
|
||||
if (has_memory_side_effects(inst))
|
||||
add_memory_deps(n);
|
||||
|
||||
if (inst->opcode == BRW_OPCODE_HALT ||
|
||||
inst->opcode == SHADER_OPCODE_HALT_TARGET)
|
||||
add_cross_lane_deps(n);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue