brw: Add support for ACCESS_CAN_REORDER memory ordering

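Passes the ACCESS_CAN_REORDER flag from NIR (as reported by
nir_intrinsic_can_reorder()) on to the backend as MEMORY_FLAG_CAN_REORDER,
so that loads carrying it can be lowered to a non-volatile SEND unless they
are explicitly marked volatile. This allows the scheduler to freely reorder
them around stores or fences. CSE now also uses this flag, rather than
MEMORY_MODE_CONSTANT, to decide whether a logical memory load is an
expression.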

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
This commit is contained in:
Calder Young 2026-04-15 12:39:38 -07:00
parent 30c7f5820f
commit bbfc986573
4 changed files with 12 additions and 4 deletions

View file

@ -669,6 +669,10 @@ enum memory_flags {
* fusion (Gfx12.x only).
*/
MEMORY_FLAG_FUSED_EU_DISABLE = 1 << 4,
/** Whether this memory load can be arbitrarily reordered or CSE'd
* with other loads.
*/
MEMORY_FLAG_CAN_REORDER = 1 << 5,
};
enum rt_logical_srcs {

View file

@ -5936,13 +5936,15 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
(nir_intrinsic_access(instr) & ACCESS_COHERENT);
const bool fused_eu_disable = nir_intrinsic_has_access(instr) &&
(nir_intrinsic_access(instr) & ACCESS_FUSED_EU_DISABLE_INTEL);
const bool can_reorder = nir_intrinsic_can_reorder(instr);
const unsigned alignment =
nir_intrinsic_has_align(instr) ? nir_intrinsic_align(instr) : 0;
uint8_t flags =
(include_helpers ? MEMORY_FLAG_INCLUDE_HELPERS : 0) |
(volatile_access ? MEMORY_FLAG_VOLATILE_ACCESS : 0) |
(coherent_access ? MEMORY_FLAG_COHERENT_ACCESS : 0) |
(fused_eu_disable ? MEMORY_FLAG_FUSED_EU_DISABLE : 0);
(fused_eu_disable ? MEMORY_FLAG_FUSED_EU_DISABLE : 0) |
(can_reorder ? MEMORY_FLAG_CAN_REORDER : 0);
bool no_mask_handle = false;
uint8_t coord_components = 1;

View file

@ -1192,6 +1192,7 @@ lower_lsc_memory_logical_send(const brw_builder &bld, brw_mem_inst *mem)
const bool coherent_access = mem->flags & MEMORY_FLAG_COHERENT_ACCESS;
const bool has_side_effects = mem->has_side_effects();
const bool fused_eu_disable = mem->flags & MEMORY_FLAG_FUSED_EU_DISABLE;
const bool can_reorder = mem->flags & MEMORY_FLAG_CAN_REORDER;
const uint32_t data_size_B = lsc_data_size_bytes(data_size);
const enum brw_reg_type data_type =
@ -1344,7 +1345,7 @@ lower_lsc_memory_logical_send(const brw_builder &bld, brw_mem_inst *mem)
send->ex_mlen = ex_mlen;
send->header_size = 0;
send->has_side_effects = has_side_effects;
send->is_volatile = !has_side_effects || volatile_access;
send->is_volatile = (!has_side_effects && !can_reorder) || volatile_access;
send->fused_eu_disable = fused_eu_disable;
/* Finally, the payload */
@ -1405,6 +1406,7 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_mem_inst *mem)
const bool volatile_access = mem->flags & MEMORY_FLAG_VOLATILE_ACCESS;
const bool fused_eu_disable = mem->flags & MEMORY_FLAG_FUSED_EU_DISABLE;
const bool has_side_effects = mem->has_side_effects();
const bool can_reorder = mem->flags & MEMORY_FLAG_CAN_REORDER;
const bool has_dest = mem->dst.file != BAD_FILE && !mem->dst.is_null();
assert(mem->address_offset == 0);
@ -1610,7 +1612,7 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_mem_inst *mem)
send->ex_mlen = ex_mlen;
send->header_size = header.file != BAD_FILE ? 1 : 0;
send->has_side_effects = has_side_effects;
send->is_volatile = !has_side_effects || volatile_access;
send->is_volatile = (!has_side_effects && !can_reorder) || volatile_access;
send->fused_eu_disable = fused_eu_disable;
if (block) {

View file

@ -94,7 +94,7 @@ is_expression(const brw_shader *v, const brw_inst *const inst)
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
return true;
case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
return inst->as_mem()->mode == MEMORY_MODE_CONSTANT;
return inst->as_mem()->flags & MEMORY_FLAG_CAN_REORDER;
case SHADER_OPCODE_LOAD_PAYLOAD:
return !is_coalescing_payload(*v, inst);
case SHADER_OPCODE_SEND: