mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 16:00:08 +01:00
brw: serialize messages on Gfx12.x if required
The Intel EU fusion feature needs to be disabled on SEND messages where the texture handle, sampler handle, or sampler header is not identical across fused threads. This is the case in particular for accesses through non-uniform texture/sampler handles, but it could also occur with dynamic programmable offsets (currently disabled). Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Alyssa Anne Rosenzweig <alyssa.rosenzweig@intel.com> Reviewed-by: Francisco Jerez <currojerez@riseup.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37394>
This commit is contained in:
parent
301b71a19f
commit
37a9c5411f
10 changed files with 175 additions and 9 deletions
|
|
@ -206,6 +206,11 @@ static const char *const branch_ctrl[2] = {
|
||||||
[1] = "BranchCtrl"
|
[1] = "BranchCtrl"
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Disassembly strings for the fusion control bit, indexed by the bit's
 * value: an empty string for 0 and "FusionCtrl" for 1. The disassembler
 * passes this table to control() for SEND instructions when
 * devinfo->ver == 12.
 */
static const char *const fusion_ctrl[2] = {
|
||||||
|
[0] = "",
|
||||||
|
[1] = "FusionCtrl"
|
||||||
|
};
|
||||||
|
|
||||||
static const char *const wectrl[2] = {
|
static const char *const wectrl[2] = {
|
||||||
[0] = "",
|
[0] = "",
|
||||||
[1] = "WE_all"
|
[1] = "WE_all"
|
||||||
|
|
@ -2619,6 +2624,12 @@ brw_disassemble_inst(FILE *file, const struct brw_isa_info *isa,
|
||||||
err |= control(file, "acc write control", accwr,
|
err |= control(file, "acc write control", accwr,
|
||||||
brw_eu_inst_acc_wr_control(devinfo, inst), &space);
|
brw_eu_inst_acc_wr_control(devinfo, inst), &space);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (devinfo->ver == 12 && is_send(opcode)) {
|
||||||
|
err |= control(file, "fusion ctrl", fusion_ctrl,
|
||||||
|
brw_eu_inst_fusion_ctrl(devinfo, inst), &space);
|
||||||
|
}
|
||||||
|
|
||||||
if (is_send(opcode))
|
if (is_send(opcode))
|
||||||
err |= control(file, "end of thread", end_of_thread,
|
err |= control(file, "end of thread", end_of_thread,
|
||||||
brw_eu_inst_eot(devinfo, inst), &space);
|
brw_eu_inst_eot(devinfo, inst), &space);
|
||||||
|
|
|
||||||
|
|
@ -712,6 +712,10 @@ enum memory_flags {
|
||||||
MEMORY_FLAG_VOLATILE_ACCESS = 1 << 2,
|
MEMORY_FLAG_VOLATILE_ACCESS = 1 << 2,
|
||||||
/** Whether memory access is marked coherent by GLSL/SPIR-V. */
|
/** Whether memory access is marked coherent by GLSL/SPIR-V. */
|
||||||
MEMORY_FLAG_COHERENT_ACCESS = 1 << 3,
|
MEMORY_FLAG_COHERENT_ACCESS = 1 << 3,
|
||||||
|
/** Whether this instruction should run serialized with regard to EU
|
||||||
|
* fusion (Gfx12.x only).
|
||||||
|
*/
|
||||||
|
MEMORY_FLAG_FUSED_EU_DISABLE = 1 << 4,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum rt_logical_srcs {
|
enum rt_logical_srcs {
|
||||||
|
|
|
||||||
|
|
@ -897,6 +897,7 @@ brw_eu_inst_sends_ex_desc(const struct intel_device_info *devinfo,
|
||||||
* @{
|
* @{
|
||||||
*/
|
*/
|
||||||
F(eot, /* 9+ */ 127, 127, /* 12+ */ 34, 34)
|
F(eot, /* 9+ */ 127, 127, /* 12+ */ 34, 34)
|
||||||
|
F(fusion_ctrl, /* 9+ */ -1, -1, /* 12+ */ 33, 33)
|
||||||
F(mlen, /* 9+ */ 124, 121, /* 12+ */ MD12(28), MD12(25))
|
F(mlen, /* 9+ */ 124, 121, /* 12+ */ MD12(28), MD12(25))
|
||||||
F(rlen, /* 9+ */ 120, 116, /* 12+ */ MD12(24), MD12(20))
|
F(rlen, /* 9+ */ 120, 116, /* 12+ */ MD12(24), MD12(20))
|
||||||
F(header_present, /* 9+ */ 115, 115, /* 12+ */ MD12(19), MD12(19))
|
F(header_present, /* 9+ */ 115, 115, /* 12+ */ MD12(19), MD12(19))
|
||||||
|
|
|
||||||
|
|
@ -6471,6 +6471,8 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb,
|
||||||
brw_inst *inst = ubld.emit(SHADER_OPCODE_GET_BUFFER_SIZE, ret_payload,
|
brw_inst *inst = ubld.emit(SHADER_OPCODE_GET_BUFFER_SIZE, ret_payload,
|
||||||
srcs, GET_BUFFER_SIZE_SRCS);
|
srcs, GET_BUFFER_SIZE_SRCS);
|
||||||
inst->size_written = 4 * REG_SIZE * reg_unit(devinfo);
|
inst->size_written = 4 * REG_SIZE * reg_unit(devinfo);
|
||||||
|
inst->fused_eu_disable =
|
||||||
|
(nir_intrinsic_access(instr) & ACCESS_FUSED_EU_DISABLE_INTEL) != 0;
|
||||||
|
|
||||||
/* SKL PRM, vol07, 3D Media GPGPU Engine, Bounds Checking and Faulting:
|
/* SKL PRM, vol07, 3D Media GPGPU Engine, Bounds Checking and Faulting:
|
||||||
*
|
*
|
||||||
|
|
@ -7016,12 +7018,15 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
|
||||||
(nir_intrinsic_access(instr) & ACCESS_VOLATILE);
|
(nir_intrinsic_access(instr) & ACCESS_VOLATILE);
|
||||||
const bool coherent_access = nir_intrinsic_has_access(instr) &&
|
const bool coherent_access = nir_intrinsic_has_access(instr) &&
|
||||||
(nir_intrinsic_access(instr) & ACCESS_COHERENT);
|
(nir_intrinsic_access(instr) & ACCESS_COHERENT);
|
||||||
|
const bool fused_eu_disable = nir_intrinsic_has_access(instr) &&
|
||||||
|
(nir_intrinsic_access(instr) & ACCESS_FUSED_EU_DISABLE_INTEL);
|
||||||
const unsigned align =
|
const unsigned align =
|
||||||
nir_intrinsic_has_align(instr) ? nir_intrinsic_align(instr) : 0;
|
nir_intrinsic_has_align(instr) ? nir_intrinsic_align(instr) : 0;
|
||||||
uint8_t flags =
|
uint8_t flags =
|
||||||
(include_helpers ? MEMORY_FLAG_INCLUDE_HELPERS : 0) |
|
(include_helpers ? MEMORY_FLAG_INCLUDE_HELPERS : 0) |
|
||||||
(volatile_access ? MEMORY_FLAG_VOLATILE_ACCESS : 0) |
|
(volatile_access ? MEMORY_FLAG_VOLATILE_ACCESS : 0) |
|
||||||
(coherent_access ? MEMORY_FLAG_COHERENT_ACCESS : 0);
|
(coherent_access ? MEMORY_FLAG_COHERENT_ACCESS : 0) |
|
||||||
|
(fused_eu_disable ? MEMORY_FLAG_FUSED_EU_DISABLE : 0);
|
||||||
bool no_mask_handle = false;
|
bool no_mask_handle = false;
|
||||||
int data_src = -1;
|
int data_src = -1;
|
||||||
|
|
||||||
|
|
@ -7661,6 +7666,7 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb,
|
||||||
tex->residency = instr->is_sparse;
|
tex->residency = instr->is_sparse;
|
||||||
tex->coord_components = instr->coord_components;
|
tex->coord_components = instr->coord_components;
|
||||||
tex->grad_components = lod_components;
|
tex->grad_components = lod_components;
|
||||||
|
tex->fused_eu_disable = (instr->backend_flags & BRW_TEX_INSTR_FUSED_EU_DISABLE) != 0;
|
||||||
|
|
||||||
/* Wa_14012688258:
|
/* Wa_14012688258:
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -198,6 +198,10 @@ brw_generator::generate_send(brw_send_inst *inst,
|
||||||
brw_eu_inst_set_opcode(p->isa, brw_last_inst,
|
brw_eu_inst_set_opcode(p->isa, brw_last_inst,
|
||||||
devinfo->ver >= 12 ? BRW_OPCODE_SENDC : BRW_OPCODE_SENDSC);
|
devinfo->ver >= 12 ? BRW_OPCODE_SENDC : BRW_OPCODE_SENDSC);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Serialize messages if needed */
|
||||||
|
if (devinfo->ver == 12 && inst->fused_eu_disable)
|
||||||
|
brw_eu_inst_set_fusion_ctrl(devinfo, brw_last_inst, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
|
||||||
|
|
@ -213,7 +213,12 @@ struct brw_inst : brw_exec_node {
|
||||||
*/
|
*/
|
||||||
bool has_no_mask_send_params:1;
|
bool has_no_mask_send_params:1;
|
||||||
|
|
||||||
uint8_t pad:5;
|
/**
|
||||||
|
* Serialize the message (Gfx12.x only)
|
||||||
|
*/
|
||||||
|
bool fused_eu_disable:1;
|
||||||
|
|
||||||
|
uint8_t pad:4;
|
||||||
};
|
};
|
||||||
uint16_t bits;
|
uint16_t bits;
|
||||||
};
|
};
|
||||||
|
|
@ -261,6 +266,11 @@ struct brw_send_inst : brw_inst {
|
||||||
*/
|
*/
|
||||||
bool ex_bso:1;
|
bool ex_bso:1;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Serialize the message (Gfx12.x only)
|
||||||
|
*/
|
||||||
|
bool fused_eu_disable:1;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Only for SHADER_OPCODE_SEND, @offset field contains an immediate
|
* Only for SHADER_OPCODE_SEND, @offset field contains an immediate
|
||||||
* part of the extended descriptor that must be encoded in the
|
* part of the extended descriptor that must be encoded in the
|
||||||
|
|
@ -268,7 +278,7 @@ struct brw_send_inst : brw_inst {
|
||||||
*/
|
*/
|
||||||
bool ex_desc_imm:1;
|
bool ex_desc_imm:1;
|
||||||
|
|
||||||
uint8_t pad:3;
|
uint8_t pad:2;
|
||||||
};
|
};
|
||||||
uint8_t send_bits;
|
uint8_t send_bits;
|
||||||
};
|
};
|
||||||
|
|
@ -279,9 +289,28 @@ struct brw_tex_inst : brw_inst {
|
||||||
uint32_t offset;
|
uint32_t offset;
|
||||||
uint8_t coord_components;
|
uint8_t coord_components;
|
||||||
uint8_t grad_components;
|
uint8_t grad_components;
|
||||||
|
union {
|
||||||
|
struct {
|
||||||
|
/**
|
||||||
|
* Whether the instruction requests the residency data (additional register
|
||||||
|
* written).
|
||||||
|
*/
|
||||||
bool residency:1;
|
bool residency:1;
|
||||||
|
/**
|
||||||
|
* Serialize the message (Gfx12.x only)
|
||||||
|
*/
|
||||||
|
bool fused_eu_disable:1;
|
||||||
|
/**
|
||||||
|
* Whether the surface handle is bindless
|
||||||
|
*/
|
||||||
bool surface_bindless:1;
|
bool surface_bindless:1;
|
||||||
|
/**
|
||||||
|
* Whether the sampler handle is bindless
|
||||||
|
*/
|
||||||
bool sampler_bindless:1;
|
bool sampler_bindless:1;
|
||||||
|
};
|
||||||
|
uint8_t bits;
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
struct brw_mem_inst : brw_inst {
|
struct brw_mem_inst : brw_inst {
|
||||||
|
|
|
||||||
|
|
@ -1217,9 +1217,12 @@ lower_sampler_logical_send(const brw_builder &bld, brw_tex_inst *tex)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const bool fused_eu_disable = tex->fused_eu_disable;
|
||||||
|
|
||||||
brw_send_inst *send = brw_transform_inst_to_send(bld, tex);
|
brw_send_inst *send = brw_transform_inst_to_send(bld, tex);
|
||||||
tex = NULL;
|
tex = NULL;
|
||||||
|
|
||||||
|
send->fused_eu_disable = fused_eu_disable;
|
||||||
send->mlen = mlen;
|
send->mlen = mlen;
|
||||||
send->header_size = header_size;
|
send->header_size = header_size;
|
||||||
send->sfid = BRW_SFID_SAMPLER;
|
send->sfid = BRW_SFID_SAMPLER;
|
||||||
|
|
@ -1481,6 +1484,7 @@ lower_lsc_memory_logical_send(const brw_builder &bld, brw_mem_inst *mem)
|
||||||
const bool volatile_access = mem->flags & MEMORY_FLAG_VOLATILE_ACCESS;
|
const bool volatile_access = mem->flags & MEMORY_FLAG_VOLATILE_ACCESS;
|
||||||
const bool coherent_access = mem->flags & MEMORY_FLAG_COHERENT_ACCESS;
|
const bool coherent_access = mem->flags & MEMORY_FLAG_COHERENT_ACCESS;
|
||||||
const bool has_side_effects = mem->has_side_effects();
|
const bool has_side_effects = mem->has_side_effects();
|
||||||
|
const bool fused_eu_disable = mem->flags & MEMORY_FLAG_FUSED_EU_DISABLE;
|
||||||
|
|
||||||
const uint32_t data_size_B = lsc_data_size_bytes(data_size);
|
const uint32_t data_size_B = lsc_data_size_bytes(data_size);
|
||||||
const enum brw_reg_type data_type =
|
const enum brw_reg_type data_type =
|
||||||
|
|
@ -1634,6 +1638,7 @@ lower_lsc_memory_logical_send(const brw_builder &bld, brw_mem_inst *mem)
|
||||||
send->header_size = 0;
|
send->header_size = 0;
|
||||||
send->has_side_effects = has_side_effects;
|
send->has_side_effects = has_side_effects;
|
||||||
send->is_volatile = !has_side_effects || volatile_access;
|
send->is_volatile = !has_side_effects || volatile_access;
|
||||||
|
send->fused_eu_disable = fused_eu_disable;
|
||||||
|
|
||||||
/* Finally, the payload */
|
/* Finally, the payload */
|
||||||
send->src[SEND_SRC_PAYLOAD1] = payload;
|
send->src[SEND_SRC_PAYLOAD1] = payload;
|
||||||
|
|
@ -1692,6 +1697,7 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_mem_inst *mem)
|
||||||
const bool block = mem->flags & MEMORY_FLAG_TRANSPOSE;
|
const bool block = mem->flags & MEMORY_FLAG_TRANSPOSE;
|
||||||
const bool include_helpers = mem->flags & MEMORY_FLAG_INCLUDE_HELPERS;
|
const bool include_helpers = mem->flags & MEMORY_FLAG_INCLUDE_HELPERS;
|
||||||
const bool volatile_access = mem->flags & MEMORY_FLAG_VOLATILE_ACCESS;
|
const bool volatile_access = mem->flags & MEMORY_FLAG_VOLATILE_ACCESS;
|
||||||
|
const bool fused_eu_disable = mem->flags & MEMORY_FLAG_FUSED_EU_DISABLE;
|
||||||
const bool has_side_effects = mem->has_side_effects();
|
const bool has_side_effects = mem->has_side_effects();
|
||||||
const bool has_dest = mem->dst.file != BAD_FILE && !mem->dst.is_null();
|
const bool has_dest = mem->dst.file != BAD_FILE && !mem->dst.is_null();
|
||||||
assert(mem->address_offset == 0);
|
assert(mem->address_offset == 0);
|
||||||
|
|
@ -1903,6 +1909,7 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_mem_inst *mem)
|
||||||
send->header_size = header.file != BAD_FILE ? 1 : 0;
|
send->header_size = header.file != BAD_FILE ? 1 : 0;
|
||||||
send->has_side_effects = has_side_effects;
|
send->has_side_effects = has_side_effects;
|
||||||
send->is_volatile = !has_side_effects || volatile_access;
|
send->is_volatile = !has_side_effects || volatile_access;
|
||||||
|
send->fused_eu_disable = fused_eu_disable;
|
||||||
|
|
||||||
if (block) {
|
if (block) {
|
||||||
assert(send->force_writemask_all);
|
assert(send->force_writemask_all);
|
||||||
|
|
@ -2447,6 +2454,7 @@ lower_get_buffer_size(const brw_builder &bld, brw_inst *inst)
|
||||||
brw_reg surface = inst->src[GET_BUFFER_SIZE_SRC_SURFACE];
|
brw_reg surface = inst->src[GET_BUFFER_SIZE_SRC_SURFACE];
|
||||||
brw_reg surface_handle = inst->src[GET_BUFFER_SIZE_SRC_SURFACE_HANDLE];
|
brw_reg surface_handle = inst->src[GET_BUFFER_SIZE_SRC_SURFACE_HANDLE];
|
||||||
brw_reg lod = bld.move_to_vgrf(inst->src[GET_BUFFER_SIZE_SRC_LOD], 1);
|
brw_reg lod = bld.move_to_vgrf(inst->src[GET_BUFFER_SIZE_SRC_LOD], 1);
|
||||||
|
const bool fused_eu_disable = inst->fused_eu_disable;
|
||||||
|
|
||||||
brw_send_inst *send = brw_transform_inst_to_send(bld, inst);
|
brw_send_inst *send = brw_transform_inst_to_send(bld, inst);
|
||||||
inst = NULL;
|
inst = NULL;
|
||||||
|
|
@ -2468,6 +2476,7 @@ lower_get_buffer_size(const brw_builder &bld, brw_inst *inst)
|
||||||
|
|
||||||
send->dst = retype(send->dst, BRW_TYPE_UW);
|
send->dst = retype(send->dst, BRW_TYPE_UW);
|
||||||
send->sfid = BRW_SFID_SAMPLER;
|
send->sfid = BRW_SFID_SAMPLER;
|
||||||
|
send->fused_eu_disable = fused_eu_disable;
|
||||||
setup_surface_descriptors(bld, send, desc, surface, surface_handle);
|
setup_surface_descriptors(bld, send, desc, surface, surface_handle);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2065,6 +2065,86 @@ lower_txd_cb(const nir_tex_instr *tex, const void *data)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Per-instruction callback that flags instructions whose message must be
 * serialized with regard to EU fusion.
 *
 * - Texture instructions: if any texture/sampler handle or texture/sampler
 *   offset source is divergent, BRW_TEX_INSTR_FUSED_EU_DISABLE is set in
 *   tex->backend_flags.
 * - The buffer/image intrinsics listed below: if the source selected by
 *   nir_get_io_index_src_number() is divergent,
 *   ACCESS_FUSED_EU_DISABLE_INTEL is OR'ed into the intrinsic's access flags.
 *
 * Divergence is queried through nir_src_is_divergent(), so divergence
 * information must be available when this runs. The `b` and `data`
 * parameters are not used.
 *
 * Returns true when the instruction was flagged, false otherwise.
 */
static bool
|
||||||
|
flag_fused_eu_disable_instr(nir_builder *b, nir_instr *instr, void *data)
|
||||||
|
{
|
||||||
|
switch (instr->type) {
|
||||||
|
case nir_instr_type_tex: {
|
||||||
|
nir_tex_instr *tex = nir_instr_as_tex(instr);
|
||||||
|

||||||
|
for (unsigned i = 0; i < tex->num_srcs; ++i) {
|
||||||
|
nir_tex_src_type src_type = tex->src[i].src_type;
|
||||||
|

||||||
|
if (src_type != nir_tex_src_texture_handle &&
|
||||||
|
src_type != nir_tex_src_sampler_handle &&
|
||||||
|
src_type != nir_tex_src_texture_offset &&
|
||||||
|
src_type != nir_tex_src_sampler_offset)
|
||||||
|
continue;
|
||||||
|

||||||
|
if (nir_src_is_divergent(&tex->src[i].src)) {
|
||||||
|
tex->backend_flags |= BRW_TEX_INSTR_FUSED_EU_DISABLE;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|

||||||
|
case nir_instr_type_intrinsic: {
|
||||||
|
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||||
|
/* We only need to care about intrinsics that refer to a structure/descriptor
|
||||||
|
* outside of the EU's registers like RENDER_SURFACE_STATE/SAMPLER_STATE,
|
||||||
|
* because the fusing will pick one thread's descriptor handle and use that
|
||||||
|
* for the 2 fused threads.
|
||||||
|
*
|
||||||
|
* Global pointers don't have that problem since all of the access's data is
|
||||||
|
* per lane in the payload of the SEND message (the 64-bit pointer).
|
||||||
|
*
|
||||||
|
* URB/shared-memory don't have that problem either because there is no
|
||||||
|
* descriptor information outside the EU; it's just a per-lane
|
||||||
|
* handle/offset.
|
||||||
|
*/
|
||||||
|
switch (intrin->intrinsic) {
|
||||||
|
case nir_intrinsic_load_ssbo_uniform_block_intel:
|
||||||
|
case nir_intrinsic_load_ubo_uniform_block_intel:
|
||||||
|
case nir_intrinsic_load_ssbo_block_intel:
|
||||||
|
case nir_intrinsic_load_ssbo_intel:
|
||||||
|
case nir_intrinsic_store_ssbo_intel:
|
||||||
|
case nir_intrinsic_load_ssbo:
|
||||||
|
case nir_intrinsic_store_ssbo:
|
||||||
|
case nir_intrinsic_get_ssbo_size:
|
||||||
|
case nir_intrinsic_load_ubo:
|
||||||
|
case nir_intrinsic_image_load:
|
||||||
|
case nir_intrinsic_image_store:
|
||||||
|
case nir_intrinsic_image_size:
|
||||||
|
case nir_intrinsic_image_levels:
|
||||||
|
case nir_intrinsic_image_atomic:
|
||||||
|
case nir_intrinsic_image_atomic_swap:
|
||||||
|
case nir_intrinsic_bindless_image_load:
|
||||||
|
case nir_intrinsic_bindless_image_store:
|
||||||
|
case nir_intrinsic_bindless_image_size:
|
||||||
|
case nir_intrinsic_bindless_image_levels:
|
||||||
|
case nir_intrinsic_bindless_image_atomic:
|
||||||
|
case nir_intrinsic_bindless_image_atomic_swap: {
|
||||||
|
/* NOTE(review): src_idx is a signed int used unchecked as an array index;
 * presumably nir_get_io_index_src_number() returns a valid source index
 * for every intrinsic listed above -- confirm.
 */
int src_idx = nir_get_io_index_src_number(intrin);
|
||||||
|
if (nir_src_is_divergent(&intrin->src[src_idx])) {
|
||||||
|
nir_intrinsic_set_access(intrin,
|
||||||
|
nir_intrinsic_access(intrin) |
|
||||||
|
ACCESS_FUSED_EU_DISABLE_INTEL);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|

||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|

||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Prepare the given shader for codegen
|
/* Prepare the given shader for codegen
|
||||||
*
|
*
|
||||||
* This function is intended to be called right before going into the actual
|
* This function is intended to be called right before going into the actual
|
||||||
|
|
@ -2283,6 +2363,28 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler,
|
||||||
|
|
||||||
OPT(nir_lower_subgroups, &subgroups_options);
|
OPT(nir_lower_subgroups, &subgroups_options);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Deal with EU fusion */
|
||||||
|
if (devinfo->ver == 12) {
|
||||||
|
nir_divergence_options options =
|
||||||
|
nir_divergence_across_subgroups |
|
||||||
|
nir_divergence_multiple_workgroup_per_compute_subgroup;
|
||||||
|
|
||||||
|
nir_foreach_function_impl(impl, nir) {
|
||||||
|
nir_divergence_analysis_impl(impl, options);
|
||||||
|
}
|
||||||
|
|
||||||
|
nir_shader_instructions_pass(nir,
|
||||||
|
flag_fused_eu_disable_instr,
|
||||||
|
nir_metadata_all, NULL);
|
||||||
|
|
||||||
|
/* The divergence information requested above was computed with special
|
||||||
|
* options and is not needed afterwards.
|
||||||
|
*/
|
||||||
|
nir_foreach_function_impl(impl, nir) {
|
||||||
|
nir_progress(true, impl, ~nir_metadata_divergence);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,8 @@
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Bit set in nir_tex_instr::backend_flags for texture instructions that have
 * a divergent texture/sampler handle or offset source; the backend reads it
 * to set fused_eu_disable on the generated texture instruction.
 */
#define BRW_TEX_INSTR_FUSED_EU_DISABLE (1u << 30)
|
||||||
|
|
||||||
extern const struct nir_shader_compiler_options brw_scalar_nir_options;
|
extern const struct nir_shader_compiler_options brw_scalar_nir_options;
|
||||||
|
|
||||||
int type_size_vec4(const struct glsl_type *type, bool bindless);
|
int type_size_vec4(const struct glsl_type *type, bool bindless);
|
||||||
|
|
|
||||||
|
|
@ -391,9 +391,7 @@ hash_inst(const void *v)
|
||||||
const uint8_t tex_u8data[] = {
|
const uint8_t tex_u8data[] = {
|
||||||
tex->coord_components,
|
tex->coord_components,
|
||||||
tex->grad_components,
|
tex->grad_components,
|
||||||
tex->residency,
|
tex->bits,
|
||||||
tex->surface_bindless,
|
|
||||||
tex->sampler_bindless,
|
|
||||||
};
|
};
|
||||||
const uint32_t tex_u32data[] = {
|
const uint32_t tex_u32data[] = {
|
||||||
tex->sampler_opcode,
|
tex->sampler_opcode,
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue