/*
 * Copyright © 2010 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include "brw_shader.h"
#include "brw_builder.h"

/* Wa_14015360517
 *
 * The first instruction of any kernel should have a non-zero emask.
 * Make sure this happens by introducing a dummy MOV instruction.
 */
bool
brw_workaround_emit_dummy_mov_instruction(brw_shader &s)
{
   if (!intel_needs_workaround(s.devinfo, 14015360517))
      return false;

   brw_inst *first_inst =
      s.cfg->first_block()->start();

   /* We can skip the WA if the first instruction is marked with
    * force_writemask_all or its exec_size equals the dispatch_width.
    */
   if (first_inst->force_writemask_all ||
       first_inst->exec_size == s.dispatch_width)
      return false;

   /* Insert a dummy MOV as the first instruction. */
   const brw_builder ubld = brw_builder(first_inst).exec_all().group(8, 0);
   ubld.MOV(ubld.null_reg_ud(), brw_imm_ud(0u));
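
   /* The MOV above is emitted with exec_all(), i.e. NoMask; roughly (a
    * sketch, exact disassembly syntax varies between generations):
    *
    *    mov(8)  null<1>:UD  0x00000000:UD  { align1 WE_all 1Q }
    *
    * so the kernel's first instruction executes with a non-zero emask even
    * when all dispatch channels start out disabled.
    */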

   s.invalidate_analysis(BRW_DEPENDENCY_INSTRUCTIONS |
                         BRW_DEPENDENCY_VARIABLES);
   return true;
}

static bool
needs_dummy_fence(const intel_device_info *devinfo, brw_inst *inst)
{
   /* This workaround is about making sure that any instruction writing
    * through UGM has completed before we hit EOT.
    */
   if (inst->sfid != BRW_SFID_UGM)
      return false;

   /* Any UGM, non-Scratch-surface Store (not including Atomic) message
    * where the L1-cache override is NOT among {WB, WS, WT} needs the fence.
    */
   enum lsc_opcode opcode = lsc_msg_desc_opcode(devinfo, inst->desc);
   if (lsc_opcode_is_store(opcode)) {
      switch (lsc_msg_desc_cache_ctrl(devinfo, inst->desc)) {
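      /* The cases below appear to be the L1 overrides the workaround text
       * exempts -- write-back (WB), streaming (S/WS), and write-through
       * (WT) -- plus L1STATE, which defers to the surface state's MOCS
       * defaults; stores using any of them skip the dummy fence.
       */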
      case LSC_CACHE_STORE_L1STATE_L3MOCS:
      case LSC_CACHE_STORE_L1WB_L3WB:
      case LSC_CACHE_STORE_L1S_L3UC:
      case LSC_CACHE_STORE_L1S_L3WB:
      case LSC_CACHE_STORE_L1WT_L3UC:
      case LSC_CACHE_STORE_L1WT_L3WB:
         return false;

      default:
         return true;
      }
   }

   /* Any UGM Atomic message WITHOUT a return value also needs the fence. */
   if (lsc_opcode_is_atomic(opcode) && inst->dst.file == BAD_FILE)
      return true;

   return false;
}

/* Wa_22013689345
 *
 * We need to emit a UGM fence message before EOT if the shader has any UGM
 * write or atomic message.
 *
 * TODO/FINISHME: According to Curro we could avoid the fence in some cases.
 * We probably need a better criterion in needs_dummy_fence().
 */
bool
brw_workaround_memory_fence_before_eot(brw_shader &s)
{
   bool progress = false;
   bool has_ugm_write_or_atomic = false;

   if (!intel_needs_workaround(s.devinfo, 22013689345))
      return false;

   foreach_block_and_inst_safe (block, brw_inst, inst, s.cfg) {
      if (!inst->eot) {
         if (needs_dummy_fence(s.devinfo, inst))
            has_ugm_write_or_atomic = true;
         continue;
      }

      if (!has_ugm_write_or_atomic)
         break;

      const brw_builder ibld(inst);
      const brw_builder ubld = ibld.exec_all().group(1, 0);

      brw_reg dst = ubld.vgrf(BRW_TYPE_UD);
      brw_inst *dummy_fence = ubld.emit(SHADER_OPCODE_MEMORY_FENCE,
                                        dst, brw_vec8_grf(0, 0),
                                        /* commit enable */ brw_imm_ud(1));
      dummy_fence->sfid = BRW_SFID_UGM;
      dummy_fence->desc = lsc_fence_msg_desc(s.devinfo, LSC_FENCE_TILE,
                                             LSC_FLUSH_TYPE_NONE_6, false);
      dummy_fence->size_written = REG_SIZE * reg_unit(s.devinfo);
      ubld.emit(FS_OPCODE_SCHEDULING_FENCE, ubld.null_reg_ud(), dst);
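
      /* Schematically, the pair emitted above is (a sketch, not exact
       * disassembly):
       *
       *    send(1)  dst  g0  <LSC UGM fence, tile scope, commit enable>
       *    scheduling_fence  null  dst
       *
       * Sourcing the fence response payload in the scheduling fence makes
       * the EOT message below wait for the fence completion.
       */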

      progress = true;
      /* TODO: remove this break if we ever have a shader with multiple EOTs. */
      break;
   }

   if (progress) {
      s.invalidate_analysis(BRW_DEPENDENCY_INSTRUCTIONS |
                            BRW_DEPENDENCY_VARIABLES);
   }

   return progress;
}

/**
 * Find the first instruction in the program that might start a region of
 * divergent control flow due to a HALT jump. There is no
 * find_halt_control_flow_region_end(); the region of divergence extends
 * until the only SHADER_OPCODE_HALT_TARGET in the program.
 */
static const brw_inst *
find_halt_control_flow_region_start(const brw_shader *v)
{
   foreach_block_and_inst(block, brw_inst, inst, v->cfg) {
      if (inst->opcode == BRW_OPCODE_HALT ||
          inst->opcode == SHADER_OPCODE_HALT_TARGET)
         return inst;
   }

   return NULL;
}

/**
 * Work around the Gfx12 hardware bug filed as Wa_1407528679. EU fusion
 * can cause a BB to be executed with all channels disabled, which will lead
 * to the execution of any NoMask instructions in it, even though any
 * execution-masked instructions will be correctly shot down. This may break
 * assumptions of some NoMask SEND messages whose descriptor depends on data
 * generated by live invocations of the shader.
 *
 * This avoids the problem by predicating certain instructions on an ANY
 * horizontal predicate that makes sure that their execution is omitted when
 * all channels of the program are disabled.
 */
bool
brw_workaround_nomask_control_flow(brw_shader &s)
{
   if (s.devinfo->ver != 12)
      return false;

   const brw_predicate pred = s.dispatch_width > 16 ? BRW_PREDICATE_ALIGN1_ANY32H :
                              s.dispatch_width > 8 ? BRW_PREDICATE_ALIGN1_ANY16H :
                              BRW_PREDICATE_ALIGN1_ANY8H;
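
   /* For example, a SIMD16 shader selects ANY16H: the predicated SEND then
    * only executes when at least one of the 16 channel bits in the flag is
    * set, i.e. when the thread has at least one live invocation.
    */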

   const brw_inst *halt_start = find_halt_control_flow_region_start(&s);
   unsigned depth = 0;
   bool progress = false;

   const brw_live_variables &live_vars = s.live_analysis.require();

   /* Scan the program backwards in order to be able to easily determine
    * whether the flag register is live at any point.
    */
   foreach_block_reverse_safe(block, s.cfg) {
      BITSET_WORD flag_liveout = live_vars.block_data[block->num]
                                    .flag_liveout[0];
      STATIC_ASSERT(ARRAY_SIZE(live_vars.block_data[0].flag_liveout) == 1);
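
      /* flag_liveout tracks liveness with one bit per flag-register byte at
       * the end of the block; the reverse walk below clears bits that are
       * overwritten unconditionally and re-adds bits that are read.
       */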

      foreach_inst_in_block_reverse_safe(brw_inst, inst, block) {
         if (!inst->predicate && inst->exec_size >= 8)
            flag_liveout &= ~inst->flags_written(s.devinfo);

         switch (inst->opcode) {
         case BRW_OPCODE_DO:
         case BRW_OPCODE_IF:
            /* Note that this doesn't handle BRW_OPCODE_HALT since only
             * the first one in the program closes the region of divergent
             * control flow due to any HALT instructions -- instead this is
             * handled with the halt_start check below.
             */
            depth--;
            break;

         case BRW_OPCODE_WHILE:
         case BRW_OPCODE_ENDIF:
         case SHADER_OPCODE_HALT_TARGET:
            depth++;
            break;

         default:
            /* Note that the vast majority of NoMask SEND instructions in the
             * program are harmless while executed in a block with all
             * channels disabled, since any instructions with side effects we
             * could hit here should be execution-masked.
             *
             * The main concern is NoMask SEND instructions where the message
             * descriptor or header depends on data generated by live
             * invocations of the shader (RESINFO and
             * FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD with a dynamically
             * computed surface index seem to be the only examples right now
             * where this could easily lead to GPU hangs). Unfortunately we
             * have no straightforward way to detect that currently, so just
             * predicate any NoMask SEND instructions we find under control
             * flow.
             *
             * If this proves to have a measurable performance impact it can
             * be easily extended with a whitelist of messages we know we can
             * safely omit the predication for.
             */
            if (depth && inst->force_writemask_all &&
                is_send(inst) && !inst->predicate &&
                !inst->has_no_mask_send_params) {
               /* We need to load the execution mask into the flag register
                * using a builder with a channel group matching the whole
                * shader (rather than the default, which is derived from the
                * original instruction), in order to avoid getting a
                * right-shifted value.
                */
               const brw_builder ubld = brw_builder(inst)
                                        .exec_all().group(s.dispatch_width, 0);
               const brw_reg flag = retype(brw_flag_reg(0, 0),
                                           BRW_TYPE_UD);

               /* Due to the lack of flag register allocation we need to save
                * and restore the flag register if it's live.
                */
               const bool save_flag = flag_liveout &
                                      brw_flag_mask(flag, s.dispatch_width / 8);
               const brw_reg tmp = ubld.group(8, 0).vgrf(flag.type);

               if (save_flag) {
                  ubld.group(8, 0).UNDEF(tmp);
                  ubld.group(1, 0).MOV(tmp, flag);
               }

               ubld.emit(FS_OPCODE_LOAD_LIVE_CHANNELS);

               set_predicate(pred, inst);
               inst->flag_subreg = 0;
               inst->predicate_trivial = true;

               if (save_flag)
                  ubld.group(1, 0).at(block, inst->next).MOV(flag, tmp);
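
               /* The rewritten SEND thus ends up bracketed roughly like this
                * (a sketch for the SIMD16/ANY16H case, not exact
                * disassembly):
                *
                *    mov(1)  tmp<1>:UD  f0<0,1,0>:UD   ; only if save_flag
                *    load_live_channels                ; writes f0
                *    (+f0.any16h) send(16) ...
                *    mov(1)  f0<1>:UD  tmp<0,1,0>:UD   ; only if save_flag
                */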

               progress = true;
            }
            break;
         }

         if (inst == halt_start)
            depth--;

         flag_liveout |= inst->flags_read(s.devinfo);
      }
   }

   if (progress)
      s.invalidate_analysis(BRW_DEPENDENCY_INSTRUCTIONS |
                            BRW_DEPENDENCY_VARIABLES);

   return progress;
}

/**
 * flags_read() and flags_written() return flag access with byte granularity,
 * but the PRM lists "Access Granularity: Word" for the Flag Register, so we
 * can assume accessing any part of a word will clear its register dependency.
 */
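/* A worked example (assuming the flags_read()/flags_written() bit layout,
 * where bits 0-1 are the two bytes of f0.0, bits 2-3 those of f0.1, and so
 * on): a byte mask of 0x2 widens to the word mask 0x3, and 0x4 widens to 0xc.
 */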
static unsigned
bytes_bitmask_to_words(unsigned b)
{
   unsigned first_byte_mask = b & 0x55555555;
   unsigned second_byte_mask = b & 0xaaaaaaaa;
   return first_byte_mask |
          (first_byte_mask << 1) |
          second_byte_mask |
          (second_byte_mask >> 1);
}

/**
 * WaClearArfDependenciesBeforeEot
 *
 * Flag register dependencies are not cleared after EOT, so we have to source
 * them before EOT. We can do this with a simple `mov(1) nullUD, f{0,1}UD`.
 *
 * To avoid emitting MOVs when they're not needed, check whether each block
 * reads all the flags it sets. We might falsely determine a register as
 * unread if it'll be accessed inside the next blocks, but this still should
 * be good enough.
 */
bool
brw_workaround_source_arf_before_eot(brw_shader &s)
{
   bool progress = false;

   if (s.devinfo->ver != 9)
      return false;

   unsigned flags_unread = 0;

   foreach_block(block, s.cfg) {
      unsigned flags_unread_in_block = 0;

      foreach_inst_in_block(brw_inst, inst, block) {
         /* An instruction can read and write the same flag, so the order of
          * these two updates is important.
          */
         flags_unread_in_block &= ~bytes_bitmask_to_words(inst->flags_read(s.devinfo));
         flags_unread_in_block |= bytes_bitmask_to_words(inst->flags_written(s.devinfo));

         /* HALT does not start its own block even though it can leave a
          * dependency.
          */
         if (inst->opcode == BRW_OPCODE_HALT ||
             inst->opcode == SHADER_OPCODE_HALT_TARGET) {
            flags_unread |= flags_unread_in_block;
            flags_unread_in_block = 0;
         }
      }

      flags_unread |= flags_unread_in_block;
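
      /* Assuming the bit layout noted above, the low nibble covers the
       * bytes of f0 and the high nibble those of f1; once both registers
       * have unread writes, scanning further cannot change the outcome.
       */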
      if ((flags_unread & 0x0f) && (flags_unread & 0xf0))
         break;
   }

   if (flags_unread) {
      int eot_count = 0;

      foreach_block_and_inst_safe(block, brw_inst, inst, s.cfg) {
         if (!inst->eot)
            continue;

         /* Currently we always emit only one EOT per program; this WA
          * should be updated if that ever changes.
          */
         assert(++eot_count == 1);

         const brw_builder ibld(inst);
         const brw_builder ubld = ibld.exec_all().group(1, 0);

         if (flags_unread & 0x0f)
            ubld.MOV(ubld.null_reg_ud(), retype(brw_flag_reg(0, 0), BRW_TYPE_UD));

         if (flags_unread & 0xf0)
            ubld.MOV(ubld.null_reg_ud(), retype(brw_flag_reg(1, 0), BRW_TYPE_UD));
      }

      progress = true;
      s.invalidate_analysis(BRW_DEPENDENCY_INSTRUCTIONS);
   }

   return progress;
}