mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-04 22:49:13 +02:00
intel/brw: Move workarounds to a separate file
All the workarounds are relatively small, so keep them in a single file. Promote (or add) them to a separate file if they get large -- like it is done for opt and lower. Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Acked-by: Ian Romanick <ian.d.romanick@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26887>
This commit is contained in:
parent
c25803880e
commit
e3dc608db9
3 changed files with 274 additions and 263 deletions
|
|
@ -2576,269 +2576,6 @@ fs_visitor::debug_optimizer(const nir_shader *nir,
|
|||
free(filename);
|
||||
}
|
||||
|
||||
static bool
|
||||
needs_dummy_fence(const intel_device_info *devinfo, fs_inst *inst)
|
||||
{
|
||||
/* This workaround is about making sure that any instruction writing
|
||||
* through UGM has completed before we hit EOT.
|
||||
*/
|
||||
if (inst->sfid != GFX12_SFID_UGM)
|
||||
return false;
|
||||
|
||||
/* Any UGM, non-Scratch-surface Stores (not including Atomic) messages,
|
||||
* where the L1-cache override is NOT among {WB, WS, WT}
|
||||
*/
|
||||
enum lsc_opcode opcode = lsc_msg_desc_opcode(devinfo, inst->desc);
|
||||
if (lsc_opcode_is_store(opcode)) {
|
||||
switch (lsc_msg_desc_cache_ctrl(devinfo, inst->desc)) {
|
||||
case LSC_CACHE_STORE_L1STATE_L3MOCS:
|
||||
case LSC_CACHE_STORE_L1WB_L3WB:
|
||||
case LSC_CACHE_STORE_L1S_L3UC:
|
||||
case LSC_CACHE_STORE_L1S_L3WB:
|
||||
case LSC_CACHE_STORE_L1WT_L3UC:
|
||||
case LSC_CACHE_STORE_L1WT_L3WB:
|
||||
return false;
|
||||
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/* Any UGM Atomic message WITHOUT return value */
|
||||
if (lsc_opcode_is_atomic(opcode) && inst->dst.file == BAD_FILE)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Wa_14015360517
|
||||
*
|
||||
* The first instruction of any kernel should have non-zero emask.
|
||||
* Make sure this happens by introducing a dummy mov instruction.
|
||||
*/
|
||||
bool
|
||||
brw_fs_workaround_emit_dummy_mov_instruction(fs_visitor &s)
|
||||
{
|
||||
if (!intel_needs_workaround(s.devinfo, 14015360517))
|
||||
return false;
|
||||
|
||||
struct backend_instruction *first_inst =
|
||||
s.cfg->first_block()->start();
|
||||
|
||||
/* We can skip the WA if first instruction is marked with
|
||||
* force_writemask_all or exec_size equals dispatch_width.
|
||||
*/
|
||||
if (first_inst->force_writemask_all ||
|
||||
first_inst->exec_size == s.dispatch_width)
|
||||
return false;
|
||||
|
||||
/* Insert dummy mov as first instruction. */
|
||||
const fs_builder ubld =
|
||||
fs_builder(&s, s.cfg->first_block(), (fs_inst *)first_inst).exec_all().group(8, 0);
|
||||
ubld.MOV(ubld.null_reg_ud(), brw_imm_ud(0u));
|
||||
|
||||
s.invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Wa_22013689345
|
||||
*
|
||||
* We need to emit UGM fence message before EOT, if shader has any UGM write
|
||||
* or atomic message.
|
||||
*
|
||||
* TODO/FINISHME: According to Curro we could avoid the fence in some cases.
|
||||
* We probably need a better criteria in needs_dummy_fence().
|
||||
*/
|
||||
bool
|
||||
brw_fs_workaround_memory_fence_before_eot(fs_visitor &s)
|
||||
{
|
||||
bool progress = false;
|
||||
bool has_ugm_write_or_atomic = false;
|
||||
|
||||
if (!intel_needs_workaround(s.devinfo, 22013689345))
|
||||
return false;
|
||||
|
||||
foreach_block_and_inst_safe (block, fs_inst, inst, s.cfg) {
|
||||
if (!inst->eot) {
|
||||
if (needs_dummy_fence(s.devinfo, inst))
|
||||
has_ugm_write_or_atomic = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!has_ugm_write_or_atomic)
|
||||
break;
|
||||
|
||||
const fs_builder ibld(&s, block, inst);
|
||||
const fs_builder ubld = ibld.exec_all().group(1, 0);
|
||||
|
||||
fs_reg dst = ubld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
fs_inst *dummy_fence = ubld.emit(SHADER_OPCODE_MEMORY_FENCE,
|
||||
dst, brw_vec8_grf(0, 0),
|
||||
/* commit enable */ brw_imm_ud(1),
|
||||
/* bti */ brw_imm_ud(0));
|
||||
dummy_fence->sfid = GFX12_SFID_UGM;
|
||||
dummy_fence->desc = lsc_fence_msg_desc(s.devinfo, LSC_FENCE_TILE,
|
||||
LSC_FLUSH_TYPE_NONE_6, false);
|
||||
ubld.emit(FS_OPCODE_SCHEDULING_FENCE, ubld.null_reg_ud(), dst);
|
||||
progress = true;
|
||||
/* TODO: remove this break if we ever have shader with multiple EOT. */
|
||||
break;
|
||||
}
|
||||
|
||||
if (progress) {
|
||||
s.invalidate_analysis(DEPENDENCY_INSTRUCTIONS |
|
||||
DEPENDENCY_VARIABLES);
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the first instruction in the program that might start a region of
|
||||
* divergent control flow due to a HALT jump. There is no
|
||||
* find_halt_control_flow_region_end(), the region of divergence extends until
|
||||
* the only SHADER_OPCODE_HALT_TARGET in the program.
|
||||
*/
|
||||
static const fs_inst *
|
||||
find_halt_control_flow_region_start(const fs_visitor *v)
|
||||
{
|
||||
foreach_block_and_inst(block, fs_inst, inst, v->cfg) {
|
||||
if (inst->opcode == BRW_OPCODE_HALT ||
|
||||
inst->opcode == SHADER_OPCODE_HALT_TARGET)
|
||||
return inst;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
 * Work around the Gfx12 hardware bug filed as Wa_1407528679.  EU fusion
 * can cause a BB to be executed with all channels disabled, which will lead
 * to the execution of any NoMask instructions in it, even though any
 * execution-masked instructions will be correctly shot down.  This may break
 * assumptions of some NoMask SEND messages whose descriptor depends on data
 * generated by live invocations of the shader.
 *
 * This avoids the problem by predicating certain instructions on an ANY
 * horizontal predicate that makes sure that their execution is omitted when
 * all channels of the program are disabled.
 */
bool
brw_fs_workaround_nomask_control_flow(fs_visitor &s)
{
   if (s.devinfo->ver != 12)
      return false;

   /* ANY predicate wide enough to cover every channel of the dispatch. */
   const brw_predicate pred = s.dispatch_width > 16 ? BRW_PREDICATE_ALIGN1_ANY32H :
                              s.dispatch_width > 8 ? BRW_PREDICATE_ALIGN1_ANY16H :
                              BRW_PREDICATE_ALIGN1_ANY8H;
   const fs_inst *halt_start = find_halt_control_flow_region_start(&s);
   /* Number of divergent control-flow regions currently open while walking
    * the program backwards; non-zero means "inside control flow".
    */
   unsigned depth = 0;
   bool progress = false;

   const fs_live_variables &live_vars = s.live_analysis.require();

   /* Scan the program backwards in order to be able to easily determine
    * whether the flag register is live at any point.
    */
   foreach_block_reverse_safe(block, s.cfg) {
      BITSET_WORD flag_liveout = live_vars.block_data[block->num]
                                    .flag_liveout[0];
      STATIC_ASSERT(ARRAY_SIZE(live_vars.block_data[0].flag_liveout) == 1);

      foreach_inst_in_block_reverse_safe(fs_inst, inst, block) {
         /* An unpredicated full-width write kills the liveness of the flag
          * bits it writes.
          */
         if (!inst->predicate && inst->exec_size >= 8)
            flag_liveout &= ~inst->flags_written(s.devinfo);

         switch (inst->opcode) {
         case BRW_OPCODE_DO:
         case BRW_OPCODE_IF:
            /* Note that this doesn't handle BRW_OPCODE_HALT since only
             * the first one in the program closes the region of divergent
             * control flow due to any HALT instructions -- Instead this is
             * handled with the halt_start check below.
             */
            depth--;
            break;

         case BRW_OPCODE_WHILE:
         case BRW_OPCODE_ENDIF:
         case SHADER_OPCODE_HALT_TARGET:
            depth++;
            break;

         default:
            /* Note that the vast majority of NoMask SEND instructions in the
             * program are harmless while executed in a block with all
             * channels disabled, since any instructions with side effects we
             * could hit here should be execution-masked.
             *
             * The main concern is NoMask SEND instructions where the message
             * descriptor or header depends on data generated by live
             * invocations of the shader (RESINFO and
             * FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD with a dynamically
             * computed surface index seem to be the only examples right now
             * where this could easily lead to GPU hangs).  Unfortunately we
             * have no straightforward way to detect that currently, so just
             * predicate any NoMask SEND instructions we find under control
             * flow.
             *
             * If this proves to have a measurable performance impact it can
             * be easily extended with a whitelist of messages we know we can
             * safely omit the predication for.
             */
            if (depth && inst->force_writemask_all &&
                is_send(inst) && !inst->predicate) {
               /* We need to load the execution mask into the flag register by
                * using a builder with channel group matching the whole shader
                * (rather than the default which is derived from the original
                * instruction), in order to avoid getting a right-shifted
                * value.
                */
               const fs_builder ubld = fs_builder(&s, block, inst)
                                       .exec_all().group(s.dispatch_width, 0);
               const fs_reg flag = retype(brw_flag_reg(0, 0),
                                          BRW_REGISTER_TYPE_UD);

               /* Due to the lack of flag register allocation we need to save
                * and restore the flag register if it's live.
                */
               const bool save_flag = flag_liveout &
                                      brw_fs_flag_mask(flag,
                                                       s.dispatch_width / 8);
               const fs_reg tmp = ubld.group(8, 0).vgrf(flag.type);

               if (save_flag) {
                  ubld.group(8, 0).UNDEF(tmp);
                  ubld.group(1, 0).MOV(tmp, flag);
               }

               ubld.emit(FS_OPCODE_LOAD_LIVE_CHANNELS);

               set_predicate(pred, inst);
               inst->flag_subreg = 0;
               inst->predicate_trivial = true;

               /* Restore the saved flag value after the predicated SEND. */
               if (save_flag)
                  ubld.group(1, 0).at(block, inst->next).MOV(flag, tmp);

               progress = true;
            }
            break;
         }

         /* The first HALT-related instruction closes the HALT divergence
          * region when walking backwards.
          */
         if (inst == halt_start)
            depth--;

         flag_liveout |= inst->flags_read(s.devinfo);
      }
   }

   if (progress)
      s.invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);

   return progress;
}
|
||||
|
||||
uint32_t
|
||||
fs_visitor::compute_max_register_pressure()
|
||||
{
|
||||
|
|
|
|||
273
src/intel/compiler/brw_fs_workaround.cpp
Normal file
273
src/intel/compiler/brw_fs_workaround.cpp
Normal file
|
|
@ -0,0 +1,273 @@
|
|||
/*
|
||||
* Copyright © 2010 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "brw_fs.h"
|
||||
#include "brw_fs_builder.h"
|
||||
|
||||
using namespace brw;
|
||||
|
||||
/* Wa_14015360517
|
||||
*
|
||||
* The first instruction of any kernel should have non-zero emask.
|
||||
* Make sure this happens by introducing a dummy mov instruction.
|
||||
*/
|
||||
bool
|
||||
brw_fs_workaround_emit_dummy_mov_instruction(fs_visitor &s)
|
||||
{
|
||||
if (!intel_needs_workaround(s.devinfo, 14015360517))
|
||||
return false;
|
||||
|
||||
struct backend_instruction *first_inst =
|
||||
s.cfg->first_block()->start();
|
||||
|
||||
/* We can skip the WA if first instruction is marked with
|
||||
* force_writemask_all or exec_size equals dispatch_width.
|
||||
*/
|
||||
if (first_inst->force_writemask_all ||
|
||||
first_inst->exec_size == s.dispatch_width)
|
||||
return false;
|
||||
|
||||
/* Insert dummy mov as first instruction. */
|
||||
const fs_builder ubld =
|
||||
fs_builder(&s, s.cfg->first_block(), (fs_inst *)first_inst).exec_all().group(8, 0);
|
||||
ubld.MOV(ubld.null_reg_ud(), brw_imm_ud(0u));
|
||||
|
||||
s.invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
needs_dummy_fence(const intel_device_info *devinfo, fs_inst *inst)
|
||||
{
|
||||
/* This workaround is about making sure that any instruction writing
|
||||
* through UGM has completed before we hit EOT.
|
||||
*/
|
||||
if (inst->sfid != GFX12_SFID_UGM)
|
||||
return false;
|
||||
|
||||
/* Any UGM, non-Scratch-surface Stores (not including Atomic) messages,
|
||||
* where the L1-cache override is NOT among {WB, WS, WT}
|
||||
*/
|
||||
enum lsc_opcode opcode = lsc_msg_desc_opcode(devinfo, inst->desc);
|
||||
if (lsc_opcode_is_store(opcode)) {
|
||||
switch (lsc_msg_desc_cache_ctrl(devinfo, inst->desc)) {
|
||||
case LSC_CACHE_STORE_L1STATE_L3MOCS:
|
||||
case LSC_CACHE_STORE_L1WB_L3WB:
|
||||
case LSC_CACHE_STORE_L1S_L3UC:
|
||||
case LSC_CACHE_STORE_L1S_L3WB:
|
||||
case LSC_CACHE_STORE_L1WT_L3UC:
|
||||
case LSC_CACHE_STORE_L1WT_L3WB:
|
||||
return false;
|
||||
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/* Any UGM Atomic message WITHOUT return value */
|
||||
if (lsc_opcode_is_atomic(opcode) && inst->dst.file == BAD_FILE)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Wa_22013689345
|
||||
*
|
||||
* We need to emit UGM fence message before EOT, if shader has any UGM write
|
||||
* or atomic message.
|
||||
*
|
||||
* TODO/FINISHME: According to Curro we could avoid the fence in some cases.
|
||||
* We probably need a better criteria in needs_dummy_fence().
|
||||
*/
|
||||
bool
|
||||
brw_fs_workaround_memory_fence_before_eot(fs_visitor &s)
|
||||
{
|
||||
bool progress = false;
|
||||
bool has_ugm_write_or_atomic = false;
|
||||
|
||||
if (!intel_needs_workaround(s.devinfo, 22013689345))
|
||||
return false;
|
||||
|
||||
foreach_block_and_inst_safe (block, fs_inst, inst, s.cfg) {
|
||||
if (!inst->eot) {
|
||||
if (needs_dummy_fence(s.devinfo, inst))
|
||||
has_ugm_write_or_atomic = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!has_ugm_write_or_atomic)
|
||||
break;
|
||||
|
||||
const fs_builder ibld(&s, block, inst);
|
||||
const fs_builder ubld = ibld.exec_all().group(1, 0);
|
||||
|
||||
fs_reg dst = ubld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
fs_inst *dummy_fence = ubld.emit(SHADER_OPCODE_MEMORY_FENCE,
|
||||
dst, brw_vec8_grf(0, 0),
|
||||
/* commit enable */ brw_imm_ud(1),
|
||||
/* bti */ brw_imm_ud(0));
|
||||
dummy_fence->sfid = GFX12_SFID_UGM;
|
||||
dummy_fence->desc = lsc_fence_msg_desc(s.devinfo, LSC_FENCE_TILE,
|
||||
LSC_FLUSH_TYPE_NONE_6, false);
|
||||
ubld.emit(FS_OPCODE_SCHEDULING_FENCE, ubld.null_reg_ud(), dst);
|
||||
progress = true;
|
||||
/* TODO: remove this break if we ever have shader with multiple EOT. */
|
||||
break;
|
||||
}
|
||||
|
||||
if (progress) {
|
||||
s.invalidate_analysis(DEPENDENCY_INSTRUCTIONS |
|
||||
DEPENDENCY_VARIABLES);
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the first instruction in the program that might start a region of
|
||||
* divergent control flow due to a HALT jump. There is no
|
||||
* find_halt_control_flow_region_end(), the region of divergence extends until
|
||||
* the only SHADER_OPCODE_HALT_TARGET in the program.
|
||||
*/
|
||||
static const fs_inst *
|
||||
find_halt_control_flow_region_start(const fs_visitor *v)
|
||||
{
|
||||
foreach_block_and_inst(block, fs_inst, inst, v->cfg) {
|
||||
if (inst->opcode == BRW_OPCODE_HALT ||
|
||||
inst->opcode == SHADER_OPCODE_HALT_TARGET)
|
||||
return inst;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
 * Work around the Gfx12 hardware bug filed as Wa_1407528679.  EU fusion
 * can cause a BB to be executed with all channels disabled, which will lead
 * to the execution of any NoMask instructions in it, even though any
 * execution-masked instructions will be correctly shot down.  This may break
 * assumptions of some NoMask SEND messages whose descriptor depends on data
 * generated by live invocations of the shader.
 *
 * This avoids the problem by predicating certain instructions on an ANY
 * horizontal predicate that makes sure that their execution is omitted when
 * all channels of the program are disabled.
 */
bool
brw_fs_workaround_nomask_control_flow(fs_visitor &s)
{
   if (s.devinfo->ver != 12)
      return false;

   /* ANY predicate wide enough to cover every channel of the dispatch. */
   const brw_predicate pred = s.dispatch_width > 16 ? BRW_PREDICATE_ALIGN1_ANY32H :
                              s.dispatch_width > 8 ? BRW_PREDICATE_ALIGN1_ANY16H :
                              BRW_PREDICATE_ALIGN1_ANY8H;
   const fs_inst *halt_start = find_halt_control_flow_region_start(&s);
   /* Number of divergent control-flow regions currently open while walking
    * the program backwards; non-zero means "inside control flow".
    */
   unsigned depth = 0;
   bool progress = false;

   const fs_live_variables &live_vars = s.live_analysis.require();

   /* Scan the program backwards in order to be able to easily determine
    * whether the flag register is live at any point.
    */
   foreach_block_reverse_safe(block, s.cfg) {
      BITSET_WORD flag_liveout = live_vars.block_data[block->num]
                                    .flag_liveout[0];
      STATIC_ASSERT(ARRAY_SIZE(live_vars.block_data[0].flag_liveout) == 1);

      foreach_inst_in_block_reverse_safe(fs_inst, inst, block) {
         /* An unpredicated full-width write kills the liveness of the flag
          * bits it writes.
          */
         if (!inst->predicate && inst->exec_size >= 8)
            flag_liveout &= ~inst->flags_written(s.devinfo);

         switch (inst->opcode) {
         case BRW_OPCODE_DO:
         case BRW_OPCODE_IF:
            /* Note that this doesn't handle BRW_OPCODE_HALT since only
             * the first one in the program closes the region of divergent
             * control flow due to any HALT instructions -- Instead this is
             * handled with the halt_start check below.
             */
            depth--;
            break;

         case BRW_OPCODE_WHILE:
         case BRW_OPCODE_ENDIF:
         case SHADER_OPCODE_HALT_TARGET:
            depth++;
            break;

         default:
            /* Note that the vast majority of NoMask SEND instructions in the
             * program are harmless while executed in a block with all
             * channels disabled, since any instructions with side effects we
             * could hit here should be execution-masked.
             *
             * The main concern is NoMask SEND instructions where the message
             * descriptor or header depends on data generated by live
             * invocations of the shader (RESINFO and
             * FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD with a dynamically
             * computed surface index seem to be the only examples right now
             * where this could easily lead to GPU hangs).  Unfortunately we
             * have no straightforward way to detect that currently, so just
             * predicate any NoMask SEND instructions we find under control
             * flow.
             *
             * If this proves to have a measurable performance impact it can
             * be easily extended with a whitelist of messages we know we can
             * safely omit the predication for.
             */
            if (depth && inst->force_writemask_all &&
                is_send(inst) && !inst->predicate) {
               /* We need to load the execution mask into the flag register by
                * using a builder with channel group matching the whole shader
                * (rather than the default which is derived from the original
                * instruction), in order to avoid getting a right-shifted
                * value.
                */
               const fs_builder ubld = fs_builder(&s, block, inst)
                                       .exec_all().group(s.dispatch_width, 0);
               const fs_reg flag = retype(brw_flag_reg(0, 0),
                                          BRW_REGISTER_TYPE_UD);

               /* Due to the lack of flag register allocation we need to save
                * and restore the flag register if it's live.
                */
               const bool save_flag = flag_liveout &
                                      brw_fs_flag_mask(flag,
                                                       s.dispatch_width / 8);
               const fs_reg tmp = ubld.group(8, 0).vgrf(flag.type);

               if (save_flag) {
                  ubld.group(8, 0).UNDEF(tmp);
                  ubld.group(1, 0).MOV(tmp, flag);
               }

               ubld.emit(FS_OPCODE_LOAD_LIVE_CHANNELS);

               set_predicate(pred, inst);
               inst->flag_subreg = 0;
               inst->predicate_trivial = true;

               /* Restore the saved flag value after the predicated SEND. */
               if (save_flag)
                  ubld.group(1, 0).at(block, inst->next).MOV(flag, tmp);

               progress = true;
            }
            break;
         }

         /* The first HALT-related instruction closes the HALT divergence
          * region when walking backwards.
          */
         if (inst == halt_start)
            depth--;

         flag_liveout |= inst->flags_read(s.devinfo);
      }
   }

   if (progress)
      s.invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);

   return progress;
}
|
||||
|
||||
|
|
@ -93,6 +93,7 @@ libintel_compiler_brw_files = files(
|
|||
'brw_fs_thread_payload.cpp',
|
||||
'brw_fs_validate.cpp',
|
||||
'brw_fs_visitor.cpp',
|
||||
'brw_fs_workaround.cpp',
|
||||
'brw_inst.h',
|
||||
'brw_interpolation_map.c',
|
||||
'brw_ir.h',
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue