mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 04:38:03 +02:00
intel/elk: Remove Gfx9+-only passes
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27629>
This commit is contained in:
parent
fb2eee2aaa
commit
9f5213923e
2 changed files with 0 additions and 210 deletions
|
|
@ -5311,65 +5311,6 @@ elk_fs_visitor::lower_barycentrics()
|
|||
return progress;
|
||||
}
|
||||
|
||||
/**
|
||||
* Lower a derivative instruction as the floating-point difference of two
|
||||
* swizzles of the source, specified as \p swz0 and \p swz1.
|
||||
*/
|
||||
static bool
|
||||
lower_derivative(elk_fs_visitor *v, elk_bblock_t *block, elk_fs_inst *inst,
|
||||
unsigned swz0, unsigned swz1)
|
||||
{
|
||||
const fs_builder ubld = fs_builder(v, block, inst).exec_all();
|
||||
const elk_fs_reg tmp0 = ubld.vgrf(inst->src[0].type);
|
||||
const elk_fs_reg tmp1 = ubld.vgrf(inst->src[0].type);
|
||||
|
||||
ubld.emit(ELK_SHADER_OPCODE_QUAD_SWIZZLE, tmp0, inst->src[0], elk_imm_ud(swz0));
|
||||
ubld.emit(ELK_SHADER_OPCODE_QUAD_SWIZZLE, tmp1, inst->src[0], elk_imm_ud(swz1));
|
||||
|
||||
inst->resize_sources(2);
|
||||
inst->src[0] = negate(tmp0);
|
||||
inst->src[1] = tmp1;
|
||||
inst->opcode = ELK_OPCODE_ADD;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Lower derivative instructions on platforms where codegen cannot implement
|
||||
* them efficiently (i.e. XeHP).
|
||||
*/
|
||||
bool
|
||||
elk_fs_visitor::lower_derivatives()
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
if (devinfo->verx10 < 125)
|
||||
return false;
|
||||
|
||||
foreach_block_and_inst(block, elk_fs_inst, inst, cfg) {
|
||||
if (inst->opcode == ELK_FS_OPCODE_DDX_COARSE)
|
||||
progress |= lower_derivative(this, block, inst,
|
||||
ELK_SWIZZLE_XXXX, ELK_SWIZZLE_YYYY);
|
||||
|
||||
else if (inst->opcode == ELK_FS_OPCODE_DDX_FINE)
|
||||
progress |= lower_derivative(this, block, inst,
|
||||
ELK_SWIZZLE_XXZZ, ELK_SWIZZLE_YYWW);
|
||||
|
||||
else if (inst->opcode == ELK_FS_OPCODE_DDY_COARSE)
|
||||
progress |= lower_derivative(this, block, inst,
|
||||
ELK_SWIZZLE_XXXX, ELK_SWIZZLE_ZZZZ);
|
||||
|
||||
else if (inst->opcode == ELK_FS_OPCODE_DDY_FINE)
|
||||
progress |= lower_derivative(this, block, inst,
|
||||
ELK_SWIZZLE_XYXY, ELK_SWIZZLE_ZWZW);
|
||||
}
|
||||
|
||||
if (progress)
|
||||
invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
bool
|
||||
elk_fs_visitor::lower_find_live_channel()
|
||||
{
|
||||
|
|
@ -5844,8 +5785,6 @@ elk_fs_visitor::optimize()
|
|||
OPT(opt_algebraic);
|
||||
}
|
||||
|
||||
OPT(fixup_nomask_control_flow);
|
||||
|
||||
if (progress) {
|
||||
if (OPT(opt_copy_propagation))
|
||||
OPT(opt_algebraic);
|
||||
|
|
@ -5897,7 +5836,6 @@ elk_fs_visitor::optimize()
|
|||
}
|
||||
|
||||
progress = false;
|
||||
OPT(lower_derivatives);
|
||||
OPT(lower_regioning);
|
||||
if (progress) {
|
||||
if (OPT(opt_copy_propagation))
|
||||
|
|
@ -5935,152 +5873,6 @@ elk_fs_visitor::fixup_3src_null_dest()
|
|||
DEPENDENCY_VARIABLES);
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the first instruction in the program that might start a region of
|
||||
* divergent control flow due to a HALT jump. There is no
|
||||
* find_halt_control_flow_region_end(), the region of divergence extends until
|
||||
* the only ELK_SHADER_OPCODE_HALT_TARGET in the program.
|
||||
*/
|
||||
static const elk_fs_inst *
|
||||
find_halt_control_flow_region_start(const elk_fs_visitor *v)
|
||||
{
|
||||
foreach_block_and_inst(block, elk_fs_inst, inst, v->cfg) {
|
||||
if (inst->opcode == ELK_OPCODE_HALT ||
|
||||
inst->opcode == ELK_SHADER_OPCODE_HALT_TARGET)
|
||||
return inst;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Work around the Gfx12 hardware bug filed as Wa_1407528679. EU fusion
|
||||
* can cause a BB to be executed with all channels disabled, which will lead
|
||||
* to the execution of any NoMask instructions in it, even though any
|
||||
* execution-masked instructions will be correctly shot down. This may break
|
||||
* assumptions of some NoMask SEND messages whose descriptor depends on data
|
||||
* generated by live invocations of the shader.
|
||||
*
|
||||
* This avoids the problem by predicating certain instructions on an ANY
|
||||
* horizontal predicate that makes sure that their execution is omitted when
|
||||
* all channels of the program are disabled.
|
||||
*/
|
||||
bool
|
||||
elk_fs_visitor::fixup_nomask_control_flow()
|
||||
{
|
||||
if (devinfo->ver != 12)
|
||||
return false;
|
||||
|
||||
const elk_predicate pred = dispatch_width > 16 ? ELK_PREDICATE_ALIGN1_ANY32H :
|
||||
dispatch_width > 8 ? ELK_PREDICATE_ALIGN1_ANY16H :
|
||||
ELK_PREDICATE_ALIGN1_ANY8H;
|
||||
const elk_fs_inst *halt_start = find_halt_control_flow_region_start(this);
|
||||
unsigned depth = 0;
|
||||
bool progress = false;
|
||||
|
||||
const fs_live_variables &live_vars = live_analysis.require();
|
||||
|
||||
/* Scan the program backwards in order to be able to easily determine
|
||||
* whether the flag register is live at any point.
|
||||
*/
|
||||
foreach_block_reverse_safe(block, cfg) {
|
||||
BITSET_WORD flag_liveout = live_vars.block_data[block->num]
|
||||
.flag_liveout[0];
|
||||
STATIC_ASSERT(ARRAY_SIZE(live_vars.block_data[0].flag_liveout) == 1);
|
||||
|
||||
foreach_inst_in_block_reverse_safe(elk_fs_inst, inst, block) {
|
||||
if (!inst->predicate && inst->exec_size >= 8)
|
||||
flag_liveout &= ~inst->flags_written(devinfo);
|
||||
|
||||
switch (inst->opcode) {
|
||||
case ELK_OPCODE_DO:
|
||||
case ELK_OPCODE_IF:
|
||||
/* Note that this doesn't handle ELK_OPCODE_HALT since only
|
||||
* the first one in the program closes the region of divergent
|
||||
* control flow due to any HALT instructions -- Instead this is
|
||||
* handled with the halt_start check below.
|
||||
*/
|
||||
depth--;
|
||||
break;
|
||||
|
||||
case ELK_OPCODE_WHILE:
|
||||
case ELK_OPCODE_ENDIF:
|
||||
case ELK_SHADER_OPCODE_HALT_TARGET:
|
||||
depth++;
|
||||
break;
|
||||
|
||||
default:
|
||||
/* Note that the vast majority of NoMask SEND instructions in the
|
||||
* program are harmless while executed in a block with all
|
||||
* channels disabled, since any instructions with side effects we
|
||||
* could hit here should be execution-masked.
|
||||
*
|
||||
* The main concern is NoMask SEND instructions where the message
|
||||
* descriptor or header depends on data generated by live
|
||||
* invocations of the shader (RESINFO and
|
||||
* ELK_FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD with a dynamically
|
||||
* computed surface index seem to be the only examples right now
|
||||
* where this could easily lead to GPU hangs). Unfortunately we
|
||||
* have no straightforward way to detect that currently, so just
|
||||
* predicate any NoMask SEND instructions we find under control
|
||||
* flow.
|
||||
*
|
||||
* If this proves to have a measurable performance impact it can
|
||||
* be easily extended with a whitelist of messages we know we can
|
||||
* safely omit the predication for.
|
||||
*/
|
||||
if (depth && inst->force_writemask_all &&
|
||||
is_send(inst) && !inst->predicate) {
|
||||
/* We need to load the execution mask into the flag register by
|
||||
* using a builder with channel group matching the whole shader
|
||||
* (rather than the default which is derived from the original
|
||||
* instruction), in order to avoid getting a right-shifted
|
||||
* value.
|
||||
*/
|
||||
const fs_builder ubld = fs_builder(this, block, inst)
|
||||
.exec_all().group(dispatch_width, 0);
|
||||
const elk_fs_reg flag = retype(elk_flag_reg(0, 0),
|
||||
ELK_REGISTER_TYPE_UD);
|
||||
|
||||
/* Due to the lack of flag register allocation we need to save
|
||||
* and restore the flag register if it's live.
|
||||
*/
|
||||
const bool save_flag = flag_liveout &
|
||||
flag_mask(flag, dispatch_width / 8);
|
||||
const elk_fs_reg tmp = ubld.group(8, 0).vgrf(flag.type);
|
||||
|
||||
if (save_flag) {
|
||||
ubld.group(8, 0).UNDEF(tmp);
|
||||
ubld.group(1, 0).MOV(tmp, flag);
|
||||
}
|
||||
|
||||
ubld.emit(ELK_FS_OPCODE_LOAD_LIVE_CHANNELS);
|
||||
|
||||
set_predicate(pred, inst);
|
||||
inst->flag_subreg = 0;
|
||||
inst->predicate_trivial = true;
|
||||
|
||||
if (save_flag)
|
||||
ubld.group(1, 0).at(block, inst->next).MOV(flag, tmp);
|
||||
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (inst == halt_start)
|
||||
depth--;
|
||||
|
||||
flag_liveout |= inst->flags_read(devinfo);
|
||||
}
|
||||
}
|
||||
|
||||
if (progress)
|
||||
invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
elk_fs_visitor::compute_max_register_pressure()
|
||||
{
|
||||
|
|
|
|||
|
|
@ -213,7 +213,6 @@ public:
|
|||
void allocate_registers(bool allow_spilling);
|
||||
uint32_t compute_max_register_pressure();
|
||||
void fixup_3src_null_dest();
|
||||
bool fixup_nomask_control_flow();
|
||||
void assign_curb_setup();
|
||||
void assign_urb_setup();
|
||||
void convert_attr_sources_to_hw_regs(elk_fs_inst *inst);
|
||||
|
|
@ -275,7 +274,6 @@ public:
|
|||
bool lower_minmax();
|
||||
bool lower_simd_width();
|
||||
bool lower_barycentrics();
|
||||
bool lower_derivatives();
|
||||
bool lower_find_live_channel();
|
||||
bool lower_scoreboard();
|
||||
bool lower_sub_sat();
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue