From 9f5213923e7d35274c1a9b481717be8c501653d2 Mon Sep 17 00:00:00 2001 From: Caio Oliveira Date: Tue, 13 Feb 2024 12:58:15 -0800 Subject: [PATCH] intel/elk: Remove Gfx9+-only passes Reviewed-by: Lionel Landwerlin Part-of: --- src/intel/compiler/elk/elk_fs.cpp | 208 ------------------------------ src/intel/compiler/elk/elk_fs.h | 2 - 2 files changed, 210 deletions(-) diff --git a/src/intel/compiler/elk/elk_fs.cpp b/src/intel/compiler/elk/elk_fs.cpp index d787529d975..600e0360575 100644 --- a/src/intel/compiler/elk/elk_fs.cpp +++ b/src/intel/compiler/elk/elk_fs.cpp @@ -5311,65 +5311,6 @@ elk_fs_visitor::lower_barycentrics() return progress; } -/** - * Lower a derivative instruction as the floating-point difference of two - * swizzles of the source, specified as \p swz0 and \p swz1. - */ -static bool -lower_derivative(elk_fs_visitor *v, elk_bblock_t *block, elk_fs_inst *inst, - unsigned swz0, unsigned swz1) -{ - const fs_builder ubld = fs_builder(v, block, inst).exec_all(); - const elk_fs_reg tmp0 = ubld.vgrf(inst->src[0].type); - const elk_fs_reg tmp1 = ubld.vgrf(inst->src[0].type); - - ubld.emit(ELK_SHADER_OPCODE_QUAD_SWIZZLE, tmp0, inst->src[0], elk_imm_ud(swz0)); - ubld.emit(ELK_SHADER_OPCODE_QUAD_SWIZZLE, tmp1, inst->src[0], elk_imm_ud(swz1)); - - inst->resize_sources(2); - inst->src[0] = negate(tmp0); - inst->src[1] = tmp1; - inst->opcode = ELK_OPCODE_ADD; - - return true; -} - -/** - * Lower derivative instructions on platforms where codegen cannot implement - * them efficiently (i.e. XeHP). - */ -bool -elk_fs_visitor::lower_derivatives() -{ - bool progress = false; - - if (devinfo->verx10 < 125) - return false; - - foreach_block_and_inst(block, elk_fs_inst, inst, cfg) { - if (inst->opcode == ELK_FS_OPCODE_DDX_COARSE) - progress |= lower_derivative(this, block, inst, - ELK_SWIZZLE_XXXX, ELK_SWIZZLE_YYYY); - - else if (inst->opcode == ELK_FS_OPCODE_DDX_FINE) - progress |= lower_derivative(this, block, inst, - ELK_SWIZZLE_XXZZ, ELK_SWIZZLE_YYWW); - - else if (inst->opcode == ELK_FS_OPCODE_DDY_COARSE) - progress |= lower_derivative(this, block, inst, - ELK_SWIZZLE_XXXX, ELK_SWIZZLE_ZZZZ); - - else if (inst->opcode == ELK_FS_OPCODE_DDY_FINE) - progress |= lower_derivative(this, block, inst, - ELK_SWIZZLE_XYXY, ELK_SWIZZLE_ZWZW); - } - - if (progress) - invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES); - - return progress; -} - bool elk_fs_visitor::lower_find_live_channel() { @@ -5844,8 +5785,6 @@ elk_fs_visitor::optimize() OPT(opt_algebraic); } - OPT(fixup_nomask_control_flow); - if (progress) { if (OPT(opt_copy_propagation)) OPT(opt_algebraic); @@ -5897,7 +5836,6 @@ elk_fs_visitor::optimize() } progress = false; - OPT(lower_derivatives); OPT(lower_regioning); if (progress) { if (OPT(opt_copy_propagation)) @@ -5935,152 +5873,6 @@ elk_fs_visitor::fixup_3src_null_dest() DEPENDENCY_VARIABLES); } -/** - * Find the first instruction in the program that might start a region of - * divergent control flow due to a HALT jump. There is no - * find_halt_control_flow_region_end(), the region of divergence extends until - * the only ELK_SHADER_OPCODE_HALT_TARGET in the program. - */ -static const elk_fs_inst * -find_halt_control_flow_region_start(const elk_fs_visitor *v) -{ - foreach_block_and_inst(block, elk_fs_inst, inst, v->cfg) { - if (inst->opcode == ELK_OPCODE_HALT || - inst->opcode == ELK_SHADER_OPCODE_HALT_TARGET) - return inst; - } - - return NULL; -} - -/** - * Work around the Gfx12 hardware bug filed as Wa_1407528679. EU fusion - * can cause a BB to be executed with all channels disabled, which will lead - * to the execution of any NoMask instructions in it, even though any - * execution-masked instructions will be correctly shot down. This may break - * assumptions of some NoMask SEND messages whose descriptor depends on data - * generated by live invocations of the shader. - * - * This avoids the problem by predicating certain instructions on an ANY - * horizontal predicate that makes sure that their execution is omitted when - * all channels of the program are disabled. - */ -bool -elk_fs_visitor::fixup_nomask_control_flow() -{ - if (devinfo->ver != 12) - return false; - - const elk_predicate pred = dispatch_width > 16 ? ELK_PREDICATE_ALIGN1_ANY32H : - dispatch_width > 8 ? ELK_PREDICATE_ALIGN1_ANY16H : - ELK_PREDICATE_ALIGN1_ANY8H; - const elk_fs_inst *halt_start = find_halt_control_flow_region_start(this); - unsigned depth = 0; - bool progress = false; - - const fs_live_variables &live_vars = live_analysis.require(); - - /* Scan the program backwards in order to be able to easily determine - * whether the flag register is live at any point. - */ - foreach_block_reverse_safe(block, cfg) { - BITSET_WORD flag_liveout = live_vars.block_data[block->num] - .flag_liveout[0]; - STATIC_ASSERT(ARRAY_SIZE(live_vars.block_data[0].flag_liveout) == 1); - - foreach_inst_in_block_reverse_safe(elk_fs_inst, inst, block) { - if (!inst->predicate && inst->exec_size >= 8) - flag_liveout &= ~inst->flags_written(devinfo); - - switch (inst->opcode) { - case ELK_OPCODE_DO: - case ELK_OPCODE_IF: - /* Note that this doesn't handle ELK_OPCODE_HALT since only - * the first one in the program closes the region of divergent - * control flow due to any HALT instructions -- Instead this is - * handled with the halt_start check below. - */ - depth--; - break; - - case ELK_OPCODE_WHILE: - case ELK_OPCODE_ENDIF: - case ELK_SHADER_OPCODE_HALT_TARGET: - depth++; - break; - - default: - /* Note that the vast majority of NoMask SEND instructions in the - * program are harmless while executed in a block with all - * channels disabled, since any instructions with side effects we - * could hit here should be execution-masked. - * - * The main concern is NoMask SEND instructions where the message - * descriptor or header depends on data generated by live - * invocations of the shader (RESINFO and - * ELK_FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD with a dynamically - * computed surface index seem to be the only examples right now - * where this could easily lead to GPU hangs). Unfortunately we - * have no straightforward way to detect that currently, so just - * predicate any NoMask SEND instructions we find under control - * flow. - * - * If this proves to have a measurable performance impact it can - * be easily extended with a whitelist of messages we know we can - * safely omit the predication for. - */ - if (depth && inst->force_writemask_all && - is_send(inst) && !inst->predicate) { - /* We need to load the execution mask into the flag register by - * using a builder with channel group matching the whole shader - * (rather than the default which is derived from the original - * instruction), in order to avoid getting a right-shifted - * value. - */ - const fs_builder ubld = fs_builder(this, block, inst) - .exec_all().group(dispatch_width, 0); - const elk_fs_reg flag = retype(elk_flag_reg(0, 0), - ELK_REGISTER_TYPE_UD); - - /* Due to the lack of flag register allocation we need to save - * and restore the flag register if it's live. - */ - const bool save_flag = flag_liveout & - flag_mask(flag, dispatch_width / 8); - const elk_fs_reg tmp = ubld.group(8, 0).vgrf(flag.type); - - if (save_flag) { - ubld.group(8, 0).UNDEF(tmp); - ubld.group(1, 0).MOV(tmp, flag); - } - - ubld.emit(ELK_FS_OPCODE_LOAD_LIVE_CHANNELS); - - set_predicate(pred, inst); - inst->flag_subreg = 0; - inst->predicate_trivial = true; - - if (save_flag) - ubld.group(1, 0).at(block, inst->next).MOV(flag, tmp); - - progress = true; - } - break; - } - - if (inst == halt_start) - depth--; - - flag_liveout |= inst->flags_read(devinfo); - } - } - - if (progress) - invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES); - - return progress; -} - uint32_t elk_fs_visitor::compute_max_register_pressure() { diff --git a/src/intel/compiler/elk/elk_fs.h b/src/intel/compiler/elk/elk_fs.h index 7c5642cc52d..b138556dda2 100644 --- a/src/intel/compiler/elk/elk_fs.h +++ b/src/intel/compiler/elk/elk_fs.h @@ -213,7 +213,6 @@ public: void allocate_registers(bool allow_spilling); uint32_t compute_max_register_pressure(); void fixup_3src_null_dest(); - bool fixup_nomask_control_flow(); void assign_curb_setup(); void assign_urb_setup(); void convert_attr_sources_to_hw_regs(elk_fs_inst *inst); @@ -275,7 +274,6 @@ public: bool lower_minmax(); bool lower_simd_width(); bool lower_barycentrics(); - bool lower_derivatives(); bool lower_find_live_channel(); bool lower_scoreboard(); bool lower_sub_sat();