intel/brw: Remove 'fs' prefix from passes and related functions

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32813>
Caio Oliveira 2024-12-06 11:37:57 -08:00 committed by Marge Bot
parent 25384dccc0
commit e1aebf8a0c
40 changed files with 228 additions and 231 deletions


@ -45,17 +45,17 @@ run_bs(fs_visitor &s, bool allow_spilling)
brw_calculate_cfg(s);
brw_fs_optimize(s);
brw_optimize(s);
s.assign_curb_setup();
brw_fs_lower_3src_null_dest(s);
brw_fs_workaround_memory_fence_before_eot(s);
brw_fs_workaround_emit_dummy_mov_instruction(s);
brw_lower_3src_null_dest(s);
brw_workaround_memory_fence_before_eot(s);
brw_workaround_emit_dummy_mov_instruction(s);
brw_allocate_registers(s, allow_spilling);
brw_fs_workaround_source_arf_before_eot(s);
brw_workaround_source_arf_before_eot(s);
return !s.failed;
}


@ -83,17 +83,17 @@ run_cs(fs_visitor &s, bool allow_spilling)
brw_calculate_cfg(s);
brw_fs_optimize(s);
brw_optimize(s);
s.assign_curb_setup();
brw_fs_lower_3src_null_dest(s);
brw_fs_workaround_memory_fence_before_eot(s);
brw_fs_workaround_emit_dummy_mov_instruction(s);
brw_lower_3src_null_dest(s);
brw_workaround_memory_fence_before_eot(s);
brw_workaround_emit_dummy_mov_instruction(s);
brw_allocate_registers(s, allow_spilling);
brw_fs_workaround_source_arf_before_eot(s);
brw_workaround_source_arf_before_eot(s);
return !s.failed;
}


@ -648,7 +648,7 @@ brw_emit_repclear_shader(fs_visitor &s)
s.first_non_payload_grf = s.payload().num_regs;
brw_fs_lower_scoreboard(s);
brw_lower_scoreboard(s);
}
/**
@ -1493,7 +1493,7 @@ run_fs(fs_visitor &s, bool allow_spilling, bool do_rep_send)
brw_calculate_cfg(s);
brw_fs_optimize(s);
brw_optimize(s);
s.assign_curb_setup();
@ -1502,13 +1502,13 @@ run_fs(fs_visitor &s, bool allow_spilling, bool do_rep_send)
brw_assign_urb_setup(s);
brw_fs_lower_3src_null_dest(s);
brw_fs_workaround_memory_fence_before_eot(s);
brw_fs_workaround_emit_dummy_mov_instruction(s);
brw_lower_3src_null_dest(s);
brw_workaround_memory_fence_before_eot(s);
brw_workaround_emit_dummy_mov_instruction(s);
brw_allocate_registers(s, allow_spilling);
brw_fs_workaround_source_arf_before_eot(s);
brw_workaround_source_arf_before_eot(s);
}
return !s.failed;


@ -118,18 +118,18 @@ run_gs(fs_visitor &s)
brw_calculate_cfg(s);
brw_fs_optimize(s);
brw_optimize(s);
s.assign_curb_setup();
brw_assign_gs_urb_setup(s);
brw_fs_lower_3src_null_dest(s);
brw_fs_workaround_memory_fence_before_eot(s);
brw_fs_workaround_emit_dummy_mov_instruction(s);
brw_lower_3src_null_dest(s);
brw_workaround_memory_fence_before_eot(s);
brw_workaround_emit_dummy_mov_instruction(s);
brw_allocate_registers(s, true /* allow_spilling */);
brw_fs_workaround_source_arf_before_eot(s);
brw_workaround_source_arf_before_eot(s);
return !s.failed;
}


@ -310,17 +310,17 @@ run_task_mesh(fs_visitor &s, bool allow_spilling)
brw_calculate_cfg(s);
brw_fs_optimize(s);
brw_optimize(s);
s.assign_curb_setup();
brw_fs_lower_3src_null_dest(s);
brw_fs_workaround_memory_fence_before_eot(s);
brw_fs_workaround_emit_dummy_mov_instruction(s);
brw_lower_3src_null_dest(s);
brw_workaround_memory_fence_before_eot(s);
brw_workaround_emit_dummy_mov_instruction(s);
brw_allocate_registers(s, allow_spilling);
brw_fs_workaround_source_arf_before_eot(s);
brw_workaround_source_arf_before_eot(s);
return !s.failed;
}


@ -166,18 +166,18 @@ run_tcs(fs_visitor &s)
brw_calculate_cfg(s);
brw_fs_optimize(s);
brw_optimize(s);
s.assign_curb_setup();
brw_assign_tcs_urb_setup(s);
brw_fs_lower_3src_null_dest(s);
brw_fs_workaround_memory_fence_before_eot(s);
brw_fs_workaround_emit_dummy_mov_instruction(s);
brw_lower_3src_null_dest(s);
brw_workaround_memory_fence_before_eot(s);
brw_workaround_emit_dummy_mov_instruction(s);
brw_allocate_registers(s, true /* allow_spilling */);
brw_fs_workaround_source_arf_before_eot(s);
brw_workaround_source_arf_before_eot(s);
return !s.failed;
}


@ -42,18 +42,18 @@ run_tes(fs_visitor &s)
brw_calculate_cfg(s);
brw_fs_optimize(s);
brw_optimize(s);
s.assign_curb_setup();
brw_assign_tes_urb_setup(s);
brw_fs_lower_3src_null_dest(s);
brw_fs_workaround_memory_fence_before_eot(s);
brw_fs_workaround_emit_dummy_mov_instruction(s);
brw_lower_3src_null_dest(s);
brw_workaround_memory_fence_before_eot(s);
brw_workaround_emit_dummy_mov_instruction(s);
brw_allocate_registers(s, true /* allow_spilling */);
brw_fs_workaround_source_arf_before_eot(s);
brw_workaround_source_arf_before_eot(s);
return !s.failed;
}


@ -45,18 +45,18 @@ run_vs(fs_visitor &s)
brw_calculate_cfg(s);
brw_fs_optimize(s);
brw_optimize(s);
s.assign_curb_setup();
brw_assign_vs_urb_setup(s);
brw_fs_lower_3src_null_dest(s);
brw_fs_workaround_memory_fence_before_eot(s);
brw_fs_workaround_emit_dummy_mov_instruction(s);
brw_lower_3src_null_dest(s);
brw_workaround_memory_fence_before_eot(s);
brw_workaround_emit_dummy_mov_instruction(s);
brw_allocate_registers(s, true /* allow_spilling */);
brw_fs_workaround_source_arf_before_eot(s);
brw_workaround_source_arf_before_eot(s);
return !s.failed;
}


@ -566,7 +566,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
!(src0.file == IMM && src2.file == IMM));
} else {
/* Having two immediate sources is allowed, but this should have been
* converted to a regular ADD by brw_fs_opt_algebraic.
* converted to a regular ADD by brw_opt_algebraic.
*/
assert(opcode != BRW_OPCODE_ADD3 ||
!(src0.file == IMM && src2.file == IMM));
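
For reference, the identity brw_opt_algebraic relies on here can be checked with plain integer arithmetic; the snippet below is a standalone illustration with made-up values, not Mesa code:

   // ADD3 dst, imm0, x, imm2 is equivalent to ADD dst, x, (imm0 + imm2),
   // which is why an ADD3 with two immediate sources should not reach codegen.
   #include <cassert>
   #include <cstdint>

   int main() {
      const int32_t imm0 = 5, imm2 = 7;
      int32_t x = 42;

      int32_t add3 = imm0 + x + imm2;   // three-source add
      int32_t add  = x + (imm0 + imm2); // folded two-source add

      assert(add3 == add);
      return 0;
   }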


@ -1440,7 +1440,7 @@ brw_allocate_registers(fs_visitor &s, bool allow_spilling)
uint32_t best_register_pressure = UINT32_MAX;
enum instruction_scheduler_mode best_sched = SCHEDULE_NONE;
brw_fs_opt_compact_virtual_grfs(s);
brw_opt_compact_virtual_grfs(s);
if (s.needs_register_pressure)
s.shader_stats.max_register_pressure = brw_compute_max_register_pressure(s);
@ -1538,7 +1538,7 @@ brw_allocate_registers(fs_visitor &s, bool allow_spilling)
s.debug_optimizer(nir, "post_ra_alloc", 96, pass_num++);
brw_fs_opt_bank_conflicts(s);
brw_opt_bank_conflicts(s);
s.debug_optimizer(nir, "bank_conflict", 96, pass_num++);
@ -1554,7 +1554,7 @@ brw_allocate_registers(fs_visitor &s, bool allow_spilling)
* TODO: Change the passes above, then move this lowering to be part of
* assign_regs.
*/
brw_fs_lower_vgrfs_to_fixed_grfs(s);
brw_lower_vgrfs_to_fixed_grfs(s);
s.debug_optimizer(nir, "lowered_vgrfs_to_fixed_grfs", 96, pass_num++);
@ -1586,7 +1586,7 @@ brw_allocate_registers(fs_visitor &s, bool allow_spilling)
if (s.failed)
return;
brw_fs_lower_scoreboard(s);
brw_lower_scoreboard(s);
s.debug_optimizer(nir, "scoreboard", 96, pass_num++);
}


@ -614,7 +614,7 @@ static inline void brw_fs_validate(const fs_visitor &s) {}
void brw_calculate_cfg(fs_visitor &s);
void brw_fs_optimize(fs_visitor &s);
void brw_optimize(fs_visitor &s);
instruction_scheduler *brw_prepare_scheduler(fs_visitor &s, void *mem_ctx);
void brw_schedule_instructions_pre_ra(fs_visitor &s, instruction_scheduler *sched,
@ -625,55 +625,55 @@ void brw_allocate_registers(fs_visitor &s, bool allow_spilling);
bool brw_assign_regs(fs_visitor &s, bool allow_spilling, bool spill_all);
void brw_assign_regs_trivial(fs_visitor &s);
bool brw_fs_lower_3src_null_dest(fs_visitor &s);
bool brw_fs_lower_alu_restrictions(fs_visitor &s);
bool brw_fs_lower_barycentrics(fs_visitor &s);
bool brw_fs_lower_constant_loads(fs_visitor &s);
bool brw_fs_lower_derivatives(fs_visitor &s);
bool brw_fs_lower_dpas(fs_visitor &s);
bool brw_fs_lower_find_live_channel(fs_visitor &s);
bool brw_fs_lower_integer_multiplication(fs_visitor &s);
bool brw_fs_lower_load_subgroup_invocation(fs_visitor &s);
bool brw_fs_lower_indirect_mov(fs_visitor &s);
bool brw_fs_lower_logical_sends(fs_visitor &s);
bool brw_fs_lower_pack(fs_visitor &s);
bool brw_fs_lower_load_payload(fs_visitor &s);
bool brw_fs_lower_regioning(fs_visitor &s);
bool brw_lower_3src_null_dest(fs_visitor &s);
bool brw_lower_alu_restrictions(fs_visitor &s);
bool brw_lower_barycentrics(fs_visitor &s);
bool brw_lower_constant_loads(fs_visitor &s);
bool brw_lower_csel(fs_visitor &s);
bool brw_lower_derivatives(fs_visitor &s);
bool brw_lower_dpas(fs_visitor &s);
bool brw_lower_find_live_channel(fs_visitor &s);
bool brw_lower_indirect_mov(fs_visitor &s);
bool brw_lower_integer_multiplication(fs_visitor &s);
bool brw_lower_load_payload(fs_visitor &s);
bool brw_lower_load_subgroup_invocation(fs_visitor &s);
bool brw_lower_logical_sends(fs_visitor &s);
bool brw_lower_pack(fs_visitor &s);
bool brw_lower_regioning(fs_visitor &s);
bool brw_lower_scalar_fp64_MAD(fs_visitor &s);
bool brw_fs_lower_scoreboard(fs_visitor &s);
bool brw_fs_lower_sends_overlapping_payload(fs_visitor &s);
bool brw_fs_lower_simd_width(fs_visitor &s);
bool brw_fs_lower_csel(fs_visitor &s);
bool brw_fs_lower_sub_sat(fs_visitor &s);
bool brw_fs_lower_subgroup_ops(fs_visitor &s);
bool brw_fs_lower_uniform_pull_constant_loads(fs_visitor &s);
void brw_fs_lower_vgrfs_to_fixed_grfs(fs_visitor &s);
bool brw_lower_scoreboard(fs_visitor &s);
bool brw_lower_sends_overlapping_payload(fs_visitor &s);
bool brw_lower_simd_width(fs_visitor &s);
bool brw_lower_sub_sat(fs_visitor &s);
bool brw_lower_subgroup_ops(fs_visitor &s);
bool brw_lower_uniform_pull_constant_loads(fs_visitor &s);
void brw_lower_vgrfs_to_fixed_grfs(fs_visitor &s);
bool brw_constant_fold_instruction(const intel_device_info *devinfo, fs_inst *inst);
bool brw_fs_opt_algebraic(fs_visitor &s);
bool brw_fs_opt_bank_conflicts(fs_visitor &s);
bool brw_fs_opt_cmod_propagation(fs_visitor &s);
bool brw_fs_opt_combine_constants(fs_visitor &s);
bool brw_fs_opt_compact_virtual_grfs(fs_visitor &s);
bool brw_fs_opt_copy_propagation(fs_visitor &s);
bool brw_fs_opt_copy_propagation_defs(fs_visitor &s);
bool brw_fs_opt_cse_defs(fs_visitor &s);
bool brw_fs_opt_dead_code_eliminate(fs_visitor &s);
bool brw_fs_opt_eliminate_find_live_channel(fs_visitor &s);
bool brw_fs_opt_register_coalesce(fs_visitor &s);
bool brw_fs_opt_remove_extra_rounding_modes(fs_visitor &s);
bool brw_fs_opt_remove_redundant_halts(fs_visitor &s);
bool brw_fs_opt_saturate_propagation(fs_visitor &s);
bool brw_fs_opt_split_sends(fs_visitor &s);
bool brw_fs_opt_split_virtual_grfs(fs_visitor &s);
bool brw_fs_opt_zero_samples(fs_visitor &s);
bool brw_opt_algebraic(fs_visitor &s);
bool brw_opt_bank_conflicts(fs_visitor &s);
bool brw_opt_cmod_propagation(fs_visitor &s);
bool brw_opt_combine_constants(fs_visitor &s);
bool brw_opt_combine_convergent_txf(fs_visitor &s);
bool brw_opt_compact_virtual_grfs(fs_visitor &s);
bool brw_opt_constant_fold_instruction(const intel_device_info *devinfo, fs_inst *inst);
bool brw_opt_copy_propagation(fs_visitor &s);
bool brw_opt_copy_propagation_defs(fs_visitor &s);
bool brw_opt_cse_defs(fs_visitor &s);
bool brw_opt_dead_code_eliminate(fs_visitor &s);
bool brw_opt_eliminate_find_live_channel(fs_visitor &s);
bool brw_opt_register_coalesce(fs_visitor &s);
bool brw_opt_remove_extra_rounding_modes(fs_visitor &s);
bool brw_opt_remove_redundant_halts(fs_visitor &s);
bool brw_opt_saturate_propagation(fs_visitor &s);
bool brw_opt_split_sends(fs_visitor &s);
bool brw_opt_split_virtual_grfs(fs_visitor &s);
bool brw_opt_zero_samples(fs_visitor &s);
bool brw_fs_workaround_emit_dummy_mov_instruction(fs_visitor &s);
bool brw_fs_workaround_memory_fence_before_eot(fs_visitor &s);
bool brw_fs_workaround_source_arf_before_eot(fs_visitor &s);
bool brw_fs_workaround_nomask_control_flow(fs_visitor &s);
bool brw_workaround_emit_dummy_mov_instruction(fs_visitor &s);
bool brw_workaround_memory_fence_before_eot(fs_visitor &s);
bool brw_workaround_nomask_control_flow(fs_visitor &s);
bool brw_workaround_source_arf_before_eot(fs_visitor &s);
/* Helpers. */
unsigned brw_fs_get_lowered_simd_width(const fs_visitor *shader,
const fs_inst *inst);
unsigned brw_get_lowered_simd_width(const fs_visitor *shader,
const fs_inst *inst);


@ -616,8 +616,8 @@ optimize_extract_to_float(nir_to_brw_state &ntb, const fs_builder &bld,
* are not supported on Src0 and Src1 except for broadcast of a
* scalar."
*
* This restriction is enforced in brw_fs_lower_regioning. There is no
* reason to generate an optimized instruction that brw_fs_lower_regioning
* This restriction is enforced in brw_lower_regioning. There is no
* reason to generate an optimized instruction that brw_lower_regioning
* will have to break up later.
*/
if (devinfo->verx10 >= 125 && element != 0 && !is_uniform(op0))


@ -54,7 +54,7 @@ brw_lower_scalar_fp64_MAD(fs_visitor &s)
* or VARYING_PULL_CONSTANT_LOAD instructions which load values into VGRFs.
*/
bool
brw_fs_lower_constant_loads(fs_visitor &s)
brw_lower_constant_loads(fs_visitor &s)
{
unsigned index, pull_index;
bool progress = false;
@ -121,7 +121,7 @@ brw_fs_lower_constant_loads(fs_visitor &s)
}
bool
brw_fs_lower_load_payload(fs_visitor &s)
brw_lower_load_payload(fs_visitor &s)
{
bool progress = false;
@ -177,7 +177,7 @@ brw_fs_lower_load_payload(fs_visitor &s)
* Or, for unsigned ==/!= comparisons, simply change the types.
*/
bool
brw_fs_lower_csel(fs_visitor &s)
brw_lower_csel(fs_visitor &s)
{
const intel_device_info *devinfo = s.devinfo;
bool progress = false;
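
A standalone sketch of the observation behind the unsigned ==/!= note above (plain C++, illustrative only; the pass itself rewrites fs_inst types):

   // For equality comparisons the signed and unsigned views of the same
   // 32-bit pattern agree, so only the source types need to change.
   #include <cassert>
   #include <cstdint>

   int main() {
      uint32_t a = 0xffffffffu, b = 0xffffffffu;

      bool eq_unsigned = (a == b);
      bool eq_signed   = (static_cast<int32_t>(a) == static_cast<int32_t>(b));

      assert(eq_unsigned == eq_signed);
      return 0;
   }
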
@ -253,7 +253,7 @@ brw_fs_lower_csel(fs_visitor &s)
}
bool
brw_fs_lower_sub_sat(fs_visitor &s)
brw_lower_sub_sat(fs_visitor &s)
{
bool progress = false;
@ -353,7 +353,7 @@ brw_fs_lower_sub_sat(fs_visitor &s)
* component layout.
*/
bool
brw_fs_lower_barycentrics(fs_visitor &s)
brw_lower_barycentrics(fs_visitor &s)
{
const intel_device_info *devinfo = s.devinfo;
@ -446,7 +446,7 @@ lower_derivative(fs_visitor &s, bblock_t *block, fs_inst *inst,
* them efficiently (i.e. XeHP).
*/
bool
brw_fs_lower_derivatives(fs_visitor &s)
brw_lower_derivatives(fs_visitor &s)
{
bool progress = false;
@ -478,7 +478,7 @@ brw_fs_lower_derivatives(fs_visitor &s)
}
bool
brw_fs_lower_find_live_channel(fs_visitor &s)
brw_lower_find_live_channel(fs_visitor &s)
{
bool progress = false;
@ -583,7 +583,7 @@ brw_fs_lower_find_live_channel(fs_visitor &s)
* just adds a new vgrf for the second payload and copies it over.
*/
bool
brw_fs_lower_sends_overlapping_payload(fs_visitor &s)
brw_lower_sends_overlapping_payload(fs_visitor &s)
{
bool progress = false;
@ -629,7 +629,7 @@ brw_fs_lower_sends_overlapping_payload(fs_visitor &s)
* ARF NULL is not allowed. Fix that up by allocating a temporary GRF.
*/
bool
brw_fs_lower_3src_null_dest(fs_visitor &s)
brw_lower_3src_null_dest(fs_visitor &s)
{
bool progress = false;
@ -664,7 +664,7 @@ unsupported_64bit_type(const intel_device_info *devinfo,
* - Splitting 64-bit MOV/SEL into 2x32-bit where needed
*/
bool
brw_fs_lower_alu_restrictions(fs_visitor &s)
brw_lower_alu_restrictions(fs_visitor &s)
{
const intel_device_info *devinfo = s.devinfo;
bool progress = false;
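
The 64-bit MOV/SEL splitting mentioned in the comment above boils down to moving the two 32-bit halves independently; a minimal standalone check (plain C++, not the pass itself):

   #include <cassert>
   #include <cstdint>

   int main() {
      uint64_t src = 0x0123456789abcdefull;

      uint64_t dst_single = src;                                    // one 64-bit MOV

      uint32_t lo = static_cast<uint32_t>(src);
      uint32_t hi = static_cast<uint32_t>(src >> 32);
      uint64_t dst_split = (static_cast<uint64_t>(hi) << 32) | lo;  // 2x32-bit MOVs

      assert(dst_single == dst_split);
      return 0;
   }
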
@ -736,8 +736,8 @@ brw_fs_lower_alu_restrictions(fs_visitor &s)
}
static void
brw_fs_lower_vgrf_to_fixed_grf(const struct intel_device_info *devinfo, fs_inst *inst,
brw_reg *reg, bool compressed)
brw_lower_vgrf_to_fixed_grf(const struct intel_device_info *devinfo, fs_inst *inst,
brw_reg *reg, bool compressed)
{
if (reg->file != VGRF)
return;
@ -796,7 +796,7 @@ brw_fs_lower_vgrf_to_fixed_grf(const struct intel_device_info *devinfo, fs_inst
}
void
brw_fs_lower_vgrfs_to_fixed_grfs(fs_visitor &s)
brw_lower_vgrfs_to_fixed_grfs(fs_visitor &s)
{
assert(s.grf_used || !"Must be called after register allocation");
@ -818,9 +818,9 @@ brw_fs_lower_vgrfs_to_fixed_grfs(fs_visitor &s)
const bool compressed =
inst->dst.component_size(inst->exec_size) > REG_SIZE;
brw_fs_lower_vgrf_to_fixed_grf(s.devinfo, inst, &inst->dst, compressed);
brw_lower_vgrf_to_fixed_grf(s.devinfo, inst, &inst->dst, compressed);
for (int i = 0; i < inst->sources; i++) {
brw_fs_lower_vgrf_to_fixed_grf(s.devinfo, inst, &inst->src[i], compressed);
brw_lower_vgrf_to_fixed_grf(s.devinfo, inst, &inst->src[i], compressed);
}
}
@ -829,7 +829,7 @@ brw_fs_lower_vgrfs_to_fixed_grfs(fs_visitor &s)
}
bool
brw_fs_lower_load_subgroup_invocation(fs_visitor &s)
brw_lower_load_subgroup_invocation(fs_visitor &s)
{
bool progress = false;
@ -868,7 +868,7 @@ brw_fs_lower_load_subgroup_invocation(fs_visitor &s)
}
bool
brw_fs_lower_indirect_mov(fs_visitor &s)
brw_lower_indirect_mov(fs_visitor &s)
{
bool progress = false;


@ -271,7 +271,7 @@ int8_using_mul_add(const fs_builder &bld, fs_inst *inst)
}
bool
brw_fs_lower_dpas(fs_visitor &v)
brw_lower_dpas(fs_visitor &v)
{
bool progress = false;


@ -139,7 +139,7 @@ factor_uint32(uint32_t x, unsigned *result_a, unsigned *result_b)
}
static void
brw_fs_lower_mul_dword_inst(fs_visitor &s, fs_inst *inst, bblock_t *block)
brw_lower_mul_dword_inst(fs_visitor &s, fs_inst *inst, bblock_t *block)
{
const intel_device_info *devinfo = s.devinfo;
const fs_builder ibld(&s, block, inst);
@ -301,7 +301,7 @@ brw_fs_lower_mul_dword_inst(fs_visitor &s, fs_inst *inst, bblock_t *block)
}
static void
brw_fs_lower_mul_qword_inst(fs_visitor &s, fs_inst *inst, bblock_t *block)
brw_lower_mul_qword_inst(fs_visitor &s, fs_inst *inst, bblock_t *block)
{
const intel_device_info *devinfo = s.devinfo;
const fs_builder ibld(&s, block, inst);
@ -370,7 +370,7 @@ brw_fs_lower_mul_qword_inst(fs_visitor &s, fs_inst *inst, bblock_t *block)
}
static void
brw_fs_lower_mulh_inst(fs_visitor &s, fs_inst *inst, bblock_t *block)
brw_lower_mulh_inst(fs_visitor &s, fs_inst *inst, bblock_t *block)
{
const intel_device_info *devinfo = s.devinfo;
const fs_builder ibld(&s, block, inst);
@ -388,7 +388,7 @@ brw_fs_lower_mulh_inst(fs_visitor &s, fs_inst *inst, bblock_t *block)
lower_src_modifiers(&s, block, inst, 1);
/* Should have been lowered to 8-wide. */
assert(inst->exec_size <= brw_fs_get_lowered_simd_width(&s, inst));
assert(inst->exec_size <= brw_get_lowered_simd_width(&s, inst));
const unsigned acc_width = reg_unit(devinfo) * 8;
const brw_reg acc = suboffset(retype(brw_acc_reg(inst->exec_size), inst->dst.type),
inst->group % acc_width);
@ -414,7 +414,7 @@ brw_fs_lower_mulh_inst(fs_visitor &s, fs_inst *inst, bblock_t *block)
}
bool
brw_fs_lower_integer_multiplication(fs_visitor &s)
brw_lower_integer_multiplication(fs_visitor &s)
{
const intel_device_info *devinfo = s.devinfo;
bool progress = false;
@ -433,7 +433,7 @@ brw_fs_lower_integer_multiplication(fs_visitor &s)
inst->src[0].type == BRW_TYPE_UQ) &&
(inst->src[1].type == BRW_TYPE_Q ||
inst->src[1].type == BRW_TYPE_UQ)) {
brw_fs_lower_mul_qword_inst(s, inst, block);
brw_lower_mul_qword_inst(s, inst, block);
inst->remove(block);
progress = true;
} else if (!inst->dst.is_accumulator() &&
@ -441,12 +441,12 @@ brw_fs_lower_integer_multiplication(fs_visitor &s)
inst->dst.type == BRW_TYPE_UD) &&
(!devinfo->has_integer_dword_mul ||
devinfo->verx10 >= 125)) {
brw_fs_lower_mul_dword_inst(s, inst, block);
brw_lower_mul_dword_inst(s, inst, block);
inst->remove(block);
progress = true;
}
} else if (inst->opcode == SHADER_OPCODE_MULH) {
brw_fs_lower_mulh_inst(s, inst, block);
brw_lower_mulh_inst(s, inst, block);
inst->remove(block);
progress = true;
}
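
The core identity behind the dword-multiply lowering can be checked in plain C++; this is only the arithmetic, while the real pass also has to deal with regioning, strides and the accumulator:

   // A 32x32 multiply can be assembled from 16-bit partial products:
   // a * b == a * b_lo + ((a * b_hi) << 16)   (mod 2^32)
   #include <cassert>
   #include <cstdint>

   int main() {
      uint32_t a = 0x12345678u, b = 0x9abcdef0u;

      uint32_t b_lo = b & 0xffffu;
      uint32_t b_hi = b >> 16;

      uint32_t low    = a * b_lo;
      uint32_t high   = a * b_hi;
      uint32_t result = low + (high << 16);

      assert(result == a * b);
      return 0;
   }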


@ -2388,7 +2388,7 @@ lower_get_buffer_size(const fs_builder &bld, fs_inst *inst)
}
bool
brw_fs_lower_logical_sends(fs_visitor &s)
brw_lower_logical_sends(fs_visitor &s)
{
const intel_device_info *devinfo = s.devinfo;
bool progress = false;
@ -2515,7 +2515,7 @@ brw_fs_lower_logical_sends(fs_visitor &s)
* source operand for all 8 or 16 of its channels.
*/
bool
brw_fs_lower_uniform_pull_constant_loads(fs_visitor &s)
brw_lower_uniform_pull_constant_loads(fs_visitor &s)
{
const intel_device_info *devinfo = s.devinfo;
bool progress = false;


@ -29,7 +29,7 @@
using namespace brw;
bool
brw_fs_lower_pack(fs_visitor &s)
brw_lower_pack(fs_visitor &s)
{
bool progress = false;


@ -803,7 +803,7 @@ namespace {
}
bool
brw_fs_lower_regioning(fs_visitor &s)
brw_lower_regioning(fs_visitor &s)
{
bool progress = false;


@ -1342,7 +1342,7 @@ namespace {
}
bool
brw_fs_lower_scoreboard(fs_visitor &s)
brw_lower_scoreboard(fs_visitor &s)
{
if (s.devinfo->ver >= 12) {
const ordered_address *jps = ordered_inst_addresses(&s);


@ -242,7 +242,7 @@ is_half_float_src_dst(const fs_inst *inst)
* original execution size.
*/
unsigned
brw_fs_get_lowered_simd_width(const fs_visitor *shader, const fs_inst *inst)
brw_get_lowered_simd_width(const fs_visitor *shader, const fs_inst *inst)
{
const struct brw_compiler *compiler = shader->compiler;
const struct intel_device_info *devinfo = compiler->devinfo;
@ -644,12 +644,12 @@ emit_zip(const fs_builder &lbld_before, const fs_builder &lbld_after,
}
bool
brw_fs_lower_simd_width(fs_visitor &s)
brw_lower_simd_width(fs_visitor &s)
{
bool progress = false;
foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) {
const unsigned lower_width = brw_fs_get_lowered_simd_width(&s, inst);
const unsigned lower_width = brw_get_lowered_simd_width(&s, inst);
/* No splitting required */
if (lower_width == inst->exec_size)
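
What the splitting amounts to, in a standalone sketch with stand-in numbers (the real pass also zips the split destinations back together):

   #include <cstdio>

   int main() {
      const unsigned exec_size   = 16;  // original instruction width
      const unsigned lower_width = 8;   // width returned by brw_get_lowered_simd_width

      if (lower_width < exec_size) {
         for (unsigned g = 0; g < exec_size / lower_width; g++)
            std::printf("emit copy %u for channels [%u, %u)\n",
                        g, g * lower_width, (g + 1) * lower_width);
      }
      return 0;
   }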


@ -647,7 +647,7 @@ brw_lower_read_from_channel(fs_visitor &s, bblock_t *block, fs_inst *inst)
}
bool
brw_fs_lower_subgroup_ops(fs_visitor &s)
brw_lower_subgroup_ops(fs_visitor &s)
{
bool progress = false;


@ -9,13 +9,13 @@
* profitable.
*
* Intel GPUs have a saturate destination modifier, and
* brw_fs_opt_saturate_propagation tries to replace explicit saturate
* brw_opt_saturate_propagation tries to replace explicit saturate
* operations with this destination modifier. That pass is limited in several
* ways. If the source of the explicit saturate is in a different block or if
* the source of the explicit saturate is live after the explicit saturate,
* brw_fs_opt_saturate_propagation will be unable to make progress.
* brw_opt_saturate_propagation will be unable to make progress.
*
* This optimization exists to help brw_fs_opt_saturate_propagation make more
* This optimization exists to help brw_opt_saturate_propagation make more
* progress. It tries to move NIR fsat instructions to the same block that
* contains the definition of its source. It does this only in cases where it
* will not create additional live values. It also attempts to do this only in
@ -95,7 +95,7 @@ collect_reaching_defs(nir_alu_instr *fsat, nir_instr_worklist *sources)
nir_def *def = fsat->src[0].src.ssa;
/* If the source of the fsat is in the same block,
* brw_fs_opt_saturate_propagation will already have enough information to
* brw_opt_saturate_propagation will already have enough information to
* do its job. Adding another fsat will not help.
*/
if (def->parent_instr->type == nir_instr_type_alu &&
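
The saturate destination modifier mentioned above clamps the producer's result to [0, 1], which is why a separate fsat of that value can be folded away; a standalone check in plain C++ (no Mesa types):

   #include <algorithm>
   #include <cassert>

   static float fsat(float x) { return std::min(std::max(x, 0.0f), 1.0f); }

   int main() {
      float a = 0.75f, b = 0.5f;

      float tmp = a + b;        // add tmp, a, b
      float y0  = fsat(tmp);    // ...later: mov.sat y, tmp  (separate instruction)

      float y1  = fsat(a + b);  // folded: add.sat y, a, b

      assert(y0 == y1);
      return 0;
   }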


@ -10,7 +10,7 @@
using namespace brw;
void
brw_fs_optimize(fs_visitor &s)
brw_optimize(fs_visitor &s)
{
const nir_shader *nir = s.nir;
@ -44,12 +44,12 @@ brw_fs_optimize(fs_visitor &s)
})
s.assign_constant_locations();
OPT(brw_fs_lower_constant_loads);
OPT(brw_lower_constant_loads);
if (s.compiler->lower_dpas)
OPT(brw_fs_lower_dpas);
OPT(brw_lower_dpas);
OPT(brw_fs_opt_split_virtual_grfs);
OPT(brw_opt_split_virtual_grfs);
/* Before anything else, eliminate dead code. The results of some NIR
* instructions may effectively be calculated twice. Once when the
@ -57,27 +57,27 @@ brw_fs_optimize(fs_visitor &s)
* encountered. Wipe those away before algebraic optimizations and
* especially copy propagation can mix things up.
*/
OPT(brw_fs_opt_dead_code_eliminate);
OPT(brw_opt_dead_code_eliminate);
OPT(brw_fs_opt_remove_extra_rounding_modes);
OPT(brw_opt_remove_extra_rounding_modes);
OPT(brw_fs_opt_eliminate_find_live_channel);
OPT(brw_opt_eliminate_find_live_channel);
do {
progress = false;
pass_num = 0;
iteration++;
OPT(brw_fs_opt_algebraic);
OPT(brw_fs_opt_cse_defs);
if (!OPT(brw_fs_opt_copy_propagation_defs))
OPT(brw_fs_opt_copy_propagation);
OPT(brw_fs_opt_cmod_propagation);
OPT(brw_fs_opt_dead_code_eliminate);
OPT(brw_fs_opt_saturate_propagation);
OPT(brw_fs_opt_register_coalesce);
OPT(brw_opt_algebraic);
OPT(brw_opt_cse_defs);
if (!OPT(brw_opt_copy_propagation_defs))
OPT(brw_opt_copy_propagation);
OPT(brw_opt_cmod_propagation);
OPT(brw_opt_dead_code_eliminate);
OPT(brw_opt_saturate_propagation);
OPT(brw_opt_register_coalesce);
OPT(brw_fs_opt_compact_virtual_grfs);
OPT(brw_opt_compact_virtual_grfs);
} while (progress);
brw_shader_phase_update(s, BRW_SHADER_PHASE_AFTER_OPT_LOOP);
@ -86,107 +86,107 @@ brw_fs_optimize(fs_visitor &s)
pass_num = 0;
if (OPT(brw_opt_combine_convergent_txf))
OPT(brw_fs_opt_copy_propagation_defs);
OPT(brw_opt_copy_propagation_defs);
if (OPT(brw_fs_lower_pack)) {
OPT(brw_fs_opt_register_coalesce);
OPT(brw_fs_opt_dead_code_eliminate);
if (OPT(brw_lower_pack)) {
OPT(brw_opt_register_coalesce);
OPT(brw_opt_dead_code_eliminate);
}
OPT(brw_fs_lower_subgroup_ops);
OPT(brw_fs_lower_csel);
OPT(brw_fs_lower_simd_width);
OPT(brw_lower_subgroup_ops);
OPT(brw_lower_csel);
OPT(brw_lower_simd_width);
OPT(brw_lower_scalar_fp64_MAD);
OPT(brw_fs_lower_barycentrics);
OPT(brw_fs_lower_logical_sends);
OPT(brw_lower_barycentrics);
OPT(brw_lower_logical_sends);
brw_shader_phase_update(s, BRW_SHADER_PHASE_AFTER_EARLY_LOWERING);
/* After logical SEND lowering. */
if (!OPT(brw_fs_opt_copy_propagation_defs))
OPT(brw_fs_opt_copy_propagation);
if (!OPT(brw_opt_copy_propagation_defs))
OPT(brw_opt_copy_propagation);
/* Identify trailing zeros LOAD_PAYLOAD of sampler messages.
* Do this before splitting SENDs.
*/
if (OPT(brw_fs_opt_zero_samples)) {
if (!OPT(brw_fs_opt_copy_propagation_defs)) {
OPT(brw_fs_opt_copy_propagation);
if (OPT(brw_opt_zero_samples)) {
if (!OPT(brw_opt_copy_propagation_defs)) {
OPT(brw_opt_copy_propagation);
}
}
OPT(brw_fs_opt_split_sends);
OPT(brw_fs_workaround_nomask_control_flow);
OPT(brw_opt_split_sends);
OPT(brw_workaround_nomask_control_flow);
if (progress) {
/* Do both forms of copy propagation because it is important to
* eliminate as many cases of load_payload-of-load_payload as possible.
*/
OPT(brw_fs_opt_copy_propagation_defs);
OPT(brw_fs_opt_copy_propagation);
OPT(brw_opt_copy_propagation_defs);
OPT(brw_opt_copy_propagation);
/* Run after logical send lowering to give it a chance to CSE the
* LOAD_PAYLOAD instructions created to construct the payloads of
* e.g. texturing messages in cases where it wasn't possible to CSE the
* whole logical instruction.
*/
OPT(brw_fs_opt_cse_defs);
OPT(brw_fs_opt_register_coalesce);
OPT(brw_fs_opt_dead_code_eliminate);
OPT(brw_opt_cse_defs);
OPT(brw_opt_register_coalesce);
OPT(brw_opt_dead_code_eliminate);
}
OPT(brw_fs_opt_remove_redundant_halts);
OPT(brw_opt_remove_redundant_halts);
if (OPT(brw_fs_lower_load_payload)) {
OPT(brw_fs_opt_split_virtual_grfs);
if (OPT(brw_lower_load_payload)) {
OPT(brw_opt_split_virtual_grfs);
OPT(brw_fs_opt_register_coalesce);
OPT(brw_fs_lower_simd_width);
OPT(brw_fs_opt_dead_code_eliminate);
OPT(brw_opt_register_coalesce);
OPT(brw_lower_simd_width);
OPT(brw_opt_dead_code_eliminate);
}
brw_shader_phase_update(s, BRW_SHADER_PHASE_AFTER_MIDDLE_LOWERING);
OPT(brw_fs_lower_alu_restrictions);
OPT(brw_lower_alu_restrictions);
OPT(brw_fs_opt_combine_constants);
if (OPT(brw_fs_lower_integer_multiplication)) {
OPT(brw_opt_combine_constants);
if (OPT(brw_lower_integer_multiplication)) {
/* If lower_integer_multiplication made progress, it may have produced
* some 32x32-bit MULs in the process of lowering 64-bit MULs. Run it
* one more time to clean those up if they exist.
*/
OPT(brw_fs_lower_integer_multiplication);
OPT(brw_lower_integer_multiplication);
}
OPT(brw_fs_lower_sub_sat);
OPT(brw_lower_sub_sat);
progress = false;
OPT(brw_fs_lower_derivatives);
OPT(brw_fs_lower_regioning);
OPT(brw_lower_derivatives);
OPT(brw_lower_regioning);
/* Try both copy propagation passes. The defs one will likely not be
* able to handle everything at this point.
*/
const bool cp1 = OPT(brw_fs_opt_copy_propagation_defs);
const bool cp2 = OPT(brw_fs_opt_copy_propagation);
const bool cp1 = OPT(brw_opt_copy_propagation_defs);
const bool cp2 = OPT(brw_opt_copy_propagation);
if (cp1 || cp2)
OPT(brw_fs_opt_combine_constants);
OPT(brw_opt_combine_constants);
OPT(brw_fs_opt_dead_code_eliminate);
OPT(brw_fs_opt_register_coalesce);
OPT(brw_opt_dead_code_eliminate);
OPT(brw_opt_register_coalesce);
if (progress)
OPT(brw_fs_lower_simd_width);
OPT(brw_lower_simd_width);
OPT(brw_fs_lower_sends_overlapping_payload);
OPT(brw_lower_sends_overlapping_payload);
OPT(brw_fs_lower_uniform_pull_constant_loads);
OPT(brw_lower_uniform_pull_constant_loads);
OPT(brw_fs_lower_indirect_mov);
OPT(brw_lower_indirect_mov);
OPT(brw_fs_lower_find_live_channel);
OPT(brw_lower_find_live_channel);
OPT(brw_fs_lower_load_subgroup_invocation);
OPT(brw_lower_load_subgroup_invocation);
brw_shader_phase_update(s, BRW_SHADER_PHASE_AFTER_LATE_LOWERING);
}
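
The do/while structure above runs the rewrite passes to a fixed point: any pass reporting progress triggers another full iteration. A minimal standalone model of that pattern (stand-in types, not the OPT macro itself):

   #include <functional>
   #include <vector>

   struct shader {};  // stand-in for fs_visitor

   int main() {
      shader s;
      std::vector<std::function<bool(shader &)>> passes = {
         [](shader &) { return false; },  // e.g. brw_opt_algebraic
         [](shader &) { return false; },  // e.g. brw_opt_dead_code_eliminate
      };

      bool progress;
      do {
         progress = false;
         for (auto &pass : passes)
            if (pass(s))
               progress = true;  // a successful pass re-runs the whole loop
      } while (progress);
      return 0;
   }
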
@ -217,7 +217,7 @@ load_payload_sources_read_for_size(fs_inst *lp, unsigned size_read)
*/
bool
brw_fs_opt_zero_samples(fs_visitor &s)
brw_opt_zero_samples(fs_visitor &s)
{
bool progress = false;
@ -294,7 +294,7 @@ brw_fs_opt_zero_samples(fs_visitor &s)
* payload concatenation altogether.
*/
bool
brw_fs_opt_split_sends(fs_visitor &s)
brw_opt_split_sends(fs_visitor &s)
{
bool progress = false;
@ -374,7 +374,7 @@ brw_fs_opt_split_sends(fs_visitor &s)
* halt-target
*/
bool
brw_fs_opt_remove_redundant_halts(fs_visitor &s)
brw_opt_remove_redundant_halts(fs_visitor &s)
{
bool progress = false;
@ -423,7 +423,7 @@ brw_fs_opt_remove_redundant_halts(fs_visitor &s)
* analysis.
*/
bool
brw_fs_opt_eliminate_find_live_channel(fs_visitor &s)
brw_opt_eliminate_find_live_channel(fs_visitor &s)
{
bool progress = false;
unsigned depth = 0;
@ -517,7 +517,7 @@ out:
* mode once is enough for the full vector/matrix
*/
bool
brw_fs_opt_remove_extra_rounding_modes(fs_visitor &s)
brw_opt_remove_extra_rounding_modes(fs_visitor &s)
{
bool progress = false;
unsigned execution_mode = s.nir->info.float_controls_execution_mode;


@ -135,7 +135,7 @@ fold_multiplicands_of_MAD(fs_inst *inst)
}
bool
brw_constant_fold_instruction(const intel_device_info *devinfo, fs_inst *inst)
brw_opt_constant_fold_instruction(const intel_device_info *devinfo, fs_inst *inst)
{
bool progress = false;
@ -200,7 +200,7 @@ brw_constant_fold_instruction(const intel_device_info *devinfo, fs_inst *inst)
fold_multiplicands_of_MAD(inst);
assert(inst->opcode == BRW_OPCODE_ADD);
ASSERTED bool folded = brw_constant_fold_instruction(devinfo, inst);
ASSERTED bool folded = brw_opt_constant_fold_instruction(devinfo, inst);
assert(folded);
progress = true;
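
The rewrite asserted on above has a simple arithmetic shape: a MAD whose two multiplicands are immediates is an ADD with a pre-multiplied immediate, which the constant folder can then finish (plain C++, illustrative values):

   #include <cassert>

   int main() {
      const float imm_a = 3.0f, imm_b = 4.0f;  // immediate multiplicands
      float c = 1.5f;                          // the remaining addend

      float mad = c + imm_a * imm_b;           // original MAD
      float add = c + 12.0f;                   // ADD with the folded immediate

      assert(mad == add);
      return 0;
   }
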
@ -320,13 +320,13 @@ brw_constant_fold_instruction(const intel_device_info *devinfo, fs_inst *inst)
}
bool
brw_fs_opt_algebraic(fs_visitor &s)
brw_opt_algebraic(fs_visitor &s)
{
const intel_device_info *devinfo = s.devinfo;
bool progress = false;
foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) {
if (brw_constant_fold_instruction(devinfo, inst)) {
if (brw_opt_constant_fold_instruction(devinfo, inst)) {
progress = true;
continue;
}
@ -483,9 +483,7 @@ brw_fs_opt_algebraic(fs_visitor &s)
}
}
}
break;
case BRW_OPCODE_OR:
if (inst->src[0].equals(inst->src[1]) || inst->src[1].is_zero()) {
/* On Gfx8+, the OR instruction can have a source modifier that
@ -664,7 +662,6 @@ brw_fs_opt_algebraic(fs_visitor &s)
progress = true;
}
break;
case SHADER_OPCODE_BROADCAST:
if (is_uniform(inst->src[0])) {
inst->opcode = BRW_OPCODE_MOV;


@ -887,7 +887,7 @@ namespace {
}
bool
brw_fs_opt_bank_conflicts(fs_visitor &s)
brw_opt_bank_conflicts(fs_visitor &s)
{
assert(s.grf_used || !"Must be called after register allocation");


@ -558,7 +558,7 @@ opt_cmod_propagation_local(const intel_device_info *devinfo, bblock_t *block)
}
bool
brw_fs_opt_cmod_propagation(fs_visitor &s)
brw_opt_cmod_propagation(fs_visitor &s)
{
bool progress = false;


@ -1289,7 +1289,7 @@ parcel_out_registers(const intel_device_info *devinfo,
}
bool
brw_fs_opt_combine_constants(fs_visitor &s)
brw_opt_combine_constants(fs_visitor &s)
{
const intel_device_info *devinfo = s.devinfo;
void *const_ctx = ralloc_context(NULL);


@ -1342,7 +1342,7 @@ opt_copy_propagation_local(const brw_compiler *compiler, linear_ctx *lin_ctx,
if (constant_progress) {
commute_immediates(inst);
brw_constant_fold_instruction(compiler->devinfo, inst);
brw_opt_constant_fold_instruction(compiler->devinfo, inst);
progress = true;
}
@ -1415,7 +1415,7 @@ opt_copy_propagation_local(const brw_compiler *compiler, linear_ctx *lin_ctx,
}
bool
brw_fs_opt_copy_propagation(fs_visitor &s)
brw_opt_copy_propagation(fs_visitor &s)
{
bool progress = false;
void *copy_prop_ctx = ralloc_context(NULL);
@ -1808,7 +1808,7 @@ find_value_for_offset(fs_inst *def, const brw_reg &src, unsigned src_size)
}
bool
brw_fs_opt_copy_propagation_defs(fs_visitor &s)
brw_opt_copy_propagation_defs(fs_visitor &s)
{
const brw::def_analysis &defs = s.def_analysis.require();
unsigned *uses_deleted = new unsigned[defs.count()]();
@ -1880,7 +1880,7 @@ brw_fs_opt_copy_propagation_defs(fs_visitor &s)
if (constant_progress) {
commute_immediates(inst);
brw_constant_fold_instruction(s.compiler->devinfo, inst);
brw_opt_constant_fold_instruction(s.compiler->devinfo, inst);
}
}


@ -398,7 +398,7 @@ remap_sources(fs_visitor &s, const brw::def_analysis &defs,
}
bool
brw_fs_opt_cse_defs(fs_visitor &s)
brw_opt_cse_defs(fs_visitor &s)
{
const intel_device_info *devinfo = s.devinfo;
const idom_tree &idom = s.idom_analysis.require();


@ -97,7 +97,7 @@ can_eliminate_conditional_mod(const intel_device_info *devinfo,
}
bool
brw_fs_opt_dead_code_eliminate(fs_visitor &s)
brw_opt_dead_code_eliminate(fs_visitor &s)
{
const intel_device_info *devinfo = s.devinfo;


@ -226,7 +226,7 @@ would_violate_eot_restriction(const brw::simple_allocator &alloc,
}
bool
brw_fs_opt_register_coalesce(fs_visitor &s)
brw_opt_register_coalesce(fs_visitor &s)
{
const intel_device_info *devinfo = s.devinfo;


@ -190,7 +190,7 @@ opt_saturate_propagation_local(fs_visitor &s, bblock_t *block)
}
bool
brw_fs_opt_saturate_propagation(fs_visitor &s)
brw_opt_saturate_propagation(fs_visitor &s)
{
bool progress = false;


@ -22,13 +22,13 @@ using namespace brw;
* elimination and coalescing.
*/
bool
brw_fs_opt_split_virtual_grfs(fs_visitor &s)
brw_opt_split_virtual_grfs(fs_visitor &s)
{
/* Compact the register file so we eliminate dead vgrfs. This
* only defines split points for live registers, so if we have
* too large dead registers they will hit assertions later.
*/
brw_fs_opt_compact_virtual_grfs(s);
brw_opt_compact_virtual_grfs(s);
unsigned num_vars = s.alloc.count;
@ -221,7 +221,7 @@ cleanup:
* overhead.
*/
bool
brw_fs_opt_compact_virtual_grfs(fs_visitor &s)
brw_opt_compact_virtual_grfs(fs_visitor &s)
{
bool progress = false;
int *remap_table = new int[s.alloc.count];
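
The remap_table built here drives a compaction of the virtual register numbering; the idea, with stand-in data (plain C++, not the pass itself):

   #include <cassert>
   #include <vector>

   int main() {
      // true = this virtual GRF number is still referenced somewhere
      std::vector<bool> used = {true, false, true, true, false, true};

      std::vector<int> remap(used.size(), -1);
      int next = 0;
      for (size_t i = 0; i < used.size(); i++)
         if (used[i])
            remap[i] = next++;  // dead entries keep -1 and are dropped

      assert(next == 4);
      assert(remap[2] == 1 && remap[5] == 3);
      return 0;
   }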


@ -14,7 +14,7 @@ using namespace brw;
* Make sure this happens by introducing a dummy mov instruction.
*/
bool
brw_fs_workaround_emit_dummy_mov_instruction(fs_visitor &s)
brw_workaround_emit_dummy_mov_instruction(fs_visitor &s)
{
if (!intel_needs_workaround(s.devinfo, 14015360517))
return false;
@ -82,7 +82,7 @@ needs_dummy_fence(const intel_device_info *devinfo, fs_inst *inst)
* We probably need a better criteria in needs_dummy_fence().
*/
bool
brw_fs_workaround_memory_fence_before_eot(fs_visitor &s)
brw_workaround_memory_fence_before_eot(fs_visitor &s)
{
bool progress = false;
bool has_ugm_write_or_atomic = false;
@ -156,7 +156,7 @@ find_halt_control_flow_region_start(const fs_visitor *v)
* all channels of the program are disabled.
*/
bool
brw_fs_workaround_nomask_control_flow(fs_visitor &s)
brw_workaround_nomask_control_flow(fs_visitor &s)
{
if (s.devinfo->ver != 12)
return false;
@ -299,7 +299,7 @@ bytes_bitmask_to_words(unsigned b)
* accessed inside the next blocks, but this still should be good enough.
*/
bool
brw_fs_workaround_source_arf_before_eot(fs_visitor &s)
brw_workaround_source_arf_before_eot(fs_visitor &s)
{
bool progress = false;


@ -97,7 +97,7 @@ lower_scoreboard(fs_visitor *v)
v->cfg->dump();
}
brw_fs_lower_scoreboard(*v);
brw_lower_scoreboard(*v);
if (print) {
fprintf(stderr, "\n= After =\n");


@ -118,7 +118,7 @@ cmod_propagation(fs_visitor *v)
v->cfg->dump();
}
bool ret = brw_fs_opt_cmod_propagation(*v);
bool ret = brw_opt_cmod_propagation(*v);
if (print) {
fprintf(stderr, "\n= After =\n");


@ -56,7 +56,7 @@ struct FSCombineConstantsTest : public ::testing::Test {
s->cfg->dump();
}
bool ret = brw_fs_opt_combine_constants(*s);
bool ret = brw_opt_combine_constants(*s);
if (print) {
fprintf(stderr, "\n= After =\n");


@ -107,7 +107,7 @@ copy_propagation(fs_visitor *v)
v->cfg->dump();
}
bool ret = brw_fs_opt_copy_propagation(*v);
bool ret = brw_opt_copy_propagation(*v);
if (print) {
fprintf(stderr, "\n= After =\n");


@ -90,7 +90,7 @@ cse(fs_visitor *v)
v->cfg->dump();
}
bool ret = brw_fs_opt_cse_defs(*v);
bool ret = brw_opt_cse_defs(*v);
if (print) {
fprintf(stderr, "\n= After =\n");


@ -108,7 +108,7 @@ saturate_propagation(fs_visitor *v)
v->cfg->dump();
}
bool ret = brw_fs_opt_saturate_propagation(*v);
bool ret = brw_opt_saturate_propagation(*v);
if (print) {
fprintf(stderr, "\n= After =\n");