mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 20:10:14 +01:00
185 lines
6 KiB
C++
185 lines
6 KiB
C++
|
|
/*
 * Copyright © 2024 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 */
|
||
|
|
#include "brw_shader.h"
|
||
|
|
#include "brw_cfg.h"
|
||
|
|
#include "brw_analysis.h"
|
||
|
|
#include "brw_builder.h"
|
||
|
|
|
||
|
|
/* Duplicated from brw_def_analysis::fully_defines. */
|
||
|
|
static bool
|
||
|
|
fully_defines(const brw_shader &s, brw_inst *inst)
|
||
|
|
{
|
||
|
|
return s.alloc.sizes[inst->dst.nr] * REG_SIZE == inst->size_written &&
|
||
|
|
!inst->is_partial_write();
|
||
|
|
}
|
||
|
|
|
||
|
|
bool
|
||
|
|
brw_insert_load_reg(brw_shader &s)
|
||
|
|
{
|
||
|
|
bool progress = false;
|
||
|
|
|
||
|
|
const brw_def_analysis &defs = s.def_analysis.require();
|
||
|
|
|
||
|
|
foreach_block_and_inst_safe(block, brw_inst, inst, s.cfg) {
|
||
|
|
/* These should not exist yet. */
|
||
|
|
assert(inst->opcode != SHADER_OPCODE_LOAD_REG);
|
||
|
|
|
||
|
|
/* These opcodes may have the right source and destination patterns to
|
||
|
|
* have their sources replaced by load_reg, but these instructions are
|
||
|
|
* special and / or wierd. They should not be modified.
|
||
|
|
*/
|
||
|
|
if (inst->opcode == SHADER_OPCODE_UNDEF ||
|
||
|
|
inst->opcode == BRW_OPCODE_DPAS) {
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
|
||
|
|
/* If the destination is non-VGRF adding load_reg instructions will not
|
||
|
|
* help. If the destination is already SSA, nothing needs to be done.
|
||
|
|
*/
|
||
|
|
if (inst->dst.file != VGRF || defs.get(inst->dst) != NULL)
|
||
|
|
continue;
|
||
|
|
|
||
|
|
/* If there is a source that would cause def_analysis::update_for_reads
|
||
|
|
* to mark the def as invalid, adding load_reg for the sources will not
|
||
|
|
* help.
|
||
|
|
*/
|
||
|
|
if (inst->reads_accumulator_implicitly())
|
||
|
|
continue;
|
||
|
|
|
||
|
|
bool bad_source = false;
|
||
|
|
for (int i = 0; i < inst->sources; i++) {
|
||
|
|
if (inst->src[i].file == ARF &&
|
||
|
|
(inst->src[i].nr == BRW_ARF_ADDRESS ||
|
||
|
|
inst->src[i].nr == BRW_ARF_ACCUMULATOR ||
|
||
|
|
inst->src[i].nr == BRW_ARF_FLAG)) {
|
||
|
|
bad_source = true;
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
if (bad_source)
|
||
|
|
continue;
|
||
|
|
|
||
|
|
/* If the instruction does not fully define the destination, adding
|
||
|
|
* load_reg instructions will not help.
|
||
|
|
*/
|
||
|
|
if (!fully_defines(s, inst))
|
||
|
|
continue;
|
||
|
|
|
||
|
|
if (inst->exec_size < 8)
|
||
|
|
continue;
|
||
|
|
|
||
|
|
assert(inst->exec_size == 8 || inst->exec_size == 16 ||
|
||
|
|
inst->exec_size == 32);
|
||
|
|
|
||
|
|
const unsigned mask = (inst->exec_size / 8) - 1;
|
||
|
|
|
||
|
|
/* Replace any non-SSA sources with load_reg of the source. */
|
||
|
|
const brw_builder bld = brw_builder(inst);
|
||
|
|
for (int i = 0; i < inst->sources; i++) {
|
||
|
|
/* LOAD_REG only operates on VGRF sources. If the source is not VGRF,
|
||
|
|
* skip it.
|
||
|
|
*/
|
||
|
|
if (inst->src[i].file != VGRF)
|
||
|
|
continue;
|
||
|
|
|
||
|
|
/* The source is already a def, so don't add a LOAD_REG. */
|
||
|
|
if (defs.get(inst->src[i]) != NULL)
|
||
|
|
continue;
|
||
|
|
|
||
|
|
/* Cases of stride != 1 are difficult to handle correctly. For
|
||
|
|
* example, when stride is 0, the source may have been written by
|
||
|
|
* NoMask instruction that cannot be seen from here. In this case,
|
||
|
|
* emitting a non-NoMask LOAD_REG may not actually copy the value
|
||
|
|
* that the instruction is trying to read.
|
||
|
|
*
|
||
|
|
* This means that is_scalar sources in larger exec sizes are not
|
||
|
|
* handled. Since enough information is available in the source, this
|
||
|
|
* could be added later.
|
||
|
|
*/
|
||
|
|
if (inst->src[i].stride != 1)
|
||
|
|
continue;
|
||
|
|
|
||
|
|
/* If the size of the VGRF allocation is not an even multiple of
|
||
|
|
* the SIMD size, don't emit a load_reg. This can occur for sparse
|
||
|
|
* texture loads. These will have SIMD-size values for the texture
|
||
|
|
* data and a single SIMD1 register for the residency information.
|
||
|
|
*/
|
||
|
|
if ((s.alloc.sizes[inst->src[i].nr] & mask) != 0)
|
||
|
|
continue;
|
||
|
|
|
||
|
|
brw_reg_type t =
|
||
|
|
brw_type_with_size(BRW_TYPE_UD,
|
||
|
|
brw_type_size_bits(inst->src[i].type));
|
||
|
|
brw_reg old_src = brw_vgrf(inst->src[i].nr, t);
|
||
|
|
brw_reg new_src;
|
||
|
|
|
||
|
|
/* Since the sources of a LOAD_REG will likely not be defs,
|
||
|
|
* none of the existing optimizations passes will eliminate
|
||
|
|
* redundant LOAD_REG instructions. Search back though this
|
||
|
|
* block to find a LOAD_REG of the same value to avoid emitting
|
||
|
|
* too many redundant instructions.
|
||
|
|
*/
|
||
|
|
foreach_inst_in_block_reverse_starting_from(brw_inst, scan_inst, inst) {
|
||
|
|
if (scan_inst->dst.file == old_src.file &&
|
||
|
|
scan_inst->dst.nr == old_src.nr) {
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
|
||
|
|
if (scan_inst->opcode == SHADER_OPCODE_LOAD_REG &&
|
||
|
|
scan_inst->exec_size == inst->exec_size &&
|
||
|
|
scan_inst->force_writemask_all == inst->force_writemask_all &&
|
||
|
|
old_src.equals(scan_inst->src[0])) {
|
||
|
|
new_src = scan_inst->dst;
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
if (new_src.file == BAD_FILE)
|
||
|
|
new_src = bld.LOAD_REG(old_src);
|
||
|
|
|
||
|
|
inst->src[i].nr = new_src.nr;
|
||
|
|
progress = true;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
if (progress)
|
||
|
|
s.invalidate_analysis(BRW_DEPENDENCY_INSTRUCTIONS |
|
||
|
|
BRW_DEPENDENCY_VARIABLES);
|
||
|
|
|
||
|
|
return progress;
|
||
|
|
}
|
||
|
|
|
||
|
|
bool
|
||
|
|
brw_lower_load_reg(brw_shader &s)
|
||
|
|
{
|
||
|
|
bool progress = false;
|
||
|
|
|
||
|
|
foreach_block_and_inst_safe(block, brw_inst, inst, s.cfg) {
|
||
|
|
if (inst->opcode == SHADER_OPCODE_LOAD_REG) {
|
||
|
|
const brw_builder ibld = brw_builder(inst);
|
||
|
|
|
||
|
|
const unsigned bytes = inst->size_written;
|
||
|
|
const unsigned type_bytes = brw_type_size_bytes(inst->dst.type);
|
||
|
|
const unsigned bytes_per_mov = inst->exec_size * type_bytes;
|
||
|
|
|
||
|
|
for (unsigned i = 0; i < bytes; i += bytes_per_mov) {
|
||
|
|
ibld.MOV(byte_offset(inst->dst, i),
|
||
|
|
byte_offset(inst->src[0], i));
|
||
|
|
}
|
||
|
|
|
||
|
|
inst->remove();
|
||
|
|
progress = true;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
if (progress)
|
||
|
|
s.invalidate_analysis(BRW_DEPENDENCY_INSTRUCTIONS |
|
||
|
|
BRW_DEPENDENCY_VARIABLES);
|
||
|
|
|
||
|
|
return progress;
|
||
|
|
}
|