mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-06 15:20:17 +01:00
intel/brw: Use SHADER_OPCODE_SEND_GATHER in Xe3
Add an optimization pass to turn regular SENDs into SEND_GATHERs. This allows the payload to be "broken" into smaller pieces that can be further optimized, which _may_ result in - less register pressure (no need for contiguous space), and - fewer instructions (no need to MOV into such space). For debugging, the INTEL_DEBUG=no-send-gather option skips this optimization and reports how many opportunities were missed. Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Lionel Landwerlin <None> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32410>
This commit is contained in:
parent
26d4d04d63
commit
b6b32933ad
4 changed files with 87 additions and 1 deletions
|
|
@ -559,6 +559,7 @@ bool brw_opt_register_coalesce(fs_visitor &s);
|
|||
bool brw_opt_remove_extra_rounding_modes(fs_visitor &s);
|
||||
bool brw_opt_remove_redundant_halts(fs_visitor &s);
|
||||
bool brw_opt_saturate_propagation(fs_visitor &s);
|
||||
bool brw_opt_send_to_send_gather(fs_visitor &s);
|
||||
bool brw_opt_split_sends(fs_visitor &s);
|
||||
bool brw_opt_split_virtual_grfs(fs_visitor &s);
|
||||
bool brw_opt_zero_samples(fs_visitor &s);
|
||||
|
|
|
|||
|
|
@ -7,6 +7,8 @@
|
|||
#include "brw_fs.h"
|
||||
#include "brw_builder.h"
|
||||
|
||||
#include "dev/intel_debug.h"
|
||||
|
||||
using namespace brw;
|
||||
|
||||
void
|
||||
|
|
@ -113,6 +115,9 @@ brw_optimize(fs_visitor &s)
|
|||
}
|
||||
}
|
||||
|
||||
if (s.devinfo->ver >= 30)
|
||||
OPT(brw_opt_send_to_send_gather);
|
||||
|
||||
OPT(brw_opt_split_sends);
|
||||
OPT(brw_workaround_nomask_control_flow);
|
||||
|
||||
|
|
@ -563,3 +568,80 @@ brw_opt_remove_extra_rounding_modes(fs_visitor &s)
|
|||
|
||||
return progress;
|
||||
}
|
||||
|
||||
bool
|
||||
brw_opt_send_to_send_gather(fs_visitor &s)
|
||||
{
|
||||
const intel_device_info *devinfo = s.devinfo;
|
||||
bool progress = false;
|
||||
|
||||
assert(devinfo->ver >= 30);
|
||||
|
||||
const unsigned unit = reg_unit(devinfo);
|
||||
assert(unit == 2);
|
||||
|
||||
unsigned count = 0;
|
||||
|
||||
foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) {
|
||||
if (inst->opcode != SHADER_OPCODE_SEND)
|
||||
continue;
|
||||
|
||||
/* For 1-2 registers, send-gather offers no benefits over split-send. */
|
||||
if (inst->mlen + inst->ex_mlen <= 2 * unit)
|
||||
continue;
|
||||
|
||||
assert(inst->mlen % unit == 0);
|
||||
assert(inst->ex_mlen % unit == 0);
|
||||
|
||||
struct {
|
||||
brw_reg src;
|
||||
unsigned phys_len;
|
||||
} payload[2] = {
|
||||
{ inst->src[2], inst->mlen / unit },
|
||||
{ inst->src[3], inst->ex_mlen / unit },
|
||||
};
|
||||
|
||||
const unsigned num_payload_sources = payload[0].phys_len + payload[1].phys_len;
|
||||
|
||||
/* Limited by Src0.Length in the SEND instruction. */
|
||||
if (num_payload_sources > 15)
|
||||
continue;
|
||||
|
||||
if (INTEL_DEBUG(DEBUG_NO_SEND_GATHER)) {
|
||||
count++;
|
||||
continue;
|
||||
}
|
||||
|
||||
inst->resize_sources(3 + num_payload_sources);
|
||||
/* Sources 0 and 1 remain the same. Source 2 will be filled
|
||||
* after register allocation.
|
||||
*/
|
||||
inst->src[2] = {};
|
||||
|
||||
int idx = 3;
|
||||
for (unsigned p = 0; p < ARRAY_SIZE(payload); p++) {
|
||||
for (unsigned i = 0; i < payload[p].phys_len; i++) {
|
||||
inst->src[idx++] = byte_offset(payload[p].src,
|
||||
i * reg_unit(devinfo) * REG_SIZE);
|
||||
}
|
||||
}
|
||||
assert(idx == inst->sources);
|
||||
|
||||
inst->opcode = SHADER_OPCODE_SEND_GATHER;
|
||||
inst->mlen = 0;
|
||||
inst->ex_mlen = 0;
|
||||
|
||||
progress = true;
|
||||
}
|
||||
|
||||
if (INTEL_DEBUG(DEBUG_NO_SEND_GATHER)) {
|
||||
fprintf(stderr, "Ignored %u opportunities to try SEND_GATHER in %s shader.\n",
|
||||
count, _mesa_shader_stage_to_string(s.stage));
|
||||
}
|
||||
|
||||
if (progress)
|
||||
s.invalidate_analysis(DEPENDENCY_INSTRUCTION_DETAIL |
|
||||
DEPENDENCY_INSTRUCTION_DATA_FLOW);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -115,6 +115,7 @@ static const struct debug_control debug_control[] = {
|
|||
{ "reg-pressure", DEBUG_REG_PRESSURE },
|
||||
{ "shader-print", DEBUG_SHADER_PRINT },
|
||||
{ "cl-quiet", DEBUG_CL_QUIET },
|
||||
{ "no-send-gather", DEBUG_NO_SEND_GATHER },
|
||||
{ NULL, 0 }
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -106,6 +106,7 @@ extern uint64_t intel_debug;
|
|||
#define DEBUG_BVH_BLAS_IR_AS (1ull << 58)
|
||||
#define DEBUG_BVH_TLAS_IR_AS (1ull << 59)
|
||||
#define DEBUG_BVH_NO_BUILD (1ull << 60)
|
||||
#define DEBUG_NO_SEND_GATHER (1ull << 61)
|
||||
|
||||
#define DEBUG_ANY (~0ull)
|
||||
|
||||
|
|
@ -115,7 +116,8 @@ extern uint64_t intel_debug;
|
|||
/* These flags may affect program generation */
|
||||
#define DEBUG_DISK_CACHE_MASK \
|
||||
(DEBUG_NO_DUAL_OBJECT_GS | DEBUG_SPILL_FS | \
|
||||
DEBUG_SPILL_VEC4 | DEBUG_NO_COMPACTION | DEBUG_DO32 | DEBUG_SOFT64)
|
||||
DEBUG_SPILL_VEC4 | DEBUG_NO_COMPACTION | DEBUG_DO32 | DEBUG_SOFT64 | \
|
||||
DEBUG_NO_SEND_GATHER)
|
||||
|
||||
/* Flags to determine what bvh to dump out */
|
||||
#define DEBUG_BVH_ANV (DEBUG_BVH_BLAS | DEBUG_BVH_TLAS)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue