mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-01 14:38:06 +02:00
intel/brw: Add SHADER_OPCODE_SEND_GATHER
Starting in Xe3, there's a variant of SEND that takes the register numbers from the ARF scalar register and doesn't require them to be contiguous. The new opcode added here represents that kind of SEND. To keep the original sources reachable, we keep them around in the IR and simply ignore them at generator time. This allows the software scoreboard to properly reason about the dependencies without trying to decode the contents of the ARF scalar register being used. Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Lionel Landwerlin <None> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32410>
This commit is contained in:
parent
2fca22347c
commit
650ec7169d
8 changed files with 69 additions and 5 deletions
|
|
@ -264,6 +264,19 @@ enum opcode {
|
|||
*/
|
||||
SHADER_OPCODE_SEND,
|
||||
|
||||
/**
|
||||
* A variant of SEND that collects its sources to form an input.
|
||||
*
|
||||
* Source 0: Message descriptor ("desc").
|
||||
* Source 1: Message extended descriptor ("ex_desc").
|
||||
* Source 2: Before register allocation must be BAD_FILE,
|
||||
* after that, the ARF scalar register containing
|
||||
* the (physical) numbers of the payload sources.
|
||||
* Source 3..n: Payload sources. For this opcode, they must each
|
||||
* have the size of a physical GRF.
|
||||
*/
|
||||
SHADER_OPCODE_SEND_GATHER,
|
||||
|
||||
/**
|
||||
* An "undefined" write which does nothing but indicates to liveness that
|
||||
* we don't care about any values in the register which predate this
|
||||
|
|
|
|||
|
|
@ -206,6 +206,7 @@ fs_inst::is_send_from_grf() const
|
|||
{
|
||||
switch (opcode) {
|
||||
case SHADER_OPCODE_SEND:
|
||||
case SHADER_OPCODE_SEND_GATHER:
|
||||
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
|
||||
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
|
||||
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
|
||||
|
|
@ -240,6 +241,7 @@ fs_inst::is_control_source(unsigned arg) const
|
|||
return arg == 1 || arg == 2;
|
||||
|
||||
case SHADER_OPCODE_SEND:
|
||||
case SHADER_OPCODE_SEND_GATHER:
|
||||
return arg == 0 || arg == 1;
|
||||
|
||||
case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
|
||||
|
|
@ -278,6 +280,9 @@ fs_inst::is_payload(unsigned arg) const
|
|||
case SHADER_OPCODE_SEND:
|
||||
return arg == 2 || arg == 3;
|
||||
|
||||
case SHADER_OPCODE_SEND_GATHER:
|
||||
return arg >= 2;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
|
@ -609,6 +614,14 @@ fs_inst::size_read(const struct intel_device_info *devinfo, int arg) const
|
|||
}
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_SEND_GATHER:
|
||||
if (arg >= 3) {
|
||||
/* SEND_GATHER is Xe3+, so no need to pass devinfo around. */
|
||||
const unsigned reg_unit = 2;
|
||||
return REG_SIZE * reg_unit;
|
||||
}
|
||||
break;
|
||||
|
||||
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
|
||||
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
|
||||
if (arg == 0)
|
||||
|
|
|
|||
|
|
@ -168,7 +168,13 @@ brw_generator::generate_send(fs_inst *inst,
|
|||
struct brw_reg payload,
|
||||
struct brw_reg payload2)
|
||||
{
|
||||
const bool gather = false;
|
||||
const bool gather = inst->opcode == SHADER_OPCODE_SEND_GATHER;
|
||||
if (gather) {
|
||||
assert(payload.file == ARF);
|
||||
assert(payload.nr == BRW_ARF_SCALAR);
|
||||
assert(payload2.file == ARF);
|
||||
assert(payload2.nr == BRW_ARF_NULL);
|
||||
}
|
||||
|
||||
if (ex_desc.file == IMM && ex_desc.ud == 0) {
|
||||
brw_send_indirect_message(p, inst->sfid, dst, payload, desc, inst->eot, gather);
|
||||
|
|
@ -854,7 +860,14 @@ brw_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||
brw_set_default_group(p, inst->group);
|
||||
}
|
||||
|
||||
for (unsigned int i = 0; i < inst->sources; i++) {
|
||||
/* For SEND_GATHER, the payload sources are represented inside the
|
||||
* scalar register in src[2], so we can skip them.
|
||||
*/
|
||||
const unsigned num_sources =
|
||||
inst->opcode == SHADER_OPCODE_SEND_GATHER ? 3 : inst->sources;
|
||||
assert(num_sources <= ARRAY_SIZE(src));
|
||||
|
||||
for (unsigned int i = 0; i < num_sources; i++) {
|
||||
src[i] = normalize_brw_reg_for_encoding(&inst->src[i]);
|
||||
/* The accumulator result appears to get used for the
|
||||
* conditional modifier generation. When negating a UD
|
||||
|
|
@ -1147,6 +1160,7 @@ brw_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||
break;
|
||||
|
||||
case SHADER_OPCODE_SEND:
|
||||
case SHADER_OPCODE_SEND_GATHER:
|
||||
generate_send(inst, dst, src[0], src[1], src[2],
|
||||
inst->ex_mlen > 0 ? src[3] : brw_null_reg());
|
||||
send_count++;
|
||||
|
|
|
|||
|
|
@ -130,6 +130,13 @@ namespace {
|
|||
if (inst->opcode == SHADER_OPCODE_SEND) {
|
||||
ss = DIV_ROUND_UP(inst->size_read(devinfo, 2), REG_SIZE) +
|
||||
DIV_ROUND_UP(inst->size_read(devinfo, 3), REG_SIZE);
|
||||
} else if (inst->opcode == SHADER_OPCODE_SEND_GATHER) {
|
||||
ss = inst->mlen;
|
||||
/* If we haven't lowered yet, count the sources. */
|
||||
if (!ss) {
|
||||
for (int i = 3; i < inst->sources; i++)
|
||||
ss += DIV_ROUND_UP(inst->size_read(devinfo, i), REG_SIZE);
|
||||
}
|
||||
} else {
|
||||
for (unsigned i = 0; i < inst->sources; i++)
|
||||
ss = MAX2(ss, DIV_ROUND_UP(inst->size_read(devinfo, i), REG_SIZE));
|
||||
|
|
@ -597,6 +604,7 @@ namespace {
|
|||
0, 0, 0, 0, 0, 0);
|
||||
|
||||
case SHADER_OPCODE_SEND:
|
||||
case SHADER_OPCODE_SEND_GATHER:
|
||||
switch (info.sfid) {
|
||||
case GFX6_SFID_DATAPORT_CONSTANT_CACHE:
|
||||
/* See FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD */
|
||||
|
|
|
|||
|
|
@ -2629,15 +2629,22 @@ brw_lower_send_descriptors(fs_visitor &s)
|
|||
bool progress = false;
|
||||
|
||||
foreach_block_and_inst (block, fs_inst, inst, s.cfg) {
|
||||
if (inst->opcode != SHADER_OPCODE_SEND)
|
||||
if (inst->opcode != SHADER_OPCODE_SEND &&
|
||||
inst->opcode != SHADER_OPCODE_SEND_GATHER)
|
||||
continue;
|
||||
|
||||
const brw_builder ubld = brw_builder(&s, block, inst).exec_all().group(1, 0);
|
||||
|
||||
/* Descriptor */
|
||||
const unsigned rlen = inst->dst.is_null() ? 0 : inst->size_written / REG_SIZE;
|
||||
unsigned mlen = inst->mlen;
|
||||
if (inst->opcode == SHADER_OPCODE_SEND_GATHER) {
|
||||
assert(inst->sources >= 3);
|
||||
mlen = (inst->sources - 3) * reg_unit(devinfo);
|
||||
}
|
||||
|
||||
uint32_t desc_imm = inst->desc |
|
||||
brw_message_desc(devinfo, inst->mlen, rlen, inst->header_size);
|
||||
brw_message_desc(devinfo, mlen, rlen, inst->header_size);
|
||||
|
||||
assert(inst->src[0].file != BAD_FILE);
|
||||
assert(inst->src[1].file != BAD_FILE);
|
||||
|
|
|
|||
|
|
@ -120,6 +120,8 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
|
|||
|
||||
case SHADER_OPCODE_SEND:
|
||||
return "send";
|
||||
case SHADER_OPCODE_SEND_GATHER:
|
||||
return "send_gather";
|
||||
|
||||
case SHADER_OPCODE_UNDEF:
|
||||
return "undef";
|
||||
|
|
|
|||
|
|
@ -201,6 +201,7 @@ fs_inst::has_side_effects() const
|
|||
{
|
||||
switch (opcode) {
|
||||
case SHADER_OPCODE_SEND:
|
||||
case SHADER_OPCODE_SEND_GATHER:
|
||||
return send_has_side_effects;
|
||||
|
||||
case BRW_OPCODE_SYNC:
|
||||
|
|
@ -227,7 +228,8 @@ bool
|
|||
fs_inst::is_volatile() const
|
||||
{
|
||||
return opcode == SHADER_OPCODE_MEMORY_LOAD_LOGICAL ||
|
||||
(opcode == SHADER_OPCODE_SEND && send_is_volatile);
|
||||
((opcode == SHADER_OPCODE_SEND ||
|
||||
opcode == SHADER_OPCODE_SEND_GATHER) && send_is_volatile);
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
|
|
|
|||
|
|
@ -299,6 +299,11 @@ brw_validate(const fs_visitor &s)
|
|||
fsv_assert(is_uniform(inst->src[0]) && is_uniform(inst->src[1]));
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_SEND_GATHER:
|
||||
fsv_assert(is_uniform(inst->src[0]) && is_uniform(inst->src[1]));
|
||||
fsv_assert(devinfo->ver >= 30);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_MOV:
|
||||
fsv_assert(inst->sources == 1);
|
||||
break;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue