mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 09:08:10 +02:00
jay/lower_scoreboard: use SYNC.allrd/allwr
This collapses piles of silliness. Totals: CodeSize: 71626288 -> 70710000 (-1.28%) Totals from 1634 (61.73% of 2647) affected shaders: CodeSize: 66319376 -> 65403088 (-1.38%) Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41398>
This commit is contained in:
parent
c1dc9d3b1a
commit
5636a57f60
3 changed files with 40 additions and 25 deletions
|
|
@ -5,6 +5,7 @@
|
||||||
|
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include "compiler/brw/brw_eu_defines.h"
|
#include "compiler/brw/brw_eu_defines.h"
|
||||||
|
#include "util/bitscan.h"
|
||||||
#include "util/bitset.h"
|
#include "util/bitset.h"
|
||||||
#include "util/macros.h"
|
#include "util/macros.h"
|
||||||
#include "util/u_math.h"
|
#include "util/u_math.h"
|
||||||
|
|
@ -33,9 +34,14 @@ def_to_gpr(jay_function *func, jay_inst *I, jay_def x)
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
jay_SYNC_nop(jay_builder *b, struct tgl_swsb dep)
|
sync_sbids(jay_builder *b, uint32_t mask, enum tgl_sbid_mode mode)
|
||||||
{
|
{
|
||||||
jay_SYNC(b, TGL_SYNC_NOP)->dep = dep;
|
if (util_is_power_of_two_nonzero(mask)) {
|
||||||
|
jay_SYNC(b, jay_null(), TGL_SYNC_NOP)->dep =
|
||||||
|
tgl_swsb_sbid(mode, util_logbase2(mask));
|
||||||
|
} else if (mask) {
|
||||||
|
jay_SYNC(b, mask, mode == TGL_SBID_DST ? TGL_SYNC_ALLWR : TGL_SYNC_ALLRD);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
@ -51,6 +57,7 @@ lower_send_local(jay_function *func, jay_block *block)
|
||||||
|
|
||||||
jay_foreach_inst_in_block_safe(block, I) {
|
jay_foreach_inst_in_block_safe(block, I) {
|
||||||
jay_builder b = jay_init_builder(func, jay_before_inst(I));
|
jay_builder b = jay_init_builder(func, jay_before_inst(I));
|
||||||
|
uint32_t sbid_dst = 0, sbid_src = 0;
|
||||||
|
|
||||||
/* Read-after-write */
|
/* Read-after-write */
|
||||||
jay_foreach_src(I, s) {
|
jay_foreach_src(I, s) {
|
||||||
|
|
@ -58,7 +65,7 @@ lower_send_local(jay_function *func, jay_block *block)
|
||||||
|
|
||||||
u_foreach_bit(sbid, busy) {
|
u_foreach_bit(sbid, busy) {
|
||||||
if (BITSET_TEST_COUNT(tokens[sbid].writing, src.base, src.width)) {
|
if (BITSET_TEST_COUNT(tokens[sbid].writing, src.base, src.width)) {
|
||||||
jay_SYNC_nop(&b, tgl_swsb_sbid(TGL_SBID_DST, sbid));
|
sbid_dst |= BITFIELD_BIT(sbid);
|
||||||
busy &= ~BITFIELD_BIT(sbid);
|
busy &= ~BITFIELD_BIT(sbid);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -70,36 +77,29 @@ lower_send_local(jay_function *func, jay_block *block)
|
||||||
|
|
||||||
u_foreach_bit(sbid, busy) {
|
u_foreach_bit(sbid, busy) {
|
||||||
if (BITSET_TEST_COUNT(tokens[sbid].writing, dst.base, dst.width)) {
|
if (BITSET_TEST_COUNT(tokens[sbid].writing, dst.base, dst.width)) {
|
||||||
jay_SYNC_nop(&b, tgl_swsb_sbid(TGL_SBID_DST, sbid));
|
sbid_dst |= BITFIELD_BIT(sbid);
|
||||||
busy &= ~BITFIELD_BIT(sbid);
|
|
||||||
} else if (BITSET_TEST_COUNT(tokens[sbid].reading, dst.base,
|
} else if (BITSET_TEST_COUNT(tokens[sbid].reading, dst.base,
|
||||||
dst.width)) {
|
dst.width)) {
|
||||||
jay_SYNC_nop(&b, tgl_swsb_sbid(TGL_SBID_SRC, sbid));
|
sbid_src |= BITFIELD_BIT(sbid);
|
||||||
BITSET_ZERO(tokens[sbid].reading);
|
BITSET_ZERO(tokens[sbid].reading);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Lower ordering barriers */
|
|
||||||
if (I->op == JAY_OPCODE_SCHEDULE_BARRIER) {
|
|
||||||
if (busy) {
|
|
||||||
jay_SYNC(&b, TGL_SYNC_ALLWR);
|
|
||||||
busy = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
jay_remove_instruction(I);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (I->op == JAY_OPCODE_SEND && !jay_send_eot(I)) {
|
if (I->op == JAY_OPCODE_SEND && !jay_send_eot(I)) {
|
||||||
unsigned sbid = (roundrobin++) % NUM_TOKENS;
|
unsigned sbid = (roundrobin++) % NUM_TOKENS;
|
||||||
jay_set_send_sbid(I, sbid);
|
jay_set_send_sbid(I, sbid);
|
||||||
|
|
||||||
if (!(busy & BITSET_BIT(sbid))) {
|
if (!(busy & BITFIELD_BIT(sbid))) {
|
||||||
busy |= BITSET_BIT(sbid);
|
busy |= BITFIELD_BIT(sbid);
|
||||||
BITSET_ZERO(tokens[sbid].writing);
|
BITSET_ZERO(tokens[sbid].writing);
|
||||||
BITSET_ZERO(tokens[sbid].reading);
|
BITSET_ZERO(tokens[sbid].reading);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* SBID.set implies SBID.dst (which implies SBID.src), so elide */
|
||||||
|
sbid_dst &= ~BITFIELD_BIT(sbid);
|
||||||
|
sbid_src &= ~BITFIELD_BIT(sbid);
|
||||||
|
|
||||||
struct gpr_range dst = def_to_gpr(func, I, I->dst);
|
struct gpr_range dst = def_to_gpr(func, I, I->dst);
|
||||||
BITSET_SET_COUNT(tokens[sbid].writing, dst.base, dst.width);
|
BITSET_SET_COUNT(tokens[sbid].writing, dst.base, dst.width);
|
||||||
|
|
||||||
|
|
@ -111,18 +111,29 @@ lower_send_local(jay_function *func, jay_block *block)
|
||||||
/* Barriers are non-EOT gateway messages. Insert the needed SYNC */
|
/* Barriers are non-EOT gateway messages. Insert the needed SYNC */
|
||||||
if (jay_send_sfid(I) == BRW_SFID_MESSAGE_GATEWAY) {
|
if (jay_send_sfid(I) == BRW_SFID_MESSAGE_GATEWAY) {
|
||||||
b.cursor = jay_after_inst(I);
|
b.cursor = jay_after_inst(I);
|
||||||
jay_SYNC(&b, TGL_SYNC_BAR);
|
jay_SYNC(&b, jay_null(), TGL_SYNC_BAR);
|
||||||
}
|
}
|
||||||
|
} else if (I->op == JAY_OPCODE_SCHEDULE_BARRIER) {
|
||||||
|
sbid_dst |= busy;
|
||||||
|
}
|
||||||
|
|
||||||
|
b.cursor = jay_before_inst(I);
|
||||||
|
assert(((sbid_dst & sbid_src) == 0) && "by construction");
|
||||||
|
|
||||||
|
busy &= ~sbid_dst;
|
||||||
|
sync_sbids(&b, sbid_dst, TGL_SBID_DST);
|
||||||
|
sync_sbids(&b, sbid_src, TGL_SBID_SRC);
|
||||||
|
|
||||||
|
if (I->op == JAY_OPCODE_SCHEDULE_BARRIER) {
|
||||||
|
/* Lowered above into a sync, but removed late to keep the cursor */
|
||||||
|
jay_remove_instruction(I);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Sync on block boundaries. */
|
/* Sync on block boundaries. */
|
||||||
if (block != jay_last_block(func)) {
|
if (block != jay_last_block(func)) {
|
||||||
jay_builder b = jay_init_builder(func, jay_before_jump(block));
|
jay_builder b = jay_init_builder(func, jay_before_jump(block));
|
||||||
|
sync_sbids(&b, busy, TGL_SBID_DST);
|
||||||
u_foreach_bit(sbid, busy) {
|
|
||||||
jay_SYNC_nop(&b, tgl_swsb_sbid(TGL_SBID_DST, sbid));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -397,7 +408,7 @@ lower_trivial(jay_function *func)
|
||||||
I->dep = tgl_swsb_dst_dep(tgl_swsb_sbid(TGL_SBID_SET, 0), 1);
|
I->dep = tgl_swsb_dst_dep(tgl_swsb_sbid(TGL_SBID_SET, 0), 1);
|
||||||
|
|
||||||
jay_builder b = jay_init_builder(func, jay_after_inst(I));
|
jay_builder b = jay_init_builder(func, jay_after_inst(I));
|
||||||
jay_SYNC_nop(&b, tgl_swsb_sbid(TGL_SBID_DST, 0));
|
sync_sbids(&b, BITFIELD_BIT(0), TGL_SBID_DST);
|
||||||
} else {
|
} else {
|
||||||
I->dep = tgl_swsb_regdist(1);
|
I->dep = tgl_swsb_regdist(1);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -128,7 +128,7 @@ op('shl', 2, 'u32 u64 u16 s16 s32 s64', Props.CMOD | Props.NEGATE0)
|
||||||
op('shr', 2, 'u32 u64 u16 s16 s32 s64', Props.CMOD | Props.NEGATE0)
|
op('shr', 2, 'u32 u64 u16 s16 s32 s64', Props.CMOD | Props.NEGATE0)
|
||||||
|
|
||||||
op('quad_swizzle', 1, 'u1 u32', 0, ['enum jay_quad_swizzle swizzle'])
|
op('quad_swizzle', 1, 'u1 u32', 0, ['enum jay_quad_swizzle swizzle'])
|
||||||
op('sync', 0, None, Props.NO_DEST, ['enum tgl_sync_function op'])
|
op('sync', 1, 'u32', Props.NO_DEST, ['enum tgl_sync_function op'])
|
||||||
op('schedule_barrier', 0, None, Props.NO_DEST)
|
op('schedule_barrier', 0, None, Props.NO_DEST)
|
||||||
|
|
||||||
for n in ['brd', 'illegal', 'goto', 'join', 'if', 'else',
|
for n in ['brd', 'illegal', 'goto', 'join', 'if', 'else',
|
||||||
|
|
|
||||||
|
|
@ -413,6 +413,10 @@ emit(struct brw_codegen *p,
|
||||||
|
|
||||||
case JAY_OPCODE_SYNC:
|
case JAY_OPCODE_SYNC:
|
||||||
brw_SYNC(p, jay_sync_op(I));
|
brw_SYNC(p, jay_sync_op(I));
|
||||||
|
|
||||||
|
if (!jay_is_null(I->src[0])) {
|
||||||
|
brw_set_src0(p, brw_eu_last_inst(p), stride(SRC(0), 0, 1, 0));
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case JAY_OPCODE_CMP:
|
case JAY_OPCODE_CMP:
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue