mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-21 01:38:23 +02:00
jay: rewrite demote/terminate/helper/halt handling
* implement terminate * fix HALT brokenness on all shader stages (we need a real end block) * optimize demote codegen a ton * optimize gl_HelperInvocation/gl_SampleMask * optimize "all lanes demoted" via HALT.any * optimize scheduling of stores/atomics/demotes in FS * optimize some texturing with helper invocations Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/42097>
This commit is contained in:
parent
52d4d47edc
commit
9cc686ac72
16 changed files with 405 additions and 178 deletions
|
|
@ -1039,7 +1039,6 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
|
|||
case nir_intrinsic_dpas_intel:
|
||||
case nir_intrinsic_convert_cmat_intel:
|
||||
case nir_intrinsic_load_coverage_mask_intel:
|
||||
case nir_intrinsic_load_dispatch_mask_intel:
|
||||
case nir_intrinsic_isberd_nv:
|
||||
case nir_intrinsic_isbewr_nv:
|
||||
case nir_intrinsic_vild_nv:
|
||||
|
|
|
|||
|
|
@ -2686,9 +2686,6 @@ system_value("simd_width_intel", 1)
|
|||
# IndirectDataStartAddress
|
||||
system_value("indirect_address_intel", 1)
|
||||
|
||||
# The dispatch mask as provided in the FS payload.
|
||||
system_value("dispatch_mask_intel", 1)
|
||||
|
||||
# The raw coverage mask as provided in the FS payload.
|
||||
# The semantics of it depend on the HW state.
|
||||
system_value("coverage_mask_intel", 1)
|
||||
|
|
@ -2704,8 +2701,8 @@ intrinsic("load_reloc_const_intel", dest_comp=1, bit_sizes=[32],
|
|||
indices=[PARAM_IDX, BASE], flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
|
||||
# Write a render target
|
||||
# src[] = { color, dual_color, src0_alpha, omask, depth, stencil, predicate }
|
||||
intrinsic("store_render_target_intel", [4, 4, 1, 1, 1, 1, 1], indices=[TARGET], bit_sizes=[32, 32, 32, 32, 32, 32, 1])
|
||||
# src[] = { color, dual_color, src0_alpha, omask, depth, stencil }
|
||||
intrinsic("store_render_target_intel", [4, 4, 1, 1, 1, 1], indices=[TARGET], bit_sizes=[32, 32, 32, 32, 32, 32])
|
||||
|
||||
# Shuffle with an offset in bytes instead of a lane index.
|
||||
# src[] = { payload, lane offset in bytes }
|
||||
|
|
|
|||
|
|
@ -72,10 +72,15 @@ assign_flag(struct flag_ra *ra,
|
|||
jay_def tmp = jay_alloc_def(ra->b, file, 1);
|
||||
|
||||
unsigned num_flags = jay_num_regs(ra->b->shader, FLAG);
|
||||
if (ra->b->shader->helpers_tracked) {
|
||||
/* Helper tracking uses the last flag by definition */
|
||||
num_flags--;
|
||||
}
|
||||
|
||||
tmp.reg = tie ? tie->reg : ballot ? 0 : ((ra->roundrobin++) % num_flags);
|
||||
|
||||
/* Uniform access (via a UFLAG or an inverse-ballot) would clobber the zero
|
||||
* for a ballot. We could refine this further but this should be ok for now.
|
||||
* for a ballot. TODO: This needs to be reworked to get the flag back.
|
||||
*/
|
||||
if (!ballot &&
|
||||
tmp.reg == 0 &&
|
||||
|
|
@ -84,6 +89,8 @@ assign_flag(struct flag_ra *ra,
|
|||
assert(!tie);
|
||||
tmp.reg = 1;
|
||||
ra->roundrobin++;
|
||||
|
||||
assert(num_flags >= 2); /* XXX: Not always true, FIXME */
|
||||
}
|
||||
|
||||
if (jay_index(canonical) < ra->nr_vars) {
|
||||
|
|
@ -193,6 +200,16 @@ assign_block(struct flag_ra *ra)
|
|||
I->type = JAY_TYPE_U32;
|
||||
I->dst = canonicalize_flag(I->dst);
|
||||
continue;
|
||||
} else if (I->op == JAY_OPCODE_SEND &&
|
||||
jay_send_skip_helpers(I) &&
|
||||
jay_is_no_mask(I)) {
|
||||
|
||||
/* jay_lower_helpers will clobber flag 0 to handle this case, see the
|
||||
* logic there. Evict whatever was there.
|
||||
*/
|
||||
ra->flag_to_global[0] = 0;
|
||||
assert(!I->predication);
|
||||
continue;
|
||||
} else if (I->type == JAY_TYPE_U1) {
|
||||
/* Boolean logic turns into bitwise logic on the canonical form */
|
||||
if (!jay_is_null(I->dst)) {
|
||||
|
|
|
|||
|
|
@ -8,12 +8,14 @@
|
|||
#include "compiler/brw/brw_eu_defines.h"
|
||||
#include "compiler/brw/brw_nir.h"
|
||||
#include "compiler/brw/brw_sampler.h"
|
||||
#include "compiler/gen/gen_enums.h"
|
||||
#include "compiler/intel_nir.h"
|
||||
#include "compiler/intel_shader_enums.h"
|
||||
#include "compiler/list.h"
|
||||
#include "intel/dev/intel_debug.h"
|
||||
#include "mda/debug_archiver.h"
|
||||
#include "util/bitscan.h"
|
||||
#include "util/bitset.h"
|
||||
#include "util/lut.h"
|
||||
#include "util/macros.h"
|
||||
#include "util/u_math.h"
|
||||
|
|
@ -81,10 +83,7 @@ struct nir_to_jay_state {
|
|||
const struct intel_device_info *devinfo;
|
||||
|
||||
jay_builder bld;
|
||||
|
||||
jay_block *current_block;
|
||||
jay_block *after_block;
|
||||
jay_block *break_block;
|
||||
jay_block *current_block, *after_block, *break_block, *exit_block;
|
||||
|
||||
unsigned indent;
|
||||
bool needs_final_halt;
|
||||
|
|
@ -832,19 +831,6 @@ scalars_equal(nir_scalar a, nir_scalar b)
|
|||
nir_scalar_as_uint(a) == nir_scalar_as_uint(b));
|
||||
}
|
||||
|
||||
static void
|
||||
jay_emit_halt_target(struct nir_to_jay_state *nj)
|
||||
{
|
||||
/* This final halt will re-enable the channels which got masked off by first
|
||||
* HALT.
|
||||
*/
|
||||
if (nj->needs_final_halt) {
|
||||
/* This avoids re-emitting the halt after EOT send */
|
||||
nj->needs_final_halt = false;
|
||||
jay_HALT_TARGET(&nj->bld);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
jay_emit_fb_write(struct nir_to_jay_state *nj, nir_intrinsic_instr *intr)
|
||||
{
|
||||
|
|
@ -860,8 +846,6 @@ jay_emit_fb_write(struct nir_to_jay_state *nj, nir_intrinsic_instr *intr)
|
|||
const int target = MAX2(((signed) nir_intrinsic_target(intr)), 0);
|
||||
const bool last = !nir_instr_next(&intr->instr);
|
||||
|
||||
jay_emit_halt_target(nj);
|
||||
|
||||
/* The hardware freaks out if we give it an omask without multisampling. */
|
||||
if (!b->shader->prog_data->fs.uses_omask) {
|
||||
omask = jay_null();
|
||||
|
|
@ -941,15 +925,10 @@ jay_emit_fb_write(struct nir_to_jay_state *nj, nir_intrinsic_instr *intr)
|
|||
srcs[len++] = jay_extract(packed, i);
|
||||
}
|
||||
|
||||
jay_inst *send =
|
||||
jay_SEND(b, .sfid = GEN_SFID_RENDER_CACHE, .check_tdr = true,
|
||||
.msg_desc = desc | (ex_desc << 32), .srcs = srcs, .nr_srcs = len,
|
||||
.type = JAY_TYPE_U32, .eot = last, .split = split);
|
||||
|
||||
/* Handle the disable predicate. It is logically inverted. */
|
||||
if (!nir_src_is_zero(intr->src[6])) {
|
||||
jay_add_predicate(b, send, jay_negate(nj_src(intr->src[6])));
|
||||
}
|
||||
jay_SEND(b, .sfid = GEN_SFID_RENDER_CACHE, .check_tdr = true,
|
||||
.msg_desc = desc | (ex_desc << 32), .srcs = srcs, .nr_srcs = len,
|
||||
.type = JAY_TYPE_U32, .eot = last, .split = split,
|
||||
.skip_helpers = true);
|
||||
}
|
||||
|
||||
static enum lsc_data_size
|
||||
|
|
@ -1572,19 +1551,6 @@ jay_emit_intrinsic(struct nir_to_jay_state *nj, nir_intrinsic_instr *intr)
|
|||
jay_MOV(b, dst, fs->coverage_mask);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_dispatch_mask_intel: {
|
||||
jay_def mask = jay_extract(nj->payload.u0, 15);
|
||||
|
||||
if (nj->s->dispatch_width == 32) {
|
||||
/* TODO: Optimize */
|
||||
jay_def hi = jay_extract(nj->payload.u1, 15);
|
||||
mask = jay_BFI2_u32(b, 0xffff0000, hi, mask);
|
||||
}
|
||||
|
||||
jay_MOV(b, dst, mask);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_subgroup_invocation: {
|
||||
jay_def lid = jay_alloc_def(b, UGPR, s->dispatch_width / 2);
|
||||
jay_LANE_ID_8(b, jay_extract_range(lid, 0, 4));
|
||||
|
|
@ -1600,8 +1566,16 @@ jay_emit_intrinsic(struct nir_to_jay_state *nj, nir_intrinsic_instr *intr)
|
|||
}
|
||||
|
||||
case nir_intrinsic_demote:
|
||||
jay_DEMOTE_u32(b, jay_null(), jay_null());
|
||||
break;
|
||||
case nir_intrinsic_demote_if:
|
||||
/* TODO: Already lowered, but need to implement for performance. */
|
||||
jay_DEMOTE(b, JAY_TYPE_U1, nj_src(intr->src[0]), 0)->conditional_mod =
|
||||
GEN_CONDITION_NE;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_helper_invocation:
|
||||
case nir_intrinsic_is_helper_invocation:
|
||||
jay_HELPER_SEL(b, dst, 1, 0);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_ddx:
|
||||
|
|
@ -2455,7 +2429,8 @@ jay_emit_texture(struct nir_to_jay_state *nj, nir_tex_instr *tex)
|
|||
.ex_desc = desc_ex_src, .header = header, .srcs = payload,
|
||||
.nr_srcs = n_sources, .type = JAY_TYPE_U32,
|
||||
.src_type = { src_type }, .dst = tmp, .uniform = payload_uniform,
|
||||
.bindless = surface_bindless, .pure = true);
|
||||
.bindless = surface_bindless, .pure = true,
|
||||
.skip_helpers = tex->skip_helpers);
|
||||
|
||||
/* If we sampled into a temporary, copy out to the final */
|
||||
if (residency) {
|
||||
|
|
@ -2484,7 +2459,8 @@ jay_emit_jump(struct nir_to_jay_state *nj, nir_jump_instr *instr)
|
|||
break;
|
||||
case nir_jump_halt:
|
||||
nj->needs_final_halt = true;
|
||||
jay_HALT(&nj->bld);
|
||||
jay_block_add_successor(nj->current_block, nj->exit_block, GPR);
|
||||
jay_HALT(&nj->bld, false);
|
||||
break;
|
||||
case nir_jump_return:
|
||||
/* Should be lowered */
|
||||
|
|
@ -2754,8 +2730,16 @@ static void
|
|||
jay_emit_eot(struct nir_to_jay_state *nj)
|
||||
{
|
||||
jay_builder *b = &nj->bld;
|
||||
b->cursor = jay_after_block(nj->exit_block);
|
||||
|
||||
jay_emit_halt_target(nj);
|
||||
/* Jump target for HALT */
|
||||
if (nj->needs_final_halt) {
|
||||
if (nj->s->stage == MESA_SHADER_FRAGMENT) {
|
||||
assert(nj->s->helpers_tracked);
|
||||
} else {
|
||||
jay_HALT_TARGET(&nj->bld);
|
||||
}
|
||||
}
|
||||
|
||||
if (mesa_shader_stage_is_compute(nj->nir->info.stage)) {
|
||||
jay_def u0 = nj->payload.u0;
|
||||
|
|
@ -2773,12 +2757,18 @@ jay_emit_eot(struct nir_to_jay_state *nj)
|
|||
.uniform = true);
|
||||
} else if (nj->nir->info.stage == MESA_SHADER_VERTEX ||
|
||||
nj->nir->info.stage == MESA_SHADER_TESS_EVAL) {
|
||||
jay_block *block = jay_last_block(nj->f);
|
||||
jay_block *block = jay_last_source_block(nj->f);
|
||||
jay_inst *I = jay_last_inst(block);
|
||||
|
||||
assert(!nj->needs_final_halt && "halt not supported with URB");
|
||||
|
||||
/* TODO: What if this isn't the case? Do we need a no-op store...? */
|
||||
assert(I && I->op == JAY_OPCODE_SEND && jay_send_sfid(I) == GEN_SFID_URB);
|
||||
|
||||
/* Pluck out the final SEND and put it in the exit block */
|
||||
jay_set_send_eot(I, true);
|
||||
jay_remove_instruction(I);
|
||||
jay_builder_insert(b, I);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -3012,6 +3002,14 @@ setup_fragment_payload(struct nir_to_jay_state *nj, struct payload_builder *p)
|
|||
}
|
||||
}
|
||||
|
||||
/* INIT_HELPERS reads UGPRs but has no SSA write. Therefore to minimize
|
||||
* pressure, we want to hoist it as much as possible.
|
||||
*/
|
||||
if (nj->s->helpers_tracked) {
|
||||
jay_INIT_HELPERS(&nj->bld, jay_extract(nj->payload.u0, 15),
|
||||
payload_u1(nj, 15, 1));
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(split_gprs); ++i) {
|
||||
if (!jay_is_null(split[i]) && split_gprs[i].def->file == UGPR) {
|
||||
*(split_gprs[i].def) =
|
||||
|
|
@ -3178,7 +3176,11 @@ jay_from_nir_function(const struct intel_device_info *devinfo,
|
|||
jay_setup_payload(&nj);
|
||||
}
|
||||
|
||||
nj.exit_block = jay_create_block(&nj);
|
||||
jay_emit_cf_list(&nj, &impl->body);
|
||||
jay_block_add_successor(nj.current_block, nj.exit_block, GPR);
|
||||
|
||||
list_addtail(&nj.exit_block->link, &f->blocks);
|
||||
jay_emit_eot(&nj);
|
||||
jay_remove_unreachable_blocks(f);
|
||||
}
|
||||
|
|
@ -3216,8 +3218,9 @@ jay_compile(const struct intel_device_info *devinfo,
|
|||
INTEL_DEBUG(intel_debug_flag_for_shader_stage(nir->info.stage)) &&
|
||||
!(nir->info.internal || NIR_DEBUG(PRINT_INTERNAL));
|
||||
|
||||
bool track_helpers = false;
|
||||
unsigned simd_width =
|
||||
jay_process_nir(devinfo, nir, prog_data, key, archiver);
|
||||
jay_process_nir(devinfo, nir, prog_data, key, archiver, &track_helpers);
|
||||
|
||||
if (debug) {
|
||||
/* We can't use nir_print_shader since it reindexes SSA defs. */
|
||||
|
|
@ -3232,6 +3235,7 @@ jay_compile(const struct intel_device_info *devinfo,
|
|||
s->devinfo = devinfo;
|
||||
s->prog_data = prog_data;
|
||||
s->archiver = archiver;
|
||||
s->helpers_tracked = track_helpers;
|
||||
|
||||
nir_foreach_function_impl(impl, nir) {
|
||||
jay_from_nir_function(devinfo, nir, s, impl);
|
||||
|
|
@ -3288,6 +3292,10 @@ jay_compile(const struct intel_device_info *devinfo,
|
|||
JAY_PASS(s, jay_insert_payload_swizzle);
|
||||
}
|
||||
|
||||
if (s->stage == MESA_SHADER_FRAGMENT && s->helpers_tracked) {
|
||||
JAY_PASS(s, jay_lower_helpers);
|
||||
}
|
||||
|
||||
if (!(jay_debug & JAY_DBG_NOOPT)) {
|
||||
/* jay_assign_accumulators uses a conservative liveness analysis for
|
||||
* predication, so assign accumulators before predicating for better
|
||||
|
|
|
|||
|
|
@ -787,6 +787,12 @@ typedef struct jay_shader {
|
|||
unsigned scratch_size;
|
||||
unsigned payload_gprs, payload_ugprs, push_grfs;
|
||||
|
||||
/**
|
||||
* In a fragment shader, whether a helper invocation flag is tracked. Flag RA
|
||||
* must reserve the relevant flag.
|
||||
*/
|
||||
bool helpers_tracked;
|
||||
|
||||
/**
|
||||
* Ralloc linear context. Since we don't typically free as we go,
|
||||
* most allocations should go through this context for efficiency.
|
||||
|
|
@ -1126,7 +1132,7 @@ jay_new_block(jay_function *f)
|
|||
static inline bool
|
||||
jay_op_is_control_flow(enum jay_opcode op)
|
||||
{
|
||||
return op >= JAY_OPCODE_BRD && op <= JAY_OPCODE_LOOP_ONCE;
|
||||
return op >= JAY_OPCODE_BRD && op <= JAY_OPCODE_HALT;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -1201,6 +1207,9 @@ jay_first_predecessor(jay_block *block, enum jay_file file)
|
|||
#define jay_foreach_block_rev(f, v) \
|
||||
list_for_each_entry_rev(jay_block, v, &f->blocks, link)
|
||||
|
||||
#define jay_foreach_block_safe_rev(f, v) \
|
||||
list_for_each_entry_safe_rev(jay_block, v, &f->blocks, link)
|
||||
|
||||
#define jay_foreach_block_from(f, from, v) \
|
||||
list_for_each_entry_from(jay_block, v, from, &f->blocks, link)
|
||||
|
||||
|
|
@ -1238,7 +1247,7 @@ jay_first_predecessor(jay_block *block, enum jay_file file)
|
|||
jay_foreach_inst_in_block_safe(block, v)
|
||||
|
||||
#define jay_foreach_inst_in_func_safe_rev(func, block, v) \
|
||||
jay_foreach_block_rev(func, block) \
|
||||
jay_foreach_block_safe_rev(func, block) \
|
||||
jay_foreach_inst_in_block_safe_rev(block, v)
|
||||
|
||||
#define jay_foreach_inst_in_shader(s, func, inst) \
|
||||
|
|
@ -1355,6 +1364,15 @@ jay_last_block(jay_function *f)
|
|||
return list_last_entry(&f->blocks, jay_block, link);
|
||||
}
|
||||
|
||||
static inline jay_block *
|
||||
jay_last_source_block(jay_function *f)
|
||||
{
|
||||
if (list_is_empty(&f->blocks) || list_is_singular(&f->blocks))
|
||||
return NULL;
|
||||
else
|
||||
return list_last_entry(&jay_last_block(f)->link, jay_block, link);
|
||||
}
|
||||
|
||||
static inline jay_inst *
|
||||
jay_last_inst(jay_block *block)
|
||||
{
|
||||
|
|
@ -1373,11 +1391,14 @@ jay_next_block(jay_block *block)
|
|||
static inline void
|
||||
jay_block_add_successor(jay_block *block, jay_block *succ, enum jay_file file)
|
||||
{
|
||||
/* Prune duplicate successors so the caller doesn't need to worry */
|
||||
jay_block **succs = jay_successors(block, file);
|
||||
unsigned i = succs[0] ? 1 : 0;
|
||||
if (succs[0] == succ || succs[1] == succ) {
|
||||
return;
|
||||
}
|
||||
|
||||
assert(succ && succs[0] != succ && succs[1] != succ);
|
||||
assert(succs[i] == NULL && "at most 2 successors");
|
||||
unsigned i = succs[0] ? 1 : 0;
|
||||
assert(succ && succs[i] == NULL && "at most 2 successors");
|
||||
|
||||
succs[i] = succ;
|
||||
util_dynarray_append(jay_predecessors(succ, file), block);
|
||||
|
|
|
|||
191
src/intel/compiler/jay/jay_lower_helpers.c
Normal file
191
src/intel/compiler/jay/jay_lower_helpers.c
Normal file
|
|
@ -0,0 +1,191 @@
|
|||
/*
|
||||
* Copyright 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#include "compiler/gen/gen_enums.h"
|
||||
#include "util/list.h"
|
||||
#include "util/u_dynarray.h"
|
||||
#include "jay_builder.h"
|
||||
#include "jay_builder_opcodes.h"
|
||||
#include "jay_ir.h"
|
||||
#include "jay_opcodes.h"
|
||||
#include "jay_private.h"
|
||||
|
||||
struct ctx {
|
||||
jay_block *last_source_block;
|
||||
jay_def helper_flag;
|
||||
bool halted, uses_terminate;
|
||||
unsigned instr_left;
|
||||
};
|
||||
|
||||
/*
|
||||
* Takes src, a linked list containing the element pivot in the middle, and dst
|
||||
* an empty list. Moves all elements up to and including pivot from src to dst,
|
||||
* leaving the rest in dst. Semantically equivalent to a loop of list_move but
|
||||
* O(1) time regardless of the position of pivot in the list.
|
||||
*/
|
||||
static void
|
||||
list_partition(struct list_head *src,
|
||||
struct list_head *dst,
|
||||
struct list_head *pivot)
|
||||
{
|
||||
/* dst runs from src[0] to pivot */
|
||||
dst->next = src->next;
|
||||
dst->prev = pivot;
|
||||
dst->next->prev = dst;
|
||||
|
||||
/* src runs from pivot[1:] to end of src */
|
||||
src->next = pivot->next;
|
||||
src->prev = src->prev;
|
||||
|
||||
src->next->prev = src;
|
||||
pivot->next = dst;
|
||||
|
||||
list_validate(dst);
|
||||
list_validate(src);
|
||||
}
|
||||
|
||||
/*
 * Lower the helper-tracking pseudo-instructions of a single block, visiting
 * its instructions in reverse order:
 *
 *  - INIT_HELPERS becomes NOTs of its sources written to the helper flag.
 *  - HALT is only recorded in ctx.
 *  - DEMOTE becomes a CMP that writes the helper flag, optionally followed by
 *    a predicated HALT that splits the block.
 *  - HELPER_SEL becomes a SEL on the helper flag.
 *  - SENDs flagged skip_helpers get predicated.
 *
 * If a HALT is inserted, the block is split and the function returns early;
 * the instructions moved to the new block are not visited by this call.
 * NOTE(review): presumably the caller's reverse block walk visits the new
 * block next, since it is linked in front of `block` - confirm.
 */
static void
|
||||
process_block(struct ctx *ctx, jay_builder *b, jay_block *block)
|
||||
{
|
||||
jay_foreach_inst_in_block_safe_rev(block, I) {
|
||||
b->cursor = jay_before_inst(I);
|
||||
|
||||
if (I->op == JAY_OPCODE_INIT_HELPERS) {
|
||||
/* Helper flag (low half) = bitwise NOT of the first 16-bit source */
jay_NOT(b, ctx->helper_flag, I->src[0])->type = JAY_TYPE_U16;
|
||||
|
||||
/* The second source, when present, fills the high half of the flag */
if (!jay_is_null(I->src[1])) {
|
||||
jay_def hi = ctx->helper_flag;
|
||||
hi.hi = true;
|
||||
jay_NOT(b, hi, I->src[1])->type = JAY_TYPE_U16;
|
||||
}
|
||||
|
||||
jay_remove_instruction(I);
|
||||
} else if (I->op == JAY_OPCODE_HALT) {
|
||||
/* Remember the HALT: the exit block needs a HALT_TARGET, and demotes
 * visited later in this reverse walk will not add their own HALT.
 */
ctx->halted = ctx->uses_terminate = true;
|
||||
} else if (I->op == JAY_OPCODE_DEMOTE) {
|
||||
enum gen_condition cond = I->conditional_mod;
|
||||
jay_def x = I->src[0], y = I->src[1];
|
||||
|
||||
/* Unconditional discard */
|
||||
/* No condition: compare UGPR 0 against itself for equality instead */
if (!cond) {
|
||||
cond = GEN_CONDITION_EQ;
|
||||
I->type = JAY_TYPE_U32;
|
||||
x = y = jay_bare_reg(UGPR, 0);
|
||||
}
|
||||
|
||||
/* The comparison writes the helper flag and is predicated on the negated
 * helper flag; it replaces the DEMOTE.
 */
jay_inst *cmp = jay_CMP(b, I->type, cond, ctx->helper_flag, x, y);
|
||||
jay_add_predicate(b, cmp, jay_negate(ctx->helper_flag));
|
||||
jay_remove_instruction(I);
|
||||
|
||||
/* We are allowed to halt after a demote if all lanes are inactive
|
||||
* for performance, but it's not required for correctness. Only do
|
||||
* it if it's likely profitable.
|
||||
*
|
||||
* We assume a shader either uses SPIR-V demote or terminate, but
|
||||
* not both. If the shader uses terminate, there will be an actual
|
||||
* HALT instruction after us so we don't bother with a second HALT
|
||||
* here. Strictly there's a corner case here if all non-helpers are
|
||||
* terminated but lanes spawned as helpers are not terminated, but
|
||||
* this is probably reasonable as a tradeoff.
|
||||
*/
|
||||
if (ctx->instr_left > 6 && !ctx->uses_terminate) {
|
||||
/* HALT created with predicate_all = true, predicated on the helper flag */
jay_inst *halt = jay_HALT(b, true);
|
||||
halt = jay_add_predicate(b, halt, ctx->helper_flag);
|
||||
ctx->halted = true;
|
||||
|
||||
/* Split the block: everything up to and including the HALT moves into a
 * new block which is linked into the block list relative to `block`.
 */
jay_block *split = jay_new_block(b->func);
|
||||
split->indent = block->indent;
|
||||
|
||||
list_partition(&block->instructions, &split->instructions,
|
||||
&halt->link);
|
||||
list_addtail(&split->link, &block->link);
|
||||
|
||||
/* The split block either falls through or jumps to the exit */
|
||||
/* Retarget every predecessor edge that pointed at `block` to `split` */
for (unsigned file = GPR; file <= UGPR; ++file) {
|
||||
jay_foreach_predecessor(block, pred, file) {
|
||||
jay_block **succs = jay_successors(*pred, file);
|
||||
unsigned idx = succs[0] == block ? 0 : 1;
|
||||
succs[idx] = split;
|
||||
}
|
||||
}
|
||||
/* Hand the predecessor arrays over to `split` and give `block` fresh,
 * empty ones.
 */
typed_memcpy(&split->physical_preds, &block->physical_preds, 1);
|
||||
typed_memcpy(&split->logical_preds, &block->logical_preds, 1);
|
||||
util_dynarray_init(&block->physical_preds, block);
|
||||
util_dynarray_init(&block->logical_preds, block);
|
||||
|
||||
/* `split` gets two successors: `block` and the function's last block */
jay_block_add_successor(split, block, GPR);
|
||||
jay_block_add_successor(split, jay_last_block(b->func), GPR);
|
||||
return;
|
||||
}
|
||||
} else if (I->op == JAY_OPCODE_HELPER_SEL) {
|
||||
/* Select between the two sources using the helper flag */
jay_SEL(b, JAY_TYPE_U32, I->dst, I->src[0], I->src[1],
|
||||
ctx->helper_flag);
|
||||
jay_remove_instruction(I);
|
||||
} else if (I->op == JAY_OPCODE_SEND && jay_send_skip_helpers(I)) {
|
||||
if (jay_is_no_mask(I)) {
|
||||
/* jay_assign_flags ensured this is free for us, see logic there */
|
||||
/* Write UFLAG 0 from NOT(helper flag) with a != conditional modifier at
 * dispatch width, then predicate the SEND on it.
 */
jay_def t = jay_bare_reg(UFLAG, 0);
|
||||
jay_inst *not = jay_NOT(b, jay_null(), ctx->helper_flag);
|
||||
not->type = JAY_TYPE_U | b->shader->dispatch_width;
|
||||
jay_set_conditional_mod(b, not, t, GEN_CONDITION_NE);
|
||||
jay_add_predicate(b, I, t);
|
||||
} else {
|
||||
/* Otherwise predicate the SEND directly on the negated helper flag */
jay_add_predicate(b, I, jay_negate(ctx->helper_flag));
|
||||
}
|
||||
}
|
||||
|
||||
/* Count every instruction visited, for the profitability check above */
++ctx->instr_left;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
jay_lower_helpers(jay_shader *shader)
|
||||
{
|
||||
jay_function *entrypoint = jay_shader_get_entrypoint(shader);
|
||||
jay_block *exit_block = jay_last_block(entrypoint);
|
||||
jay_block *last_source_block = jay_last_source_block(entrypoint);
|
||||
|
||||
/* By ABI with jay_assign_flags, the last flag is used to track helpers */
|
||||
assert(shader->helpers_tracked);
|
||||
unsigned helper_flag_no = jay_num_regs(shader, FLAG) - 1;
|
||||
struct ctx ctx = { .helper_flag = jay_bare_reg(FLAG, helper_flag_no) };
|
||||
jay_builder b = jay_init_builder(entrypoint, jay_after_block(exit_block));
|
||||
|
||||
jay_foreach_block_rev(entrypoint, block) {
|
||||
process_block(&ctx, &b, block);
|
||||
}
|
||||
|
||||
/* Fill out the exit block */
|
||||
b.cursor = jay_after_block(exit_block);
|
||||
if (ctx.halted) {
|
||||
jay_HALT_TARGET(&b);
|
||||
}
|
||||
|
||||
/* Try to pluck out the last instruction and use it for EOT. This breaks SSA
|
||||
* dominance invariants but that's why this is a post-RA, post-sched pass.
|
||||
* Only SWSB has to deal with the resulting mess.
|
||||
*
|
||||
* There may be no such send (in case of an unconditional terminate). In that
|
||||
* case, insert a predicated-out null RT write to use for EOT.
|
||||
*/
|
||||
jay_inst *send = jay_last_inst(last_source_block);
|
||||
if (send && send->op == JAY_OPCODE_SEND && jay_send_eot(send)) {
|
||||
jay_remove_instruction(send);
|
||||
jay_builder_insert(&b, send);
|
||||
} else {
|
||||
jay_def dummy = jay_bare_reg(GPR, 0);
|
||||
dummy.num_values_m1 = 4 - 1;
|
||||
|
||||
unsigned op = shader->dispatch_width == 32 ?
|
||||
XE2_DATAPORT_RENDER_TARGET_WRITE_SIMD32_SINGLE_SOURCE :
|
||||
BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
|
||||
uint64_t desc = brw_fb_write_desc(shader->devinfo, 0, op, true, false);
|
||||
uint64_t ex_desc = (1 << 20) /* null rt */;
|
||||
|
||||
send = jay_SEND(&b, .sfid = GEN_SFID_RENDER_CACHE, .check_tdr = true,
|
||||
.msg_desc = desc | (ex_desc << 32), .nr_srcs = 1,
|
||||
.srcs = &dummy, .type = JAY_TYPE_U32, .eot = true);
|
||||
send = jay_add_predicate(&b, send, jay_negate(ctx.helper_flag));
|
||||
}
|
||||
}
|
||||
|
|
@ -96,7 +96,7 @@ try_swap_src01(jay_inst *I)
|
|||
if (I->op == JAY_OPCODE_SEL) {
|
||||
/* sel(a, b, p) = sel(b, a, !p) */
|
||||
I->src[2].negate ^= true;
|
||||
} else if (I->op == JAY_OPCODE_CMP) {
|
||||
} else if (I->op == JAY_OPCODE_CMP || I->op == JAY_OPCODE_DEMOTE) {
|
||||
I->conditional_mod = gen_condition_swap_sources(I->conditional_mod);
|
||||
} else if (I->op == JAY_OPCODE_BFN) {
|
||||
jay_set_bfn_ctrl(I, util_lut3_swap_sources(jay_bfn_ctrl(I), 0, 1));
|
||||
|
|
|
|||
|
|
@ -46,21 +46,6 @@ nj_index_ssa_defs(nir_shader *nir)
|
|||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_helper_invocation(nir_builder *b, nir_intrinsic_instr *intr, void *_)
|
||||
{
|
||||
if (intr->intrinsic != nir_intrinsic_load_helper_invocation)
|
||||
return false;
|
||||
|
||||
/* TODO: Is this right for multisampling? */
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
nir_def *active =
|
||||
nir_inot(b, nir_inverse_ballot(b, nir_load_dispatch_mask_intel(b)));
|
||||
|
||||
nir_def_replace(&intr->def, active);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_frag_coord(nir_builder *b, nir_intrinsic_instr *intr, void *simd_)
|
||||
{
|
||||
|
|
@ -178,8 +163,7 @@ insert_rt_store(nir_builder *b,
|
|||
nir_def *src0_colour,
|
||||
nir_def *depth,
|
||||
nir_def *stencil,
|
||||
nir_def *sample_mask,
|
||||
nir_def *disable)
|
||||
nir_def *sample_mask)
|
||||
{
|
||||
bool null_rt = target < 0;
|
||||
|
||||
|
|
@ -197,8 +181,7 @@ insert_rt_store(nir_builder *b,
|
|||
nir_def *src0_alpha = nir_channel_or_undef(b, src0_colour ?: colour, 3);
|
||||
|
||||
nir_store_render_target_intel(b, colour, dual_colour, src0_alpha,
|
||||
sample_mask, depth, stencil, disable,
|
||||
.target = target);
|
||||
sample_mask, depth, stencil, .target = target);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -216,14 +199,10 @@ lower_fragment_outputs(nir_function_impl *impl,
|
|||
|
||||
nir_def *undef = nir_undef(b, 1, 32);
|
||||
|
||||
nir_def *disable = b->shader->info.fs.uses_discard ?
|
||||
nir_is_helper_invocation(b, 1) :
|
||||
nir_imm_false(b);
|
||||
|
||||
if (ctx.dual_blend) {
|
||||
insert_rt_store(b, 0, ctx.colour[0], ctx.colour[1], NULL,
|
||||
ctx.depth ?: undef, ctx.stencil ?: undef,
|
||||
ctx.sample_mask ?: undef, disable);
|
||||
ctx.sample_mask ?: undef);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -239,83 +218,13 @@ lower_fragment_outputs(nir_function_impl *impl,
|
|||
if (ctx.colour[i]) {
|
||||
insert_rt_store(b, i, ctx.colour[i], NULL,
|
||||
i > 0 ? ctx.colour[0] : NULL, ctx.depth ?: undef,
|
||||
ctx.stencil ?: undef, ctx.sample_mask ?: undef,
|
||||
disable);
|
||||
ctx.stencil ?: undef, ctx.sample_mask ?: undef);
|
||||
}
|
||||
}
|
||||
|
||||
insert_rt_store(b, last, last >= 0 ? ctx.colour[last] : NULL, NULL,
|
||||
last > 0 ? ctx.colour[0] : NULL, ctx.depth ?: undef,
|
||||
ctx.stencil ?: undef, ctx.sample_mask ?: undef, disable);
|
||||
}
|
||||
|
||||
/**
|
||||
* Drop render target stores with unconditional discards.
|
||||
*/
|
||||
static bool
|
||||
opt_unconditional_discards(nir_shader *nir)
|
||||
{
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
|
||||
nir_block *block = nir_impl_last_block(impl);
|
||||
|
||||
bool progress = false;
|
||||
bool any_remaining_rt_writes = false;
|
||||
|
||||
nir_foreach_instr_reverse_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
|
||||
if (intr->intrinsic == nir_intrinsic_store_render_target_intel) {
|
||||
nir_scalar discard = nir_scalar_resolved(intr->src[6].ssa, 0);
|
||||
if (nir_scalar_is_const(discard) && nir_scalar_as_bool(discard)) {
|
||||
/* Drop store with unconditional discard */
|
||||
nir_instr_remove(instr);
|
||||
progress = true;
|
||||
} else {
|
||||
/* This RT store might actually happen */
|
||||
any_remaining_rt_writes = true;
|
||||
}
|
||||
} else if ((intr->intrinsic == nir_intrinsic_demote ||
|
||||
intr->intrinsic == nir_intrinsic_terminate) &&
|
||||
!any_remaining_rt_writes) {
|
||||
/* Delete unconditional demotes/terminates in the end block... */
|
||||
nir_instr_remove(instr);
|
||||
progress = true;
|
||||
} else {
|
||||
/* ...but stop if we find an intrinsic that has a side-effect */
|
||||
const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
|
||||
if (!(info->flags & NIR_INTRINSIC_CAN_ELIMINATE))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* See if discards still exist in the program and flag accordingly */
|
||||
nir->info.fs.uses_discard = false;
|
||||
|
||||
nir_foreach_block(block, impl) {
|
||||
nir_foreach_instr(instr, block) {
|
||||
if (instr->type == nir_instr_type_intrinsic) {
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
if (intr->intrinsic == nir_intrinsic_demote ||
|
||||
intr->intrinsic == nir_intrinsic_demote_if ||
|
||||
intr->intrinsic == nir_intrinsic_terminate ||
|
||||
intr->intrinsic == nir_intrinsic_terminate_if)
|
||||
nir->info.fs.uses_discard = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* If we eliminated all RT stores, add a Null RT store to end the thread. */
|
||||
if (!any_remaining_rt_writes) {
|
||||
nir_builder b = nir_builder_at(nir_after_impl(impl));
|
||||
nir_def *undef = nir_undef(&b, 1, 32);
|
||||
insert_rt_store(&b, -1, NULL, NULL, NULL, undef, undef, undef,
|
||||
nir_imm_true(&b));
|
||||
}
|
||||
|
||||
return nir_progress(progress, impl, nir_metadata_control_flow);
|
||||
ctx.stencil ?: undef, ctx.sample_mask ?: undef);
|
||||
}
|
||||
|
||||
unsigned
|
||||
|
|
@ -323,7 +232,8 @@ jay_process_nir(const struct intel_device_info *devinfo,
|
|||
nir_shader *nir,
|
||||
union brw_any_prog_data *prog_data,
|
||||
union brw_any_prog_key *key,
|
||||
debug_archiver *archiver)
|
||||
debug_archiver *archiver,
|
||||
bool *track_helpers)
|
||||
{
|
||||
enum mesa_shader_stage stage = nir->info.stage;
|
||||
struct brw_compiler compiler = { .devinfo = devinfo };
|
||||
|
|
@ -475,10 +385,14 @@ jay_process_nir(const struct intel_device_info *devinfo,
|
|||
|
||||
lower_fragment_outputs(nir_shader_get_entrypoint(nir), devinfo,
|
||||
key->fs.nr_color_regions, simd_width);
|
||||
JAY_NIR_PASS(nir_lower_helper_writes, true);
|
||||
JAY_NIR_PASS(nir_lower_is_helper_invocation);
|
||||
JAY_NIR_PASS(nir_shader_intrinsics_pass, lower_helper_invocation,
|
||||
nir_metadata_control_flow, NULL);
|
||||
|
||||
/* nir_lower_terminate_to_demote will hamper our ability to schedule
|
||||
* terminates (since it turns them into real control flow), so run
|
||||
* nir_opt_move_discards_to_top first as a prepass. That should help
|
||||
* scheduling demotes too (which is more important).
|
||||
*/
|
||||
JAY_NIR_PASS(nir_opt_move_discards_to_top);
|
||||
JAY_NIR_PASS(nir_lower_terminate_to_demote);
|
||||
|
||||
if (key->fs.alpha_to_coverage != INTEL_NEVER) {
|
||||
/* Run constant fold optimization in order to get the correct source
|
||||
|
|
@ -495,8 +409,6 @@ jay_process_nir(const struct intel_device_info *devinfo,
|
|||
*/
|
||||
brw_nir_optimize(pt);
|
||||
|
||||
NIR_PASS(_, nir, opt_unconditional_discards);
|
||||
|
||||
// TODO
|
||||
// JAY_NIR_PASS(brw_nir_move_interpolation_to_top);
|
||||
|
||||
|
|
@ -556,10 +468,31 @@ jay_process_nir(const struct intel_device_info *devinfo,
|
|||
|
||||
/* Run divergence analysis at the end */
|
||||
nir_sweep(nir);
|
||||
nj_index_ssa_defs(nir);
|
||||
nir_divergence_analysis(nir);
|
||||
|
||||
if (stage != MESA_SHADER_FRAGMENT)
|
||||
if (stage == MESA_SHADER_FRAGMENT) {
|
||||
/* Certain features require tracking helpers for correctness */
|
||||
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
|
||||
*track_helpers |= nir->info.fs.uses_discard || nir->info.writes_memory;
|
||||
*track_helpers |= BITSET_TEST(nir->info.system_values_read,
|
||||
SYSTEM_VALUE_HELPER_INVOCATION);
|
||||
|
||||
/* ...but this is more subtle. nir_opt_load_skip_helpers flags texturing
|
||||
* operations that we can skip for bandwidth savings. We need divergence
|
||||
* info for this, so we run late.
|
||||
*
|
||||
* We may or may not want to force track_helpers on if this makes
|
||||
* progress. Possibly driconf'ing on furmark makes sense.
|
||||
*/
|
||||
struct nir_opt_load_skip_helpers_options skip_helpers = {
|
||||
.no_add_divergence = true
|
||||
};
|
||||
JAY_NIR_PASS(nir_opt_load_skip_helpers, &skip_helpers);
|
||||
} else {
|
||||
jay_populate_prog_data(devinfo, nir, prog_data, key, nr_packed_regs);
|
||||
}
|
||||
|
||||
/* This must be the very last pass since nir_print itself will reindex! */
|
||||
nj_index_ssa_defs(nir);
|
||||
return simd_width;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -81,7 +81,6 @@ op('bfrev', 1, 'u32', Props.NEGATE)
|
|||
op('cbit', 1, 'u32')
|
||||
op('cmp', 2, 'u32', Props.NEGATE | Props.CMOD)
|
||||
|
||||
|
||||
# With an 8/16-bit type, `index` specifies the element index of the source
|
||||
# within the 32-bit word. For example, if src_type == U16 and index == 1, this
|
||||
# converts the upper 16-bits of the input.
|
||||
|
|
@ -134,9 +133,11 @@ op('schedule_barrier', 0, None, Props.NO_DEST)
|
|||
|
||||
for n in ['brd', 'illegal', 'goto', 'join', 'if', 'else',
|
||||
'endif', 'while', 'break', 'cont', 'call', 'calla', 'jmpi', 'ret',
|
||||
'loop_once', 'halt', 'halt_target']:
|
||||
'loop_once', 'halt_target']:
|
||||
op(n, 0, None, Props.NO_DEST)
|
||||
|
||||
op('halt', 0, None, Props.NO_DEST, ['bool predicate_all'])
|
||||
|
||||
op('send', 4, None, Props.SIDE_EFFECTS, [
|
||||
'gen_sfid sfid',
|
||||
'uint8_t sbid',
|
||||
|
|
@ -234,6 +235,15 @@ op('dpas', 3, 'u32', 0, [
|
|||
'uint8_t pad[3]',
|
||||
])
|
||||
|
||||
# Initialize helper invocations. Takes 16-bit halves of the dispatch mask.
|
||||
op('init_helpers', 2, 'u16', Props.NO_DEST)
|
||||
|
||||
# Compare the arguments and demote based on the result.
|
||||
op('demote', 2, 'u1 u16 u32 u64 s16 s32 s64 f16 f32 f64', Props.NEGATE | Props.NO_DEST)
|
||||
|
||||
# Equivalent to NIR bcsel(@is_helper_invocation, source 0, source 1)
|
||||
op('helper_sel', 2, 'u1 u32')
|
||||
|
||||
OPCODES = _opcodes
|
||||
|
||||
ENUMS: 'Mapping[str, tuple[str, list[str]]]' = {
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "compiler/gen/gen_enums.h"
|
||||
#include "util/bitset.h"
|
||||
#include "util/lut.h"
|
||||
#include "jay_builder.h"
|
||||
|
|
@ -121,6 +122,30 @@ propagate_not(jay_inst *I, unsigned s, jay_inst *mod)
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fuse demote(cmp(x, y) != 0) to demote(x CMP y).
|
||||
*/
|
||||
static void
|
||||
fuse_demote(jay_inst *demote, jay_inst **defs)
|
||||
{
|
||||
if (!(jay_is_ssa(demote->src[0]) &&
|
||||
jay_is_zero(demote->src[1]) &&
|
||||
demote->type == JAY_TYPE_U1 &&
|
||||
demote->conditional_mod == GEN_CONDITION_NE)) {
|
||||
return;
|
||||
}
|
||||
|
||||
jay_inst *cmp = defs[jay_index(demote->src[0])];
|
||||
if (cmp->op != JAY_OPCODE_CMP || cmp->predication) {
|
||||
return;
|
||||
}
|
||||
|
||||
demote->conditional_mod = cmp->conditional_mod;
|
||||
demote->src[0] = cmp->src[0];
|
||||
demote->src[1] = cmp->src[1];
|
||||
demote->type = cmp->type;
|
||||
}
|
||||
|
||||
static void
|
||||
propagate_forwards(jay_function *f)
|
||||
{
|
||||
|
|
@ -156,6 +181,11 @@ propagate_forwards(jay_function *f)
|
|||
if (I->op == JAY_OPCODE_PHI_SRC || I->op == JAY_OPCODE_SEND)
|
||||
continue;
|
||||
|
||||
/* We fuse demote forwards & upfront to avoid fighting cmod prop */
|
||||
if (I->op == JAY_OPCODE_DEMOTE) {
|
||||
fuse_demote(I, defs);
|
||||
}
|
||||
|
||||
jay_foreach_ssa_src(I, s) {
|
||||
/* Copy propagate whole vectors */
|
||||
jay_def src = I->src[s];
|
||||
|
|
|
|||
|
|
@ -34,7 +34,8 @@ unsigned jay_process_nir(const struct intel_device_info *devinfo,
|
|||
nir_shader *nir,
|
||||
union brw_any_prog_data *prog_data,
|
||||
union brw_any_prog_key *key,
|
||||
debug_archiver *archiver);
|
||||
debug_archiver *archiver,
|
||||
bool *track_helpers);
|
||||
|
||||
void jay_compute_liveness(jay_function *f);
|
||||
void jay_calculate_register_demands(jay_function *f);
|
||||
|
|
@ -84,6 +85,7 @@ void jay_schedule_pressure(jay_shader *s);
|
|||
|
||||
void jay_lower_pre_ra(jay_shader *s);
|
||||
void jay_lower_post_ra(jay_shader *s);
|
||||
void jay_lower_helpers(jay_shader *s);
|
||||
void jay_lower_spill(jay_function *func);
|
||||
void jay_lower_simd_width(jay_shader *s);
|
||||
void jay_lower_scoreboard(jay_shader *s);
|
||||
|
|
|
|||
|
|
@ -100,9 +100,17 @@ populate_dag(struct sched_ctx *ctx,
|
|||
address = ctx->dag.node;
|
||||
}
|
||||
|
||||
/* Serialize side effects for now */
|
||||
/* Serialize side effects for now, including SENDs which need to be
|
||||
* predicated away after a demote.
|
||||
*/
|
||||
if ((I->op == JAY_OPCODE_SEND && !jay_send_pure(I)) ||
|
||||
I->op == JAY_OPCODE_SCHEDULE_BARRIER) {
|
||||
I->op == JAY_OPCODE_SCHEDULE_BARRIER ||
|
||||
I->op == JAY_OPCODE_INIT_HELPERS ||
|
||||
I->op == JAY_OPCODE_DEMOTE ||
|
||||
I->op == JAY_OPCODE_HELPER_SEL ||
|
||||
(I->op == JAY_OPCODE_SEND &&
|
||||
func->shader->helpers_tracked &&
|
||||
jay_send_skip_helpers(I))) {
|
||||
|
||||
jay_dag_add_edge(&ctx->dag, sidefx);
|
||||
sidefx = ctx->dag.node;
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ max_simd_width(const jay_shader *shader, const jay_inst *I)
|
|||
I->op == JAY_OPCODE_EXTRACT_BYTE_PER_8LANES ||
|
||||
I->op == JAY_OPCODE_OFFSET_PACKED_PIXEL_COORDS ||
|
||||
I->op == JAY_OPCODE_DESWIZZLE_ODD ||
|
||||
I->op == JAY_OPCODE_INIT_HELPERS ||
|
||||
I->op == JAY_OPCODE_MUL_32 ||
|
||||
I->op == JAY_OPCODE_SHUFFLE ||
|
||||
I->op == JAY_OPCODE_ZIP_UGPR16) {
|
||||
|
|
|
|||
|
|
@ -79,7 +79,7 @@ to_gen_operand(
|
|||
gen_operand R;
|
||||
unsigned reg = d.reg, count = jay_num_values(d);
|
||||
unsigned offset_B = 0, grf = 0;
|
||||
assert(!hi || d.file == GPR);
|
||||
assert(!hi || d.file == GPR || d.file == FLAG);
|
||||
|
||||
if (count && (d.file == GPR || d.file == UGPR)) {
|
||||
struct jay_register_block block =
|
||||
|
|
@ -189,7 +189,8 @@ to_gen_operand(
|
|||
* SIMD1 instructions and are never SIMD split.
|
||||
*/
|
||||
assert(simd_offs == 0 || idx >= 0);
|
||||
unsigned offs_B = d.reg * (f->shader->dispatch_width / 8);
|
||||
unsigned offs_B =
|
||||
(d.reg * (f->shader->dispatch_width / 8)) + (hi ? 2 : 0);
|
||||
R = gen_flag(offs_B / 2);
|
||||
} else if (d.file == J_ADDRESS) {
|
||||
R = gen_address(d.reg);
|
||||
|
|
@ -580,6 +581,14 @@ emit(struct jay_codegen *jc,
|
|||
}
|
||||
break;
|
||||
|
||||
case JAY_OPCODE_HALT:
|
||||
if (jay_halt_predicate_all(I)) {
|
||||
assert(I->predication);
|
||||
gen->pred_control =
|
||||
jc->devinfo->ver >= 20 ? GEN_PREDICATE_XE2_ALL : GEN_PREDICATE_ALLV;
|
||||
}
|
||||
break;
|
||||
|
||||
case JAY_OPCODE_HALT_TARGET:
|
||||
/* HALT temporarily disables channels, and the same instruction is used
|
||||
* to re-enable them: once all channels are disabled, then they are
|
||||
|
|
|
|||
|
|
@ -33,7 +33,9 @@ block_state_for_inst(jay_inst *I)
|
|||
if (I->op == JAY_OPCODE_PHI_DST || I->op == JAY_OPCODE_PRELOAD) {
|
||||
return STATE_PHI_DST;
|
||||
} else if (I->op == JAY_OPCODE_PHI_SRC ||
|
||||
(jay_op_is_control_flow(I->op) && I->op != JAY_OPCODE_ELSE)) {
|
||||
(jay_op_is_control_flow(I->op) &&
|
||||
I->op != JAY_OPCODE_ELSE &&
|
||||
I->op != JAY_OPCODE_HALT_TARGET)) {
|
||||
return STATE_LATE;
|
||||
} else {
|
||||
return STATE_NORMAL;
|
||||
|
|
@ -238,10 +240,6 @@ validate_inst(struct validate_state *validate, jay_inst *I)
|
|||
validate_flagness(validate, I->dst, I->type, "destination");
|
||||
validate_flagness(validate, I->cond_flag, JAY_TYPE_U1, "cond_flag");
|
||||
|
||||
CHECK(!I->conditional_mod ||
|
||||
!jay_is_null(I->cond_flag) ||
|
||||
I->op == JAY_OPCODE_CSEL);
|
||||
|
||||
/* These assumptions are baked into the definition of broadcast_flag and
|
||||
* required to ensure correctness with the lane masking.
|
||||
*/
|
||||
|
|
@ -256,8 +254,10 @@ validate_inst(struct validate_state *validate, jay_inst *I)
|
|||
CHECK(I->cond_flag.file != FLAG || I->dst.file != UGPR);
|
||||
|
||||
/* Standard modifiers only allowed on some instructions */
|
||||
CHECK(!I->conditional_mod || opinfo->cmod || I->op == JAY_OPCODE_CSEL);
|
||||
CHECK(!I->saturate || opinfo->sat);
|
||||
CHECK(!I->conditional_mod ||
|
||||
(I->op == JAY_OPCODE_CSEL || I->op == JAY_OPCODE_DEMOTE) ||
|
||||
(!jay_is_null(I->cond_flag) && opinfo->cmod));
|
||||
|
||||
unsigned num_srcs = I->num_srcs;
|
||||
|
||||
|
|
|
|||
|
|
@ -54,6 +54,7 @@ libintel_compiler_jay_files = files(
|
|||
'jay_ir.h',
|
||||
'jay_insert_fp_mode.c',
|
||||
'jay_liveness.c',
|
||||
'jay_lower_helpers.c',
|
||||
'jay_lower_post_ra.c',
|
||||
'jay_lower_pre_ra.c',
|
||||
'jay_lower_scoreboard.c',
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue