mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-24 10:38:11 +02:00
jay: Implement fragment shader stencil writes
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41688>
This commit is contained in:
parent
b01d286083
commit
49299050ea
6 changed files with 34 additions and 12 deletions
|
|
@ -462,6 +462,7 @@ struct jayb_send_params {
|
|||
enum jay_type src_type[2];
|
||||
unsigned nr_srcs;
|
||||
uint32_t ex_desc_imm;
|
||||
int split; /**< explicit split point */
|
||||
bool eot;
|
||||
bool check_tdr;
|
||||
bool uniform;
|
||||
|
|
@ -538,15 +539,9 @@ _jay_SEND(jay_builder *b, const struct jayb_send_params p)
|
|||
I->src[2] = p.nr_srcs > 0 ? p.srcs[0] : jay_null();
|
||||
I->src[3] = p.nr_srcs > 1 ? p.srcs[1] : jay_null();
|
||||
} else {
|
||||
/* Otherwise, we need to pick a point to split at.
|
||||
*
|
||||
* Heuristic: don't split render target writes because RA gets confused
|
||||
* with the EOT requirements. Split everything else in half.
|
||||
*
|
||||
* TODO: Come up with a better heuristic.
|
||||
*/
|
||||
/* Otherwise, we need to pick a point to split at. */
|
||||
assert(info->type_0 == info->type_1);
|
||||
unsigned split = !p.check_tdr ? (p.nr_srcs / 2) : p.nr_srcs;
|
||||
unsigned split = p.split > 0 ? p.split : p.nr_srcs / 2;
|
||||
I->src[2] = jay_collect_vectors(b, &p.srcs[0], split);
|
||||
I->src[3] = jay_collect_vectors(b, &p.srcs[split], p.nr_srcs - split);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -804,6 +804,7 @@ jay_emit_fb_write(jay_builder *b, nir_intrinsic_instr *intr)
|
|||
jay_def srcs[4 + 16 + 4 + 1 + 16];
|
||||
|
||||
unsigned len = 0;
|
||||
int split = -1;
|
||||
|
||||
if (!jay_is_null(src0_alpha))
|
||||
srcs[len++] = jay_as_gpr(b, src0_alpha);
|
||||
|
|
@ -816,7 +817,16 @@ jay_emit_fb_write(jay_builder *b, nir_intrinsic_instr *intr)
|
|||
if (!jay_is_null(depth))
|
||||
srcs[len++] = jay_as_gpr(b, depth);
|
||||
|
||||
assert(jay_is_null(stencil) && "TODO: stencil");
|
||||
if (!jay_is_null(stencil)) {
|
||||
jay_def packed = jay_alloc_def(b, UGPR, jay_ugpr_per_grf(b->shader));
|
||||
jay_BYTE_PACK(b, packed, jay_as_gpr(b, stencil));
|
||||
|
||||
/* Split send before stencil */
|
||||
split = len;
|
||||
|
||||
for (unsigned i = 0; i < jay_num_values(packed); i++)
|
||||
srcs[len++] = jay_extract(packed, i);
|
||||
}
|
||||
|
||||
/* Optimize out unconditional discards (probably should do this in NIR) */
|
||||
if (nir_src_is_const(intr->src[5]) && nir_src_as_bool(intr->src[5])) {
|
||||
|
|
@ -824,10 +834,15 @@ jay_emit_fb_write(jay_builder *b, nir_intrinsic_instr *intr)
|
|||
srcs[i] = jay_UNDEF_u32(b);
|
||||
}
|
||||
|
||||
/* Our current send splitting heuristic is bad, override it. */
|
||||
if (split == -1) {
|
||||
split = len;
|
||||
}
|
||||
|
||||
jay_inst *send =
|
||||
jay_SEND(b, .sfid = BRW_SFID_RENDER_CACHE, .check_tdr = true,
|
||||
.msg_desc = desc | (ex_desc << 32), .srcs = srcs, .nr_srcs = len,
|
||||
.type = JAY_TYPE_U32, .eot = last);
|
||||
.type = JAY_TYPE_U32, .eot = last, .split = split);
|
||||
|
||||
/* Handle the disable predicate. It is logically inverted. */
|
||||
if (!nir_src_is_const(intr->src[5]) || nir_src_as_bool(intr->src[5])) {
|
||||
|
|
|
|||
|
|
@ -151,7 +151,6 @@ collect_fragment_output(nir_builder *b, nir_intrinsic_instr *intr, void *ctx_)
|
|||
} else if (loc == FRAG_RESULT_DEPTH) {
|
||||
out = &ctx->depth;
|
||||
} else if (loc == FRAG_RESULT_STENCIL) {
|
||||
UNREACHABLE("todo");
|
||||
out = &ctx->stencil;
|
||||
} else if (loc == FRAG_RESULT_SAMPLE_MASK) {
|
||||
UNREACHABLE("todo");
|
||||
|
|
|
|||
|
|
@ -166,6 +166,12 @@ op('extract_byte_per_8lanes', 2, 'u32')
|
|||
op('shr_odd_subspans_by_4', 1, 'u16')
|
||||
op('and_u32_u16', 2, 'u32')
|
||||
|
||||
# Copy the first byte of each lane, treating the destination as if it were
|
||||
# effectively JAY_STRIDE_1 (which doesn't exist). Because the destination
|
||||
# doesn't follow proper lane alignments, this should not write to GPRs.
|
||||
# This is used for stencil outputs in render target write messages.
|
||||
op('byte_pack', 1, 'u32')
|
||||
|
||||
# Pixel coord calculations. expand_quad replicates out the per-2x2 values from
|
||||
# its source g0.[10...13] and - in the case of SIMD32 - g1.[10...13] into a
|
||||
# per-lane value. Then offset_packed_pixel_coords adds the appropriate packed
|
||||
|
|
|
|||
|
|
@ -153,7 +153,9 @@ propagate_forwards(jay_function *f)
|
|||
}
|
||||
|
||||
/* Don't propagate into phis yet - TODO: File awareness */
|
||||
if (I->op == JAY_OPCODE_PHI_SRC || I->op == JAY_OPCODE_SEND)
|
||||
if (I->op == JAY_OPCODE_PHI_SRC ||
|
||||
I->op == JAY_OPCODE_SEND ||
|
||||
I->op == JAY_OPCODE_BYTE_PACK)
|
||||
continue;
|
||||
|
||||
jay_foreach_ssa_src(I, s) {
|
||||
|
|
|
|||
|
|
@ -488,6 +488,11 @@ emit(struct brw_codegen *p,
|
|||
brw_MOV(p, dst, stride(retype(SRC(simd_offs), BRW_TYPE_UB), 1, 8, 0));
|
||||
break;
|
||||
|
||||
case JAY_OPCODE_BYTE_PACK:
|
||||
brw_MOV(p, stride(retype(dst, BRW_TYPE_UB), 1, 1, 0),
|
||||
stride(retype(SRC(0), BRW_TYPE_UB), 4, 1, 0));
|
||||
break;
|
||||
|
||||
case JAY_OPCODE_SHR_ODD_SUBSPANS_BY_4:
|
||||
brw_SHR(p, dst, SRC(0), brw_imm_uv(0x44440000));
|
||||
break;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue