pan: Centralize preload registers
Rather than having preload registers hardcoded over multiple files, gather
them in one place with an enum abstraction. This should simplify updates to
the preload registers.

Reviewed-by: Eric R. Smith <eric.smith@collabora.com>
Reviewed-by: Lorenzo Rossi <lorenzo.rossi@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40643>
This commit is contained in:
parent 1e052f0bb5
commit 1f0370616a

4 changed files with 228 additions and 61 deletions
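At its core, the change replaces hardcoded register numbers with a symbolic enum plus a single mapping function. Below is a minimal sketch of the pattern, distilled from the diff that follows; the commit's actual enum and bi_preload_reg() cover many more values, and only the register numbers shown here are taken from the diff.

/* Symbolic names for a few of the values the hardware preloads
 * (illustrative subset of the commit's enum). */
enum bi_preload {
   BI_PRELOAD_VERTEX_ID,
   BI_PRELOAD_INSTANCE_ID,
   BI_PRELOAD_BLEND_LINK,
};

/* Map a symbolic preload to its per-architecture register number.
 * Per the diff: Valhall (arch >= 9) shifts the vertex/instance IDs
 * down one register relative to Bifrost; the blend link register is
 * r48 on both. */
static inline unsigned
bi_preload_reg(enum bi_preload val, unsigned arch)
{
   switch (val) {
   case BI_PRELOAD_VERTEX_ID:
      return (arch >= 9) ? 60 : 61;
   case BI_PRELOAD_INSTANCE_ID:
      return (arch >= 9) ? 61 : 62;
   case BI_PRELOAD_BLEND_LINK:
      return 48;
   }
   return ~0u; /* unreachable for valid enum values */
}

Call sites then name the value, e.g. bi_preload(b, BI_PRELOAD_VERTEX_ID) instead of bi_preload(b, 61), so reassigning a preload register on a future architecture only touches the switch in one place.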
@@ -321,8 +321,9 @@ bi_make_affinity(uint64_t clobber, unsigned count, bool split_file)
 static void
 bi_mark_interference(bi_block *block, struct lcra_state *l, uint8_t *live,
                      uint64_t preload_live, unsigned node_count, bool is_blend,
-                     bool split_file, bool aligned_sr)
+                     bool split_file, unsigned arch)
 {
+   bool aligned_sr = arch >= 9;
    bi_foreach_instr_in_block_rev(block, ins) {
       /* Mark all registers live after the instruction as
        * interfering with the destination */
@@ -383,8 +384,10 @@ bi_mark_interference(bi_block *block, struct lcra_state *l, uint8_t *live,
       }

       if (!is_blend && ins->op == BI_OPCODE_BLEND) {
-         /* Blend shaders might clobber r0-r15, r48. */
-         uint64_t clobber = BITFIELD64_MASK(16) | BITFIELD64_BIT(48);
+         /* Blend shaders might clobber r0-r15, blend link reg. */
+         uint64_t clobber =
+            BITFIELD64_MASK(16) |
+            BITFIELD64_BIT(bi_preload_reg(BI_PRELOAD_BLEND_LINK, arch));

          for (unsigned i = 0; i < node_count; ++i) {
             if (live[i])
@@ -410,7 +413,7 @@ bi_compute_interference(bi_context *ctx, struct lcra_state *l, bool full_regs)
       uint8_t *live = mem_dup(blk->live_out, ctx->ssa_alloc);

       bi_mark_interference(blk, l, live, blk->reg_live_out, ctx->ssa_alloc,
-                           ctx->inputs->is_blend, !full_regs, ctx->arch >= 9);
+                           ctx->inputs->is_blend, !full_regs, ctx->arch);

       free(live);
    }
@@ -438,36 +441,43 @@ bi_allocate_registers(bi_context *ctx, bool *success, bool full_regs)
       bi_foreach_dest(ins, d)
          l->affinity[ins->dest[d].value] = default_affinity;

-      /* Blend shaders expect the src colour to be in r0-r3 */
+      /* Blend shaders expect the src colour to be in blend_src0_c0
+       * through c3 */
       if (ins->op == BI_OPCODE_BLEND && !ctx->inputs->is_blend) {
          assert(bi_is_ssa(ins->src[0]));
-         l->solutions[ins->src[0].value] = 0;
+         l->solutions[ins->src[0].value] =
+            bi_preload_reg(BI_PRELOAD_BLEND_SRC0_C0, ctx->arch);

-         /* Dual source blend input in r4-r7 */
+         /* Dual source blend input in blend_src1_c0 through c3 */
          if (bi_is_ssa(ins->src[4]))
-            l->solutions[ins->src[4].value] = 4;
+            l->solutions[ins->src[4].value] =
+               bi_preload_reg(BI_PRELOAD_BLEND_SRC1_C0, ctx->arch);

-         /* Writes to R48 */
+         /* Writes to blend link */
          if (!bi_is_null(ins->dest[0]))
-            l->solutions[ins->dest[0].value] = 48;
+            l->solutions[ins->dest[0].value] =
+               bi_preload_reg(BI_PRELOAD_BLEND_LINK, ctx->arch);
       }

-      /* Coverage mask writes stay in R60 */
+      /* Coverage mask writes stay in the cumulative coverage reg */
       if ((ins->op == BI_OPCODE_ATEST || ins->op == BI_OPCODE_ZS_EMIT) &&
           !bi_is_null(ins->dest[0])) {
-         l->solutions[ins->dest[0].value] = 60;
+         l->solutions[ins->dest[0].value] =
+            bi_preload_reg(BI_PRELOAD_CUMULATIVE_COVERAGE, ctx->arch);
       }

       /* Experimentally, it seems coverage masks inputs to ATEST must
-       * be in R60. Otherwise coverage mask writes do not work with
-       * early-ZS with pixel-frequency-shading (this combination of
-       * settings is legal if depth/stencil writes are disabled).
-       * Allowing a FAU index also seems to work on Valhall, at least.
+       * be in the cumulative coverage reg. Otherwise coverage mask
+       * writes do not work with early-ZS with pixel-frequency-shading
+       * (this combination of settings is legal if depth/stencil
+       * writes are disabled). Allowing a FAU index also seems to
+       * work on Valhall, at least.
        */
       if (ins->op == BI_OPCODE_ATEST) {
          assert(bi_is_ssa(ins->src[0]) || ins->src[0].type == BI_INDEX_FAU);
          if (bi_is_ssa(ins->src[0]))
-            l->solutions[ins->src[0].value] = 60;
+            l->solutions[ins->src[0].value] =
+               bi_preload_reg(BI_PRELOAD_CUMULATIVE_COVERAGE, ctx->arch);
       }
    }

@@ -492,8 +502,10 @@ bi_allocate_registers(bi_context *ctx, bool *success, bool full_regs)

    if (ctx->inputs->is_blend) {
       /* We're allowed to coalesce the moves to these */
-      affinity |= BITFIELD64_BIT(48);
-      affinity |= BITFIELD64_BIT(60);
+      affinity |=
+         BITFIELD64_BIT(bi_preload_reg(BI_PRELOAD_BLEND_LINK, ctx->arch));
+      affinity |= BITFIELD64_BIT(
+         bi_preload_reg(BI_PRELOAD_CUMULATIVE_COVERAGE, ctx->arch));
    }

    /* Try to coalesce */
@@ -595,14 +607,15 @@ bi_choose_spill_node(bi_context *ctx, struct lcra_state *l)
    bi_foreach_instr_global(ctx, ins) {
       bi_foreach_dest(ins, d) {
          /* Don't allow spilling coverage mask writes because the
-          * register preload logic assumes it will stay in R60.
-          * This could be optimized.
+          * register preload logic assumes it will stay in the
+          * cumulative coverage reg. This could be optimized.
           */
          if (ins->no_spill || ins->op == BI_OPCODE_ATEST ||
              ins->op == BI_OPCODE_ZS_EMIT ||
              (ins->op == BI_OPCODE_MOV_I32 &&
               ins->src[0].type == BI_INDEX_REGISTER &&
-              ins->src[0].value == 60)) {
+              ins->src[0].value ==
+                 bi_preload_reg(BI_PRELOAD_CUMULATIVE_COVERAGE, ctx->arch))) {
             BITSET_SET(no_spill, ins->dest[d].value);
          }
       }

@@ -32,8 +32,9 @@ static void pan_stats_verbose(FILE *f, const char *prefix, bi_context *ctx,
 static bi_block *emit_cf_list(bi_context *ctx, struct exec_list *list);

 static bi_index
-bi_preload(bi_builder *b, unsigned reg)
+bi_preload(bi_builder *b, enum bi_preload val)
 {
+   unsigned reg = bi_preload_reg(val, b->shader->arch);
    if (bi_is_null(b->shader->preloaded[reg])) {
       /* Insert at the beginning of the shader */
       bi_builder b_ = *b;
@@ -50,7 +51,7 @@ static bi_index
 bi_coverage(bi_builder *b)
 {
    if (bi_is_null(b->shader->coverage))
-      b->shader->coverage = bi_preload(b, 60);
+      b->shader->coverage = bi_preload(b, BI_PRELOAD_CUMULATIVE_COVERAGE);

    return b->shader->coverage;
 }
@@ -63,20 +64,20 @@ bi_coverage(bi_builder *b)
 static inline bi_index
 bi_vertex_id(bi_builder *b)
 {
-   return bi_preload(b, (b->shader->arch >= 9) ? 60 : 61);
+   return bi_preload(b, BI_PRELOAD_VERTEX_ID);
 }

 static inline bi_index
 bi_instance_id(bi_builder *b)
 {
-   return bi_preload(b, (b->shader->arch >= 9) ? 61 : 62);
+   return bi_preload(b, BI_PRELOAD_INSTANCE_ID);
 }

 static inline bi_index
 bi_draw_id(bi_builder *b)
 {
    assert(b->shader->arch >= 9);
-   return bi_preload(b, 62);
+   return bi_preload(b, BI_PRELOAD_DRAW_ID);
 }

 static void
@@ -258,8 +259,9 @@ bi_varying_src0_for_barycentric(bi_builder *b, nir_intrinsic_instr *intr)
 {
    switch (intr->intrinsic) {
    case nir_intrinsic_load_barycentric_centroid:
+      return bi_preload(b, BI_PRELOAD_CENTROID_ID);
    case nir_intrinsic_load_barycentric_sample:
-      return bi_preload(b, 61);
+      return bi_preload(b, BI_PRELOAD_SAMPLE_ID);

    /* Need to put the sample ID in the top 16-bits */
    case nir_intrinsic_load_barycentric_at_sample:
@@ -328,7 +330,8 @@ bi_varying_src0_for_barycentric(bi_builder *b, nir_intrinsic_instr *intr)

    case nir_intrinsic_load_barycentric_pixel:
    default:
-      return b->shader->arch >= 9 ? bi_preload(b, 61) : bi_dontcare(b);
+      return b->shader->arch >= 9 ? bi_preload(b, BI_PRELOAD_CENTROID_ID)
+                                  : bi_dontcare(b);
    }
 }

@@ -550,7 +553,8 @@ bi_emit_lea_attr(bi_builder *b, nir_intrinsic_instr *intr)
       unsigned snap4 = 0x5E;
       uint32_t format = identity | (snap4 << 12) | (regfmt << 24);
       bi_collect_v3i32_to(b, bi_def_index(&intr->def),
-                          bi_preload(b, 58), bi_preload(b, 59),
+                          bi_preload(b, BI_PRELOAD_POS_RESULT_PTR_LO),
+                          bi_preload(b, BI_PRELOAD_POS_RESULT_PTR_HI),
                           bi_imm_u32(format));
       return;
    }
@@ -838,8 +842,8 @@ bi_load_sample_id_to(bi_builder *b, bi_index dst)
     * seem to read garbage (despite being architecturally defined
     * as zero), so use a 5-bit mask instead of 8-bits */

-   bi_rshift_and_i32_to(b, dst, bi_preload(b, 61), bi_imm_u32(0x1f),
-                        bi_imm_u8(16), false);
+   bi_rshift_and_i32_to(b, dst, bi_preload(b, BI_PRELOAD_SAMPLE_ID),
+                        bi_imm_u32(0x1f), bi_imm_u8(16), false);
 }

 static bi_index
@@ -872,12 +876,24 @@ static void
 bi_emit_load_blend_input(bi_builder *b, nir_intrinsic_instr *instr)
 {
    nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
-   unsigned base = sem.dual_source_blend_index * 4;
    unsigned size = nir_alu_type_get_type_size(nir_intrinsic_dest_type(instr));
    assert(size == 16 || size == 32);

-   bi_index srcs[] = {bi_preload(b, base + 0), bi_preload(b, base + 1),
-                      bi_preload(b, base + 2), bi_preload(b, base + 3)};
+   bi_index srcs[4];
+   switch (sem.dual_source_blend_index) {
+   case 0:
+      srcs[0] = bi_preload(b, BI_PRELOAD_BLEND_SRC0_C0);
+      srcs[1] = bi_preload(b, BI_PRELOAD_BLEND_SRC0_C1);
+      srcs[2] = bi_preload(b, BI_PRELOAD_BLEND_SRC0_C2);
+      srcs[3] = bi_preload(b, BI_PRELOAD_BLEND_SRC0_C3);
+      break;
+   case 1:
+      srcs[0] = bi_preload(b, BI_PRELOAD_BLEND_SRC1_C0);
+      srcs[1] = bi_preload(b, BI_PRELOAD_BLEND_SRC1_C1);
+      srcs[2] = bi_preload(b, BI_PRELOAD_BLEND_SRC1_C2);
+      srcs[3] = bi_preload(b, BI_PRELOAD_BLEND_SRC1_C3);
+      break;
+   }

    bi_emit_collect_to(b, bi_def_index(&instr->def), srcs, size == 32 ? 4 : 2);
 }
@@ -1759,7 +1775,7 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
       break;

    case nir_intrinsic_load_cumulative_coverage_pan:
-      bi_mov_i32_to(b, dst, bi_preload(b, 60));
+      bi_mov_i32_to(b, dst, bi_preload(b, BI_PRELOAD_CUMULATIVE_COVERAGE));
       break;

    case nir_intrinsic_load_blend_descriptor_pan: {
@@ -1841,16 +1857,15 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
    }

    case nir_intrinsic_blend_return_pan:
-      /* Jump back to the fragment shader, return address is stored
-       * in r48 (see above). On Valhall, only jump if the address is
-       * nonzero. The check is free there and it implements the "jump
-       * to 0 terminates the blend shader" that's automatic on
-       * Bifrost.
+      /* Jump back to the fragment shader. On Valhall, only jump if the address
+       * is nonzero. The check is free there and it implements the "jump to 0
+       * terminates the blend shader" that's automatic on Bifrost.
        */
       if (b->shader->arch >= 9)
-         bi_branchzi(b, bi_preload(b, 48), bi_preload(b, 48), BI_CMPF_NE);
+         bi_branchzi(b, bi_preload(b, BI_PRELOAD_BLEND_LINK),
+                     bi_preload(b, BI_PRELOAD_BLEND_LINK), BI_CMPF_NE);
       else
-         bi_jump(b, bi_preload(b, 48));
+         bi_jump(b, bi_preload(b, BI_PRELOAD_BLEND_LINK));
       break;

    case nir_intrinsic_load_ubo:
@@ -2008,7 +2023,7 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)

    case nir_intrinsic_load_pixel_coord:
       /* Vectorized load of the preloaded i16vec2 */
-      bi_mov_i32_to(b, dst, bi_preload(b, 59));
+      bi_mov_i32_to(b, dst, bi_preload(b, BI_PRELOAD_POSITION_XY));
       break;

    case nir_intrinsic_load_texel_buf_conv_pan:
@@ -2033,7 +2048,7 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
       break;

    case nir_intrinsic_load_idvs_output_buf_index_pan:
-      bi_mov_i32_to(b, dst, bi_preload(b, 59));
+      bi_mov_i32_to(b, dst, bi_preload(b, BI_PRELOAD_INTERNAL_ID));
       break;

    case nir_intrinsic_lea_attr_pan:
@@ -2067,8 +2082,9 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
       break;

    case nir_intrinsic_load_sample_mask_in:
-      /* r61[0:15] contains the coverage bitmap */
-      bi_u16_to_u32_to(b, dst, bi_half(bi_preload(b, 61), false));
+      /* [0:15] contains the coverage bitmap */
+      bi_u16_to_u32_to(
+         b, dst, bi_half(bi_preload(b, BI_PRELOAD_RASTERIZER_COVERAGE), false));
       break;

    case nir_intrinsic_load_sample_mask:
@@ -2080,12 +2096,12 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
       break;

    case nir_intrinsic_load_primitive_id:
-      bi_mov_i32_to(b, dst, bi_preload(b, 57));
+      bi_mov_i32_to(b, dst, bi_preload(b, BI_PRELOAD_PRIMITIVE_ID));
       break;

    case nir_intrinsic_load_front_face: {
-      /* (r58 & 1) == 0 means primitive is front facing */
-      bi_index primitive_facing = bi_preload(b, 58);
+      /* (primitive_flags & 1) == 0 means primitive is front facing */
+      bi_index primitive_facing = bi_preload(b, BI_PRELOAD_PRIMITIVE_FLAGS);

       /* Starting with v11, there is more fields defined in the primitive flags */
       if (b->shader->arch >= 11)
@@ -2150,20 +2166,23 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
    }

    case nir_intrinsic_load_local_invocation_id:
-      bi_collect_v3i32_to(b, dst,
-                          bi_u16_to_u32(b, bi_half(bi_preload(b, 55), 0)),
-                          bi_u16_to_u32(b, bi_half(bi_preload(b, 55), 1)),
-                          bi_u16_to_u32(b, bi_half(bi_preload(b, 56), 0)));
+      bi_collect_v3i32_to(
+         b, dst,
+         bi_u16_to_u32(b, bi_half(bi_preload(b, BI_PRELOAD_LOCAL_ID_0), 0)),
+         bi_u16_to_u32(b, bi_half(bi_preload(b, BI_PRELOAD_LOCAL_ID_1), 1)),
+         bi_u16_to_u32(b, bi_half(bi_preload(b, BI_PRELOAD_LOCAL_ID_2), 0)));
       break;

    case nir_intrinsic_load_workgroup_id:
-      bi_collect_v3i32_to(b, dst, bi_preload(b, 57), bi_preload(b, 58),
-                          bi_preload(b, 59));
+      bi_collect_v3i32_to(b, dst, bi_preload(b, BI_PRELOAD_WORKGROUP_ID_0),
+                          bi_preload(b, BI_PRELOAD_WORKGROUP_ID_1),
+                          bi_preload(b, BI_PRELOAD_WORKGROUP_ID_2));
       break;

    case nir_intrinsic_load_global_invocation_id:
-      bi_collect_v3i32_to(b, dst, bi_preload(b, 60), bi_preload(b, 61),
-                          bi_preload(b, 62));
+      bi_collect_v3i32_to(b, dst, bi_preload(b, BI_PRELOAD_GLOBAL_ID_0),
+                          bi_preload(b, BI_PRELOAD_GLOBAL_ID_1),
+                          bi_preload(b, BI_PRELOAD_GLOBAL_ID_2));
       break;

    case nir_intrinsic_shader_clock:
@@ -2190,7 +2209,9 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
    case nir_intrinsic_load_view_index:
    case nir_intrinsic_load_layer_id:
       assert(b->shader->arch >= 9);
-      bi_mov_i32_to(b, dst, bi_u8_to_u32(b, bi_byte(bi_preload(b, 62), 0)));
+      bi_mov_i32_to(
+         b, dst,
+         bi_u8_to_u32(b, bi_byte(bi_preload(b, BI_PRELOAD_FRAME_ARG), 0)));
       break;

    case nir_intrinsic_load_ssbo_address:
@@ -1116,6 +1116,137 @@ enum bi_idvs_mode {

 #define BI_MAX_REGS 64

+enum bi_preload {
+   /* Compute */
+   BI_PRELOAD_LOCAL_ID_0,
+   BI_PRELOAD_LOCAL_ID_1,
+   BI_PRELOAD_LOCAL_ID_2,
+   BI_PRELOAD_WORKGROUP_ID_0,
+   BI_PRELOAD_WORKGROUP_ID_1,
+   BI_PRELOAD_WORKGROUP_ID_2,
+   BI_PRELOAD_GLOBAL_ID_0,
+   BI_PRELOAD_GLOBAL_ID_1,
+   BI_PRELOAD_GLOBAL_ID_2,
+   /* Vertex */
+   BI_PRELOAD_POS_RESULT_PTR_LO,
+   BI_PRELOAD_POS_RESULT_PTR_HI,
+   BI_PRELOAD_INTERNAL_ID,
+   BI_PRELOAD_VERTEX_ID,
+   BI_PRELOAD_INSTANCE_ID,
+   BI_PRELOAD_DRAW_ID,
+   BI_PRELOAD_VIEW_ID,
+   /* Fragment */
+   BI_PRELOAD_PRIMITIVE_ID,
+   BI_PRELOAD_PRIMITIVE_FLAGS,
+   BI_PRELOAD_POSITION_XY,
+   BI_PRELOAD_CUMULATIVE_COVERAGE,
+   BI_PRELOAD_RASTERIZER_COVERAGE,
+   BI_PRELOAD_SAMPLE_ID,
+   BI_PRELOAD_CENTROID_ID,
+   BI_PRELOAD_FRAME_ARG,
+   /* Blend */
+   BI_PRELOAD_BLEND_SRC0_C0,
+   BI_PRELOAD_BLEND_SRC0_C1,
+   BI_PRELOAD_BLEND_SRC0_C2,
+   BI_PRELOAD_BLEND_SRC0_C3,
+   BI_PRELOAD_BLEND_SRC1_C0,
+   BI_PRELOAD_BLEND_SRC1_C1,
+   BI_PRELOAD_BLEND_SRC1_C2,
+   BI_PRELOAD_BLEND_SRC1_C3,
+   BI_PRELOAD_BLEND_LINK,
+};
+
+static inline unsigned
+bi_preload_reg(enum bi_preload val, unsigned arch)
+{
+   switch (val) {
+   /* Compute */
+   case BI_PRELOAD_LOCAL_ID_0:
+      /* Bits [15;0] */
+      return 55;
+   case BI_PRELOAD_LOCAL_ID_1:
+      /* Bits [31;16] */
+      return 55;
+   case BI_PRELOAD_LOCAL_ID_2:
+      /* Bits [15;0] */
+      return 56;
+   case BI_PRELOAD_WORKGROUP_ID_0:
+      return 57;
+   case BI_PRELOAD_WORKGROUP_ID_1:
+      return 58;
+   case BI_PRELOAD_WORKGROUP_ID_2:
+      return 59;
+   case BI_PRELOAD_GLOBAL_ID_0:
+      return 60;
+   case BI_PRELOAD_GLOBAL_ID_1:
+      return 61;
+   case BI_PRELOAD_GLOBAL_ID_2:
+      return 62;
+   /* Vertex */
+   case BI_PRELOAD_POS_RESULT_PTR_LO:
+      assert(arch < 9);
+      return 58;
+   case BI_PRELOAD_POS_RESULT_PTR_HI:
+      assert(arch < 9);
+      return 59;
+   case BI_PRELOAD_INTERNAL_ID:
+      assert(arch >= 9);
+      return 59;
+   case BI_PRELOAD_VERTEX_ID:
+      return (arch >= 9) ? 60 : 61;
+   case BI_PRELOAD_INSTANCE_ID:
+      return (arch >= 9) ? 61 : 62;
+   case BI_PRELOAD_DRAW_ID:
+      assert(arch >= 9);
+      return 62;
+   case BI_PRELOAD_VIEW_ID:
+      assert(arch >= 9);
+      return 63;
+   /* Fragment */
+   case BI_PRELOAD_PRIMITIVE_ID:
+      return 57;
+   case BI_PRELOAD_PRIMITIVE_FLAGS:
+      return 58;
+   case BI_PRELOAD_POSITION_XY:
+      return 59;
+   case BI_PRELOAD_CUMULATIVE_COVERAGE:
+      /* Bits [15;0] */
+      return 60;
+   case BI_PRELOAD_RASTERIZER_COVERAGE:
+      /* Bits [15;0] */
+      return 61;
+   case BI_PRELOAD_SAMPLE_ID:
+      /* Bits [23;16] */
+      return 61;
+   case BI_PRELOAD_CENTROID_ID:
+      /* Bits [31;24] */
+      return 61;
+   case BI_PRELOAD_FRAME_ARG:
+      /* Double reg */
+      return 62;
+   /* Blend */
+   case BI_PRELOAD_BLEND_SRC0_C0:
+      return 0;
+   case BI_PRELOAD_BLEND_SRC0_C1:
+      return 1;
+   case BI_PRELOAD_BLEND_SRC0_C2:
+      return 2;
+   case BI_PRELOAD_BLEND_SRC0_C3:
+      return 3;
+   case BI_PRELOAD_BLEND_SRC1_C0:
+      return 4;
+   case BI_PRELOAD_BLEND_SRC1_C1:
+      return 5;
+   case BI_PRELOAD_BLEND_SRC1_C2:
+      return 6;
+   case BI_PRELOAD_BLEND_SRC1_C3:
+      return 7;
+   case BI_PRELOAD_BLEND_LINK:
+      return 48;
+   }
+   UNREACHABLE("Non-handled BI_PRELOAD");
+}
+
 typedef struct {
    const struct pan_compile_inputs *inputs;
    nir_shader *nir;
@@ -1155,8 +1155,10 @@ va_lower_blend(bi_context *ctx)

       unsigned prolog_length = 2 * 8;

-      /* By ABI, r48 is the link register shared with blend shaders */
-      assert(bi_is_equiv(I->dest[0], bi_register(48)));
+      /* By ABI, the preload blend link register is shared with blend
+       * shaders */
+      assert(bi_is_equiv(I->dest[0], bi_register(bi_preload_reg(
+                                        BI_PRELOAD_BLEND_LINK, ctx->arch))));

       if (I->flow == VA_FLOW_END)
          bi_iadd_imm_i32_to(&b, I->dest[0], va_zero_lut(), 0);