mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 04:58:05 +02:00
pan: Centralize preload registers
Rather than having preload registers hardcoded across multiple files, gather them in one place behind an enum abstraction. This should simplify future updates to the preload registers.
Reviewed-by: Eric R. Smith <eric.smith@collabora.com>
Reviewed-by: Lorenzo Rossi <lorenzo.rossi@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40643>
This commit is contained in:
parent
1e052f0bb5
commit
1f0370616a
4 changed files with 228 additions and 61 deletions
|
|
@ -321,8 +321,9 @@ bi_make_affinity(uint64_t clobber, unsigned count, bool split_file)
|
|||
static void
|
||||
bi_mark_interference(bi_block *block, struct lcra_state *l, uint8_t *live,
|
||||
uint64_t preload_live, unsigned node_count, bool is_blend,
|
||||
bool split_file, bool aligned_sr)
|
||||
bool split_file, unsigned arch)
|
||||
{
|
||||
bool aligned_sr = arch >= 9;
|
||||
bi_foreach_instr_in_block_rev(block, ins) {
|
||||
/* Mark all registers live after the instruction as
|
||||
* interfering with the destination */
|
||||
|
|
@ -383,8 +384,10 @@ bi_mark_interference(bi_block *block, struct lcra_state *l, uint8_t *live,
|
|||
}
|
||||
|
||||
if (!is_blend && ins->op == BI_OPCODE_BLEND) {
|
||||
/* Blend shaders might clobber r0-r15, r48. */
|
||||
uint64_t clobber = BITFIELD64_MASK(16) | BITFIELD64_BIT(48);
|
||||
/* Blend shaders might clobber r0-r15, blend link reg. */
|
||||
uint64_t clobber =
|
||||
BITFIELD64_MASK(16) |
|
||||
BITFIELD64_BIT(bi_preload_reg(BI_PRELOAD_BLEND_LINK, arch));
|
||||
|
||||
for (unsigned i = 0; i < node_count; ++i) {
|
||||
if (live[i])
|
||||
|
|
@ -410,7 +413,7 @@ bi_compute_interference(bi_context *ctx, struct lcra_state *l, bool full_regs)
|
|||
uint8_t *live = mem_dup(blk->live_out, ctx->ssa_alloc);
|
||||
|
||||
bi_mark_interference(blk, l, live, blk->reg_live_out, ctx->ssa_alloc,
|
||||
ctx->inputs->is_blend, !full_regs, ctx->arch >= 9);
|
||||
ctx->inputs->is_blend, !full_regs, ctx->arch);
|
||||
|
||||
free(live);
|
||||
}
|
||||
|
|
@ -438,36 +441,43 @@ bi_allocate_registers(bi_context *ctx, bool *success, bool full_regs)
|
|||
bi_foreach_dest(ins, d)
|
||||
l->affinity[ins->dest[d].value] = default_affinity;
|
||||
|
||||
/* Blend shaders expect the src colour to be in r0-r3 */
|
||||
/* Blend shaders expect the src colour to be in blend_src0_c0
|
||||
* through c3 */
|
||||
if (ins->op == BI_OPCODE_BLEND && !ctx->inputs->is_blend) {
|
||||
assert(bi_is_ssa(ins->src[0]));
|
||||
l->solutions[ins->src[0].value] = 0;
|
||||
l->solutions[ins->src[0].value] =
|
||||
bi_preload_reg(BI_PRELOAD_BLEND_SRC0_C0, ctx->arch);
|
||||
|
||||
/* Dual source blend input in r4-r7 */
|
||||
/* Dual source blend input in blend_src1_c0 through c3 */
|
||||
if (bi_is_ssa(ins->src[4]))
|
||||
l->solutions[ins->src[4].value] = 4;
|
||||
l->solutions[ins->src[4].value] =
|
||||
bi_preload_reg(BI_PRELOAD_BLEND_SRC1_C0, ctx->arch);
|
||||
|
||||
/* Writes to R48 */
|
||||
/* Writes to blend link */
|
||||
if (!bi_is_null(ins->dest[0]))
|
||||
l->solutions[ins->dest[0].value] = 48;
|
||||
l->solutions[ins->dest[0].value] =
|
||||
bi_preload_reg(BI_PRELOAD_BLEND_LINK, ctx->arch);
|
||||
}
|
||||
|
||||
/* Coverage mask writes stay in R60 */
|
||||
/* Coverage mask writes stay in the cumulative coverage reg */
|
||||
if ((ins->op == BI_OPCODE_ATEST || ins->op == BI_OPCODE_ZS_EMIT) &&
|
||||
!bi_is_null(ins->dest[0])) {
|
||||
l->solutions[ins->dest[0].value] = 60;
|
||||
l->solutions[ins->dest[0].value] =
|
||||
bi_preload_reg(BI_PRELOAD_CUMULATIVE_COVERAGE, ctx->arch);
|
||||
}
|
||||
|
||||
/* Experimentally, it seems coverage masks inputs to ATEST must
|
||||
* be in R60. Otherwise coverage mask writes do not work with
|
||||
* early-ZS with pixel-frequency-shading (this combination of
|
||||
* settings is legal if depth/stencil writes are disabled).
|
||||
* Allowing a FAU index also seems to work on Valhall, at least.
|
||||
* be in the cumulative coverage reg. Otherwise coverage mask
|
||||
* writes do not work with early-ZS with pixel-frequency-shading
|
||||
* (this combination of settings is legal if depth/stencil
|
||||
* writes are disabled). Allowing a FAU index also seems to
|
||||
* work on Valhall, at least.
|
||||
*/
|
||||
if (ins->op == BI_OPCODE_ATEST) {
|
||||
assert(bi_is_ssa(ins->src[0]) || ins->src[0].type == BI_INDEX_FAU);
|
||||
if (bi_is_ssa(ins->src[0]))
|
||||
l->solutions[ins->src[0].value] = 60;
|
||||
l->solutions[ins->src[0].value] =
|
||||
bi_preload_reg(BI_PRELOAD_CUMULATIVE_COVERAGE, ctx->arch);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -492,8 +502,10 @@ bi_allocate_registers(bi_context *ctx, bool *success, bool full_regs)
|
|||
|
||||
if (ctx->inputs->is_blend) {
|
||||
/* We're allowed to coalesce the moves to these */
|
||||
affinity |= BITFIELD64_BIT(48);
|
||||
affinity |= BITFIELD64_BIT(60);
|
||||
affinity |=
|
||||
BITFIELD64_BIT(bi_preload_reg(BI_PRELOAD_BLEND_LINK, ctx->arch));
|
||||
affinity |= BITFIELD64_BIT(
|
||||
bi_preload_reg(BI_PRELOAD_CUMULATIVE_COVERAGE, ctx->arch));
|
||||
}
|
||||
|
||||
/* Try to coalesce */
|
||||
|
|
@ -595,14 +607,15 @@ bi_choose_spill_node(bi_context *ctx, struct lcra_state *l)
|
|||
bi_foreach_instr_global(ctx, ins) {
|
||||
bi_foreach_dest(ins, d) {
|
||||
/* Don't allow spilling coverage mask writes because the
|
||||
* register preload logic assumes it will stay in R60.
|
||||
* This could be optimized.
|
||||
* register preload logic assumes it will stay in the
|
||||
* cumulative coverage reg. This could be optimized.
|
||||
*/
|
||||
if (ins->no_spill || ins->op == BI_OPCODE_ATEST ||
|
||||
ins->op == BI_OPCODE_ZS_EMIT ||
|
||||
(ins->op == BI_OPCODE_MOV_I32 &&
|
||||
ins->src[0].type == BI_INDEX_REGISTER &&
|
||||
ins->src[0].value == 60)) {
|
||||
ins->src[0].value ==
|
||||
bi_preload_reg(BI_PRELOAD_CUMULATIVE_COVERAGE, ctx->arch))) {
|
||||
BITSET_SET(no_spill, ins->dest[d].value);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,8 +32,9 @@ static void pan_stats_verbose(FILE *f, const char *prefix, bi_context *ctx,
|
|||
static bi_block *emit_cf_list(bi_context *ctx, struct exec_list *list);
|
||||
|
||||
static bi_index
|
||||
bi_preload(bi_builder *b, unsigned reg)
|
||||
bi_preload(bi_builder *b, enum bi_preload val)
|
||||
{
|
||||
unsigned reg = bi_preload_reg(val, b->shader->arch);
|
||||
if (bi_is_null(b->shader->preloaded[reg])) {
|
||||
/* Insert at the beginning of the shader */
|
||||
bi_builder b_ = *b;
|
||||
|
|
@ -50,7 +51,7 @@ static bi_index
|
|||
bi_coverage(bi_builder *b)
|
||||
{
|
||||
if (bi_is_null(b->shader->coverage))
|
||||
b->shader->coverage = bi_preload(b, 60);
|
||||
b->shader->coverage = bi_preload(b, BI_PRELOAD_CUMULATIVE_COVERAGE);
|
||||
|
||||
return b->shader->coverage;
|
||||
}
|
||||
|
|
@ -63,20 +64,20 @@ bi_coverage(bi_builder *b)
|
|||
static inline bi_index
|
||||
bi_vertex_id(bi_builder *b)
|
||||
{
|
||||
return bi_preload(b, (b->shader->arch >= 9) ? 60 : 61);
|
||||
return bi_preload(b, BI_PRELOAD_VERTEX_ID);
|
||||
}
|
||||
|
||||
static inline bi_index
|
||||
bi_instance_id(bi_builder *b)
|
||||
{
|
||||
return bi_preload(b, (b->shader->arch >= 9) ? 61 : 62);
|
||||
return bi_preload(b, BI_PRELOAD_INSTANCE_ID);
|
||||
}
|
||||
|
||||
static inline bi_index
|
||||
bi_draw_id(bi_builder *b)
|
||||
{
|
||||
assert(b->shader->arch >= 9);
|
||||
return bi_preload(b, 62);
|
||||
return bi_preload(b, BI_PRELOAD_DRAW_ID);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -258,8 +259,9 @@ bi_varying_src0_for_barycentric(bi_builder *b, nir_intrinsic_instr *intr)
|
|||
{
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_load_barycentric_centroid:
|
||||
return bi_preload(b, BI_PRELOAD_CENTROID_ID);
|
||||
case nir_intrinsic_load_barycentric_sample:
|
||||
return bi_preload(b, 61);
|
||||
return bi_preload(b, BI_PRELOAD_SAMPLE_ID);
|
||||
|
||||
/* Need to put the sample ID in the top 16-bits */
|
||||
case nir_intrinsic_load_barycentric_at_sample:
|
||||
|
|
@ -328,7 +330,8 @@ bi_varying_src0_for_barycentric(bi_builder *b, nir_intrinsic_instr *intr)
|
|||
|
||||
case nir_intrinsic_load_barycentric_pixel:
|
||||
default:
|
||||
return b->shader->arch >= 9 ? bi_preload(b, 61) : bi_dontcare(b);
|
||||
return b->shader->arch >= 9 ? bi_preload(b, BI_PRELOAD_CENTROID_ID)
|
||||
: bi_dontcare(b);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -550,7 +553,8 @@ bi_emit_lea_attr(bi_builder *b, nir_intrinsic_instr *intr)
|
|||
unsigned snap4 = 0x5E;
|
||||
uint32_t format = identity | (snap4 << 12) | (regfmt << 24);
|
||||
bi_collect_v3i32_to(b, bi_def_index(&intr->def),
|
||||
bi_preload(b, 58), bi_preload(b, 59),
|
||||
bi_preload(b, BI_PRELOAD_POS_RESULT_PTR_LO),
|
||||
bi_preload(b, BI_PRELOAD_POS_RESULT_PTR_HI),
|
||||
bi_imm_u32(format));
|
||||
return;
|
||||
}
|
||||
|
|
@ -838,8 +842,8 @@ bi_load_sample_id_to(bi_builder *b, bi_index dst)
|
|||
* seem to read garbage (despite being architecturally defined
|
||||
* as zero), so use a 5-bit mask instead of 8-bits */
|
||||
|
||||
bi_rshift_and_i32_to(b, dst, bi_preload(b, 61), bi_imm_u32(0x1f),
|
||||
bi_imm_u8(16), false);
|
||||
bi_rshift_and_i32_to(b, dst, bi_preload(b, BI_PRELOAD_SAMPLE_ID),
|
||||
bi_imm_u32(0x1f), bi_imm_u8(16), false);
|
||||
}
|
||||
|
||||
static bi_index
|
||||
|
|
@ -872,12 +876,24 @@ static void
|
|||
bi_emit_load_blend_input(bi_builder *b, nir_intrinsic_instr *instr)
|
||||
{
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
|
||||
unsigned base = sem.dual_source_blend_index * 4;
|
||||
unsigned size = nir_alu_type_get_type_size(nir_intrinsic_dest_type(instr));
|
||||
assert(size == 16 || size == 32);
|
||||
|
||||
bi_index srcs[] = {bi_preload(b, base + 0), bi_preload(b, base + 1),
|
||||
bi_preload(b, base + 2), bi_preload(b, base + 3)};
|
||||
bi_index srcs[4];
|
||||
switch (sem.dual_source_blend_index) {
|
||||
case 0:
|
||||
srcs[0] = bi_preload(b, BI_PRELOAD_BLEND_SRC0_C0);
|
||||
srcs[1] = bi_preload(b, BI_PRELOAD_BLEND_SRC0_C1);
|
||||
srcs[2] = bi_preload(b, BI_PRELOAD_BLEND_SRC0_C2);
|
||||
srcs[3] = bi_preload(b, BI_PRELOAD_BLEND_SRC0_C3);
|
||||
break;
|
||||
case 1:
|
||||
srcs[0] = bi_preload(b, BI_PRELOAD_BLEND_SRC1_C0);
|
||||
srcs[1] = bi_preload(b, BI_PRELOAD_BLEND_SRC1_C1);
|
||||
srcs[2] = bi_preload(b, BI_PRELOAD_BLEND_SRC1_C2);
|
||||
srcs[3] = bi_preload(b, BI_PRELOAD_BLEND_SRC1_C3);
|
||||
break;
|
||||
}
|
||||
|
||||
bi_emit_collect_to(b, bi_def_index(&instr->def), srcs, size == 32 ? 4 : 2);
|
||||
}
|
||||
|
|
@ -1759,7 +1775,7 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
|
|||
break;
|
||||
|
||||
case nir_intrinsic_load_cumulative_coverage_pan:
|
||||
bi_mov_i32_to(b, dst, bi_preload(b, 60));
|
||||
bi_mov_i32_to(b, dst, bi_preload(b, BI_PRELOAD_CUMULATIVE_COVERAGE));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_blend_descriptor_pan: {
|
||||
|
|
@ -1841,16 +1857,15 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
|
|||
}
|
||||
|
||||
case nir_intrinsic_blend_return_pan:
|
||||
/* Jump back to the fragment shader, return address is stored
|
||||
* in r48 (see above). On Valhall, only jump if the address is
|
||||
* nonzero. The check is free there and it implements the "jump
|
||||
* to 0 terminates the blend shader" that's automatic on
|
||||
* Bifrost.
|
||||
/* Jump back to the fragment shader. On Valhall, only jump if the address
|
||||
* is nonzero. The check is free there and it implements the "jump to 0
|
||||
* terminates the blend shader" that's automatic on Bifrost.
|
||||
*/
|
||||
if (b->shader->arch >= 9)
|
||||
bi_branchzi(b, bi_preload(b, 48), bi_preload(b, 48), BI_CMPF_NE);
|
||||
bi_branchzi(b, bi_preload(b, BI_PRELOAD_BLEND_LINK),
|
||||
bi_preload(b, BI_PRELOAD_BLEND_LINK), BI_CMPF_NE);
|
||||
else
|
||||
bi_jump(b, bi_preload(b, 48));
|
||||
bi_jump(b, bi_preload(b, BI_PRELOAD_BLEND_LINK));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ubo:
|
||||
|
|
@ -2008,7 +2023,7 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
|
|||
|
||||
case nir_intrinsic_load_pixel_coord:
|
||||
/* Vectorized load of the preloaded i16vec2 */
|
||||
bi_mov_i32_to(b, dst, bi_preload(b, 59));
|
||||
bi_mov_i32_to(b, dst, bi_preload(b, BI_PRELOAD_POSITION_XY));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_texel_buf_conv_pan:
|
||||
|
|
@ -2033,7 +2048,7 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
|
|||
break;
|
||||
|
||||
case nir_intrinsic_load_idvs_output_buf_index_pan:
|
||||
bi_mov_i32_to(b, dst, bi_preload(b, 59));
|
||||
bi_mov_i32_to(b, dst, bi_preload(b, BI_PRELOAD_INTERNAL_ID));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_lea_attr_pan:
|
||||
|
|
@ -2067,8 +2082,9 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
|
|||
break;
|
||||
|
||||
case nir_intrinsic_load_sample_mask_in:
|
||||
/* r61[0:15] contains the coverage bitmap */
|
||||
bi_u16_to_u32_to(b, dst, bi_half(bi_preload(b, 61), false));
|
||||
/* [0:15] contains the coverage bitmap */
|
||||
bi_u16_to_u32_to(
|
||||
b, dst, bi_half(bi_preload(b, BI_PRELOAD_RASTERIZER_COVERAGE), false));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_sample_mask:
|
||||
|
|
@ -2080,12 +2096,12 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
|
|||
break;
|
||||
|
||||
case nir_intrinsic_load_primitive_id:
|
||||
bi_mov_i32_to(b, dst, bi_preload(b, 57));
|
||||
bi_mov_i32_to(b, dst, bi_preload(b, BI_PRELOAD_PRIMITIVE_ID));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_front_face: {
|
||||
/* (r58 & 1) == 0 means primitive is front facing */
|
||||
bi_index primitive_facing = bi_preload(b, 58);
|
||||
/* (primitive_flags & 1) == 0 means primitive is front facing */
|
||||
bi_index primitive_facing = bi_preload(b, BI_PRELOAD_PRIMITIVE_FLAGS);
|
||||
|
||||
/* Starting with v11, there are more fields defined in the primitive flags */
|
||||
if (b->shader->arch >= 11)
|
||||
|
|
@ -2150,20 +2166,23 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
|
|||
}
|
||||
|
||||
case nir_intrinsic_load_local_invocation_id:
|
||||
bi_collect_v3i32_to(b, dst,
|
||||
bi_u16_to_u32(b, bi_half(bi_preload(b, 55), 0)),
|
||||
bi_u16_to_u32(b, bi_half(bi_preload(b, 55), 1)),
|
||||
bi_u16_to_u32(b, bi_half(bi_preload(b, 56), 0)));
|
||||
bi_collect_v3i32_to(
|
||||
b, dst,
|
||||
bi_u16_to_u32(b, bi_half(bi_preload(b, BI_PRELOAD_LOCAL_ID_0), 0)),
|
||||
bi_u16_to_u32(b, bi_half(bi_preload(b, BI_PRELOAD_LOCAL_ID_1), 1)),
|
||||
bi_u16_to_u32(b, bi_half(bi_preload(b, BI_PRELOAD_LOCAL_ID_2), 0)));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_workgroup_id:
|
||||
bi_collect_v3i32_to(b, dst, bi_preload(b, 57), bi_preload(b, 58),
|
||||
bi_preload(b, 59));
|
||||
bi_collect_v3i32_to(b, dst, bi_preload(b, BI_PRELOAD_WORKGROUP_ID_0),
|
||||
bi_preload(b, BI_PRELOAD_WORKGROUP_ID_1),
|
||||
bi_preload(b, BI_PRELOAD_WORKGROUP_ID_2));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_global_invocation_id:
|
||||
bi_collect_v3i32_to(b, dst, bi_preload(b, 60), bi_preload(b, 61),
|
||||
bi_preload(b, 62));
|
||||
bi_collect_v3i32_to(b, dst, bi_preload(b, BI_PRELOAD_GLOBAL_ID_0),
|
||||
bi_preload(b, BI_PRELOAD_GLOBAL_ID_1),
|
||||
bi_preload(b, BI_PRELOAD_GLOBAL_ID_2));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_shader_clock:
|
||||
|
|
@ -2190,7 +2209,9 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
|
|||
case nir_intrinsic_load_view_index:
|
||||
case nir_intrinsic_load_layer_id:
|
||||
assert(b->shader->arch >= 9);
|
||||
bi_mov_i32_to(b, dst, bi_u8_to_u32(b, bi_byte(bi_preload(b, 62), 0)));
|
||||
bi_mov_i32_to(
|
||||
b, dst,
|
||||
bi_u8_to_u32(b, bi_byte(bi_preload(b, BI_PRELOAD_FRAME_ARG), 0)));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ssbo_address:
|
||||
|
|
|
|||
|
|
@ -1116,6 +1116,137 @@ enum bi_idvs_mode {
|
|||
|
||||
#define BI_MAX_REGS 64
|
||||
|
||||
/*
 * Symbolic names for preloaded register values.
 *
 * Use bi_preload_reg() to translate a name into a register number. Several
 * names may resolve to the same register when they occupy different bit
 * ranges of it, and some names are only valid on certain architectures.
 */
enum bi_preload {
   /* Compute shaders */
   BI_PRELOAD_LOCAL_ID_0,
   BI_PRELOAD_LOCAL_ID_1,
   BI_PRELOAD_LOCAL_ID_2,
   BI_PRELOAD_WORKGROUP_ID_0,
   BI_PRELOAD_WORKGROUP_ID_1,
   BI_PRELOAD_WORKGROUP_ID_2,
   BI_PRELOAD_GLOBAL_ID_0,
   BI_PRELOAD_GLOBAL_ID_1,
   BI_PRELOAD_GLOBAL_ID_2,

   /* Vertex shaders */
   BI_PRELOAD_POS_RESULT_PTR_LO,
   BI_PRELOAD_POS_RESULT_PTR_HI,
   BI_PRELOAD_INTERNAL_ID,
   BI_PRELOAD_VERTEX_ID,
   BI_PRELOAD_INSTANCE_ID,
   BI_PRELOAD_DRAW_ID,
   BI_PRELOAD_VIEW_ID,

   /* Fragment shaders */
   BI_PRELOAD_PRIMITIVE_ID,
   BI_PRELOAD_PRIMITIVE_FLAGS,
   BI_PRELOAD_POSITION_XY,
   BI_PRELOAD_CUMULATIVE_COVERAGE,
   BI_PRELOAD_RASTERIZER_COVERAGE,
   BI_PRELOAD_SAMPLE_ID,
   BI_PRELOAD_CENTROID_ID,
   BI_PRELOAD_FRAME_ARG,

   /* Blend shaders: source colour 0 and 1, four components each, then the
    * link register */
   BI_PRELOAD_BLEND_SRC0_C0,
   BI_PRELOAD_BLEND_SRC0_C1,
   BI_PRELOAD_BLEND_SRC0_C2,
   BI_PRELOAD_BLEND_SRC0_C3,
   BI_PRELOAD_BLEND_SRC1_C0,
   BI_PRELOAD_BLEND_SRC1_C1,
   BI_PRELOAD_BLEND_SRC1_C2,
   BI_PRELOAD_BLEND_SRC1_C3,
   BI_PRELOAD_BLEND_LINK,
};
|
||||
|
||||
static inline unsigned
|
||||
bi_preload_reg(enum bi_preload val, unsigned arch)
|
||||
{
|
||||
switch (val) {
|
||||
/* Compute */
|
||||
case BI_PRELOAD_LOCAL_ID_0:
|
||||
/* Bits [15;0] */
|
||||
return 55;
|
||||
case BI_PRELOAD_LOCAL_ID_1:
|
||||
/* Bits [31;16] */
|
||||
return 55;
|
||||
case BI_PRELOAD_LOCAL_ID_2:
|
||||
/* Bits [15;0] */
|
||||
return 56;
|
||||
case BI_PRELOAD_WORKGROUP_ID_0:
|
||||
return 57;
|
||||
case BI_PRELOAD_WORKGROUP_ID_1:
|
||||
return 58;
|
||||
case BI_PRELOAD_WORKGROUP_ID_2:
|
||||
return 59;
|
||||
case BI_PRELOAD_GLOBAL_ID_0:
|
||||
return 60;
|
||||
case BI_PRELOAD_GLOBAL_ID_1:
|
||||
return 61;
|
||||
case BI_PRELOAD_GLOBAL_ID_2:
|
||||
return 62;
|
||||
/* Vertex */
|
||||
case BI_PRELOAD_POS_RESULT_PTR_LO:
|
||||
assert(arch < 9);
|
||||
return 58;
|
||||
case BI_PRELOAD_POS_RESULT_PTR_HI:
|
||||
assert(arch < 9);
|
||||
return 59;
|
||||
case BI_PRELOAD_INTERNAL_ID:
|
||||
assert(arch >= 9);
|
||||
return 59;
|
||||
case BI_PRELOAD_VERTEX_ID:
|
||||
return (arch >= 9) ? 60 : 61;
|
||||
case BI_PRELOAD_INSTANCE_ID:
|
||||
return (arch >= 9) ? 61 : 62;
|
||||
case BI_PRELOAD_DRAW_ID:
|
||||
assert(arch >= 9);
|
||||
return 62;
|
||||
case BI_PRELOAD_VIEW_ID:
|
||||
assert(arch >= 9);
|
||||
return 63;
|
||||
/* Fragment */
|
||||
case BI_PRELOAD_PRIMITIVE_ID:
|
||||
return 57;
|
||||
case BI_PRELOAD_PRIMITIVE_FLAGS:
|
||||
return 58;
|
||||
case BI_PRELOAD_POSITION_XY:
|
||||
return 59;
|
||||
case BI_PRELOAD_CUMULATIVE_COVERAGE:
|
||||
/* Bits [15;0] */
|
||||
return 60;
|
||||
case BI_PRELOAD_RASTERIZER_COVERAGE:
|
||||
/* Bits [15;0] */
|
||||
return 61;
|
||||
case BI_PRELOAD_SAMPLE_ID:
|
||||
/* Bits [23;16] */
|
||||
return 61;
|
||||
case BI_PRELOAD_CENTROID_ID:
|
||||
/* Bits [31;24] */
|
||||
return 61;
|
||||
case BI_PRELOAD_FRAME_ARG:
|
||||
/* Double reg */
|
||||
return 62;
|
||||
/* Blend */
|
||||
case BI_PRELOAD_BLEND_SRC0_C0:
|
||||
return 0;
|
||||
case BI_PRELOAD_BLEND_SRC0_C1:
|
||||
return 1;
|
||||
case BI_PRELOAD_BLEND_SRC0_C2:
|
||||
return 2;
|
||||
case BI_PRELOAD_BLEND_SRC0_C3:
|
||||
return 3;
|
||||
case BI_PRELOAD_BLEND_SRC1_C0:
|
||||
return 4;
|
||||
case BI_PRELOAD_BLEND_SRC1_C1:
|
||||
return 5;
|
||||
case BI_PRELOAD_BLEND_SRC1_C2:
|
||||
return 6;
|
||||
case BI_PRELOAD_BLEND_SRC1_C3:
|
||||
return 7;
|
||||
case BI_PRELOAD_BLEND_LINK:
|
||||
return 48;
|
||||
}
|
||||
UNREACHABLE("Non-handled BI_PRELOAD");
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
const struct pan_compile_inputs *inputs;
|
||||
nir_shader *nir;
|
||||
|
|
|
|||
|
|
@ -1155,8 +1155,10 @@ va_lower_blend(bi_context *ctx)
|
|||
|
||||
unsigned prolog_length = 2 * 8;
|
||||
|
||||
/* By ABI, r48 is the link register shared with blend shaders */
|
||||
assert(bi_is_equiv(I->dest[0], bi_register(48)));
|
||||
/* By ABI, the preload blend link register is shared with blend
|
||||
* shaders */
|
||||
assert(bi_is_equiv(I->dest[0], bi_register(bi_preload_reg(
|
||||
BI_PRELOAD_BLEND_LINK, ctx->arch))));
|
||||
|
||||
if (I->flow == VA_FLOW_END)
|
||||
bi_iadd_imm_i32_to(&b, I->dest[0], va_zero_lut(), 0);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue