mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 04:58:05 +02:00
pan/bi: Pack 8-bit vec2s
We used to splat out 8-bit vec2s to 16-bit by repeating both 8-bit halves twice with the B0011 swizzle. I think the original idea here was that 16-bit swizzles were more widely available in the hardware and that this would make swizzling things easier. The problem is that nothing actually knows that the value is half-repeated like this so nothing knows it can upgrade a swizzle from B0022 to B0123 (H01). So instead we get a bunch of B0022 swizzles, which nothing supports. We can shave a lot of instructions if we just stop trying to be so clever and instead repeat the whole thing with a B0101 swizzle. The only real issue here is that v2[fiu]8_to_v2[fiu]16 needs a B0011 swizzle, which we have to apply on-the-fly. Fortunately, any swizzle can be composed with B0011. Reviewed-by: Lorenzo Rossi <lorenzo.rossi@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40720>
This commit is contained in:
parent
db8cb73b34
commit
15d5675e8e
1 changed files with 50 additions and 37 deletions
|
|
@ -2307,35 +2307,49 @@ bi_alu_src_index(bi_builder *b, nir_alu_src src, unsigned comps)
|
|||
} else if (bitsize == 8 && comps == 1) {
|
||||
idx.swizzle = BI_SWIZZLE_B0000 + (src.swizzle[0] & 3);
|
||||
} else if (bitsize == 8) {
|
||||
if (comps == 2 || comps == 4) {
|
||||
/* For a vec2, place the two components in 0 and 2 instead of
|
||||
* 0 and 1. For a scalar, splat it out to all channels.
|
||||
*/
|
||||
unsigned c[4] = {0};
|
||||
for (unsigned i = 0; i < 4; ++i)
|
||||
c[i] = src.swizzle[i * comps / 4] & 3;
|
||||
bool has_swizzle = false;
|
||||
enum bi_swizzle swizzle = BI_SWIZZLE_H01;
|
||||
if (comps == 3) {
|
||||
unsigned c[4];
|
||||
for (unsigned i = 0; i < 3; ++i)
|
||||
c[i] = src.swizzle[i] & 3;
|
||||
|
||||
enum bi_swizzle swizzle;
|
||||
if (bi_swizzle_from_byte_channels(c, &swizzle)) {
|
||||
idx.swizzle = swizzle;
|
||||
return idx;
|
||||
/* Try to find a swizzle that starts with the given v3i8 swizzle */
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
c[3] = i;
|
||||
if (bi_swizzle_from_byte_channels(c, &swizzle)) {
|
||||
has_swizzle = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* For 1 and 2-component, repeat the swizzle to increase the chances
|
||||
* that it's a valid bi_swizzle.
|
||||
*/
|
||||
unsigned c[4];
|
||||
for (unsigned i = 0; i < 4; ++i)
|
||||
c[i] = src.swizzle[i % comps] & 3;
|
||||
has_swizzle = bi_swizzle_from_byte_channels(c, &swizzle);
|
||||
}
|
||||
|
||||
/* XXX: Use optimized swizzle when possible */
|
||||
bi_index unoffset_srcs[NIR_MAX_VEC_COMPONENTS] = {bi_null()};
|
||||
unsigned channels[NIR_MAX_VEC_COMPONENTS] = {0};
|
||||
|
||||
for (unsigned i = 0; i < comps; ++i) {
|
||||
unoffset_srcs[i] = bi_src_index(&src.src);
|
||||
channels[i] = src.swizzle[i];
|
||||
if (has_swizzle) {
|
||||
idx.swizzle = swizzle;
|
||||
return idx;
|
||||
}
|
||||
|
||||
bi_index temp = bi_temp(b->shader);
|
||||
bi_make_vec_to(b, temp, unoffset_srcs, channels, comps, bitsize);
|
||||
bi_index v4_srcs[4];
|
||||
for (unsigned i = 0; i < comps; i++) {
|
||||
v4_srcs[i] = idx;
|
||||
v4_srcs[i].swizzle = BI_SWIZZLE_B0 + src.swizzle[i];
|
||||
}
|
||||
for (unsigned i = comps; i < 4; i++)
|
||||
v4_srcs[i] = bi_imm_u8(0);
|
||||
|
||||
bi_index temp = bi_mkvec_v4i8(b, v4_srcs[0], v4_srcs[1],
|
||||
v4_srcs[2], v4_srcs[3]);
|
||||
|
||||
static const enum bi_swizzle swizzle_lut[] = {
|
||||
BI_SWIZZLE_B0000, BI_SWIZZLE_B0011, BI_SWIZZLE_B0123, BI_SWIZZLE_B0123
|
||||
BI_SWIZZLE_B0000, BI_SWIZZLE_B0101, BI_SWIZZLE_B0123, BI_SWIZZLE_B0123
|
||||
};
|
||||
assert(comps - 1 < ARRAY_SIZE(swizzle_lut));
|
||||
|
||||
|
|
@ -2348,6 +2362,17 @@ bi_alu_src_index(bi_builder *b, nir_alu_src src, unsigned comps)
|
|||
return idx;
|
||||
}
|
||||
|
||||
/*
 * Return a copy of idx whose swizzle has been replaced by the result of
 * bi_try_compose_swizzles() called with BI_SWIZZLE_B01 and idx's current
 * swizzle. idx is passed by value, so the caller's index is left untouched.
 *
 * In this file it wraps the source operand of the 8-bit to 16-bit
 * conversions (bi_v2u8_to_v2f16_to, bi_v2s8_to_v2f16_to,
 * bi_v2s8_to_v2s16_to and bi_v2u8_to_v2u16_to).
 *
 * NOTE(review): the order in which the two swizzles are applied is defined
 * by bi_try_compose_swizzles(), which is not visible here -- confirm there.
 */
static bi_index
|
||||
bi_swiz_b01(bi_index idx)
|
||||
{
|
||||
enum bi_swizzle swizzle;
|
||||
/* Writes the composed swizzle through the out-pointer and reports whether
 * the composition succeeded. */
bool valid = bi_try_compose_swizzles(&swizzle, BI_SWIZZLE_B01, idx.swizzle);
|
||||
/* The composition is expected to always succeed. NOTE(review): `valid` is
 * only read by this assert, so when assert() is compiled out a failed
 * composition would go unnoticed here. */
assert(valid);
|
||||
|
||||
/* Only the swizzle field changes; every other field of idx is preserved. */
idx.swizzle = swizzle;
|
||||
return idx;
|
||||
}
|
||||
|
||||
static enum bi_round
|
||||
bi_nir_round(nir_op op)
|
||||
{
|
||||
|
|
@ -2865,12 +2890,6 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
|
|||
bi_index s2 =
|
||||
srcs > 2 ? bi_alu_src_index(b, instr->src[2], comps) : bi_null();
|
||||
|
||||
bool need_post_swizzle = sz == 8 && comps == 2;
|
||||
bi_index post_swizzle_dst = dst;
|
||||
if (need_post_swizzle) {
|
||||
dst = bi_temp(b->shader);
|
||||
}
|
||||
|
||||
switch (instr->op) {
|
||||
case nir_op_ffma:
|
||||
bi_fma_to(b, sz, dst, s0, s1, s2);
|
||||
|
|
@ -3148,7 +3167,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
|
|||
if (src_sz == 16)
|
||||
bi_v2u16_to_v2f16_to(b, dst, s0);
|
||||
else if (src_sz == 8)
|
||||
bi_v2u8_to_v2f16_to(b, dst, s0);
|
||||
bi_v2u8_to_v2f16_to(b, dst, bi_swiz_b01(s0));
|
||||
break;
|
||||
|
||||
case nir_op_u2f32:
|
||||
|
|
@ -3174,7 +3193,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
|
|||
if (src_sz == 16)
|
||||
bi_v2s16_to_v2f16_to(b, dst, s0);
|
||||
else if (src_sz == 8)
|
||||
bi_v2s8_to_v2f16_to(b, dst, s0);
|
||||
bi_v2s8_to_v2f16_to(b, dst, bi_swiz_b01(s0));
|
||||
break;
|
||||
|
||||
case nir_op_i2f32:
|
||||
|
|
@ -3216,7 +3235,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
|
|||
assert(src_sz == 8 || src_sz == 32);
|
||||
|
||||
if (src_sz == 8)
|
||||
bi_v2s8_to_v2s16_to(b, dst, s0);
|
||||
bi_v2s8_to_v2s16_to(b, dst, bi_swiz_b01(s0));
|
||||
else
|
||||
bi_mov_i32_to(b, dst, s0);
|
||||
break;
|
||||
|
|
@ -3225,7 +3244,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
|
|||
assert(src_sz == 8 || src_sz == 32);
|
||||
|
||||
if (src_sz == 8)
|
||||
bi_v2u8_to_v2u16_to(b, dst, s0);
|
||||
bi_v2u8_to_v2u16_to(b, dst, bi_swiz_b01(s0));
|
||||
else
|
||||
bi_mov_i32_to(b, dst, s0);
|
||||
break;
|
||||
|
|
@ -3440,12 +3459,6 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
|
|||
fprintf(stderr, "Unhandled ALU op %s\n", nir_op_infos[instr->op].name);
|
||||
UNREACHABLE("Unknown ALU op");
|
||||
}
|
||||
|
||||
if (need_post_swizzle) {
|
||||
bi_index srcs[2] = {dst, dst};
|
||||
unsigned channels[2] = {0, 2};
|
||||
bi_make_vec_to(b, post_swizzle_dst, srcs, channels, 2, 8);
|
||||
}
|
||||
}
|
||||
|
||||
/* Returns dimension with 0 special casing cubemaps. Shamelessly copied from
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue