mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 11:18:08 +02:00
pan/midg: Support 8/16 bit load/store
Needed for panvk copy shaders to support 8 or 16bit formats.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14885>
This commit is contained in:
parent
59ea6e2e27
commit
a8fbfcfbd3
4 changed files with 101 additions and 70 deletions
|
|
@ -134,9 +134,13 @@ M_LOAD(ld_vary_32, nir_type_uint32);
|
||||||
M_LOAD(ld_ubo_32, nir_type_uint32);
|
M_LOAD(ld_ubo_32, nir_type_uint32);
|
||||||
M_LOAD(ld_ubo_64, nir_type_uint32);
|
M_LOAD(ld_ubo_64, nir_type_uint32);
|
||||||
M_LOAD(ld_ubo_128, nir_type_uint32);
|
M_LOAD(ld_ubo_128, nir_type_uint32);
|
||||||
|
M_LOAD(ld_u8, nir_type_uint8);
|
||||||
|
M_LOAD(ld_u16, nir_type_uint16);
|
||||||
M_LOAD(ld_32, nir_type_uint32);
|
M_LOAD(ld_32, nir_type_uint32);
|
||||||
M_LOAD(ld_64, nir_type_uint32);
|
M_LOAD(ld_64, nir_type_uint32);
|
||||||
M_LOAD(ld_128, nir_type_uint32);
|
M_LOAD(ld_128, nir_type_uint32);
|
||||||
|
M_STORE(st_u8, nir_type_uint8);
|
||||||
|
M_STORE(st_u16, nir_type_uint16);
|
||||||
M_STORE(st_32, nir_type_uint32);
|
M_STORE(st_32, nir_type_uint32);
|
||||||
M_STORE(st_64, nir_type_uint32);
|
M_STORE(st_64, nir_type_uint32);
|
||||||
M_STORE(st_128, nir_type_uint32);
|
M_STORE(st_128, nir_type_uint32);
|
||||||
|
|
@ -1198,7 +1202,8 @@ mir_set_intr_mask(nir_instr *instr, midgard_instruction *ins, bool is_read)
|
||||||
dsize = nir_dest_bit_size(intr->dest);
|
dsize = nir_dest_bit_size(intr->dest);
|
||||||
} else {
|
} else {
|
||||||
nir_mask = nir_intrinsic_write_mask(intr);
|
nir_mask = nir_intrinsic_write_mask(intr);
|
||||||
dsize = 32;
|
dsize = OP_IS_COMMON_STORE(ins->op) ?
|
||||||
|
nir_src_bit_size(intr->src[0]) : 32;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Once we have the NIR mask, we need to normalize to work in 32-bit space */
|
/* Once we have the NIR mask, we need to normalize to work in 32-bit space */
|
||||||
|
|
@ -1283,19 +1288,55 @@ emit_global(
|
||||||
unsigned bitsize = nir_dest_bit_size(intr->dest) *
|
unsigned bitsize = nir_dest_bit_size(intr->dest) *
|
||||||
nir_dest_num_components(intr->dest);
|
nir_dest_num_components(intr->dest);
|
||||||
|
|
||||||
if (bitsize <= 32)
|
switch (bitsize) {
|
||||||
ins = m_ld_32(srcdest, 0);
|
case 8: ins = m_ld_u8(srcdest, 0); break;
|
||||||
else if (bitsize <= 64)
|
case 16: ins = m_ld_u16(srcdest, 0); break;
|
||||||
ins = m_ld_64(srcdest, 0);
|
case 32: ins = m_ld_32(srcdest, 0); break;
|
||||||
else if (bitsize <= 128)
|
case 64: ins = m_ld_64(srcdest, 0); break;
|
||||||
ins = m_ld_128(srcdest, 0);
|
case 128: ins = m_ld_128(srcdest, 0); break;
|
||||||
else
|
default: unreachable("Invalid global read size");
|
||||||
unreachable("Invalid global read size");
|
}
|
||||||
|
|
||||||
|
mir_set_intr_mask(instr, &ins, is_read);
|
||||||
|
|
||||||
|
/* For anything not aligned on 32bit, make sure we write full
|
||||||
|
* 32 bits registers. */
|
||||||
|
if (bitsize & 31) {
|
||||||
|
unsigned comps_per_32b = 32 / nir_dest_bit_size(intr->dest);
|
||||||
|
|
||||||
|
for (unsigned c = 0; c < 4 * comps_per_32b; c += comps_per_32b) {
|
||||||
|
if (!(ins.mask & BITFIELD_RANGE(c, comps_per_32b)))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
unsigned base = ~0;
|
||||||
|
for (unsigned i = 0; i < comps_per_32b; i++) {
|
||||||
|
if (ins.mask & BITFIELD_BIT(c + i)) {
|
||||||
|
base = ins.swizzle[0][c + i];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(base != ~0);
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < comps_per_32b; i++) {
|
||||||
|
if (!(ins.mask & BITFIELD_BIT(c + i))) {
|
||||||
|
ins.swizzle[0][c + i] = base + i;
|
||||||
|
ins.mask |= BITFIELD_BIT(c + i);
|
||||||
|
}
|
||||||
|
assert(ins.swizzle[0][c + i] == base + i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
unsigned bitsize = nir_src_bit_size(intr->src[0]) *
|
unsigned bitsize = nir_src_bit_size(intr->src[0]) *
|
||||||
nir_src_num_components(intr->src[0]);
|
nir_src_num_components(intr->src[0]);
|
||||||
|
|
||||||
if (bitsize <= 32)
|
if (bitsize == 8)
|
||||||
|
ins = m_st_u8(srcdest, 0);
|
||||||
|
else if (bitsize == 16)
|
||||||
|
ins = m_st_u16(srcdest, 0);
|
||||||
|
else if (bitsize <= 32)
|
||||||
ins = m_st_32(srcdest, 0);
|
ins = m_st_32(srcdest, 0);
|
||||||
else if (bitsize <= 64)
|
else if (bitsize <= 64)
|
||||||
ins = m_st_64(srcdest, 0);
|
ins = m_st_64(srcdest, 0);
|
||||||
|
|
@ -1303,10 +1344,11 @@ emit_global(
|
||||||
ins = m_st_128(srcdest, 0);
|
ins = m_st_128(srcdest, 0);
|
||||||
else
|
else
|
||||||
unreachable("Invalid global store size");
|
unreachable("Invalid global store size");
|
||||||
|
|
||||||
|
mir_set_intr_mask(instr, &ins, is_read);
|
||||||
}
|
}
|
||||||
|
|
||||||
mir_set_offset(ctx, &ins, offset, seg);
|
mir_set_offset(ctx, &ins, offset, seg);
|
||||||
mir_set_intr_mask(instr, &ins, is_read);
|
|
||||||
|
|
||||||
/* Set a valid swizzle for masked out components */
|
/* Set a valid swizzle for masked out components */
|
||||||
assert(ins.mask);
|
assert(ins.mask);
|
||||||
|
|
|
||||||
|
|
@ -368,14 +368,24 @@ mir_pack_vector_srcs(midgard_instruction *ins, midgard_vector_alu *alu)
|
||||||
static void
|
static void
|
||||||
mir_pack_swizzle_ldst(midgard_instruction *ins)
|
mir_pack_swizzle_ldst(midgard_instruction *ins)
|
||||||
{
|
{
|
||||||
/* TODO: non-32-bit, non-vec4 */
|
unsigned compsz = OP_IS_STORE(ins->op) ?
|
||||||
for (unsigned c = 0; c < 4; ++c) {
|
nir_alu_type_get_type_size(ins->src_types[0]) :
|
||||||
|
nir_alu_type_get_type_size(ins->dest_type);
|
||||||
|
unsigned maxcomps = 128 / compsz;
|
||||||
|
unsigned step = DIV_ROUND_UP(32, compsz);
|
||||||
|
|
||||||
|
for (unsigned c = 0; c < maxcomps; c += step) {
|
||||||
unsigned v = ins->swizzle[0][c];
|
unsigned v = ins->swizzle[0][c];
|
||||||
|
|
||||||
/* Check vec4 */
|
/* Make sure the component index doesn't exceed the maximum
|
||||||
assert(v <= 3);
|
* number of components. */
|
||||||
|
assert(v <= maxcomps);
|
||||||
|
|
||||||
ins->load_store.swizzle |= v << (2 * c);
|
if (compsz <= 32)
|
||||||
|
ins->load_store.swizzle |= (v / step) << (2 * (c / step));
|
||||||
|
else
|
||||||
|
ins->load_store.swizzle |= ((v / step) << (4 * c)) |
|
||||||
|
(((v / step) + 1) << ((4 * c) + 2));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* TODO: arg_1/2 */
|
/* TODO: arg_1/2 */
|
||||||
|
|
@ -458,57 +468,34 @@ mir_pack_tex_ooo(midgard_block *block, midgard_bundle *bundle, midgard_instructi
|
||||||
|
|
||||||
static unsigned
|
static unsigned
|
||||||
midgard_pack_common_store_mask(midgard_instruction *ins) {
|
midgard_pack_common_store_mask(midgard_instruction *ins) {
|
||||||
unsigned comp_sz = nir_alu_type_get_type_size(ins->dest_type);
|
ASSERTED unsigned comp_sz = nir_alu_type_get_type_size(ins->src_types[0]);
|
||||||
unsigned mask = ins->mask;
|
unsigned bytemask = mir_bytemask(ins);
|
||||||
unsigned packed = 0;
|
unsigned packed = 0;
|
||||||
unsigned nr_comp;
|
|
||||||
|
|
||||||
switch (ins->op) {
|
switch (ins->op) {
|
||||||
case midgard_op_st_u8:
|
case midgard_op_st_u8:
|
||||||
packed |= mask & 1;
|
return mir_bytemask(ins) & 1;
|
||||||
break;
|
case midgard_op_st_u16:
|
||||||
case midgard_op_st_u16:
|
return mir_bytemask(ins) & 3;
|
||||||
nr_comp = 16 / comp_sz;
|
case midgard_op_st_32:
|
||||||
for (int i = 0; i < nr_comp; i++) {
|
return mir_bytemask(ins);
|
||||||
if (mask & (1 << i)) {
|
case midgard_op_st_64:
|
||||||
if (comp_sz == 16)
|
assert(comp_sz >= 16);
|
||||||
packed |= 0x3;
|
for (unsigned i = 0; i < 4; i++) {
|
||||||
else if (comp_sz == 8)
|
if (bytemask & (3 << (i * 2)))
|
||||||
packed |= 1 << i;
|
packed |= 1 << i;
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case midgard_op_st_32:
|
|
||||||
case midgard_op_st_64:
|
|
||||||
case midgard_op_st_128: {
|
|
||||||
unsigned total_sz = 32;
|
|
||||||
if (ins->op == midgard_op_st_128)
|
|
||||||
total_sz = 128;
|
|
||||||
else if (ins->op == midgard_op_st_64)
|
|
||||||
total_sz = 64;
|
|
||||||
|
|
||||||
nr_comp = total_sz / comp_sz;
|
|
||||||
|
|
||||||
/* Each writemask bit masks 1/4th of the value to be stored. */
|
|
||||||
assert(comp_sz >= total_sz / 4);
|
|
||||||
|
|
||||||
for (int i = 0; i < nr_comp; i++) {
|
|
||||||
if (mask & (1 << i)) {
|
|
||||||
if (comp_sz == total_sz)
|
|
||||||
packed |= 0xF;
|
|
||||||
else if (comp_sz == total_sz / 2)
|
|
||||||
packed |= 0x3 << (i * 2);
|
|
||||||
else if (comp_sz == total_sz / 4)
|
|
||||||
packed |= 0x1 << i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
default:
|
return packed;
|
||||||
unreachable("unexpected ldst opcode");
|
case midgard_op_st_128:
|
||||||
|
assert(comp_sz >= 32);
|
||||||
|
for (unsigned i = 0; i < 4; i++) {
|
||||||
|
if (bytemask & (0xf << (i * 4)))
|
||||||
|
packed |= 1 << i;
|
||||||
|
}
|
||||||
|
return packed;
|
||||||
|
default:
|
||||||
|
unreachable("unexpected ldst opcode");
|
||||||
}
|
}
|
||||||
|
|
||||||
return packed;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
@ -523,16 +510,18 @@ mir_pack_ldst_mask(midgard_instruction *ins)
|
||||||
if (sz == 64) {
|
if (sz == 64) {
|
||||||
packed = ((ins->mask & 0x2) ? (0x8 | 0x4) : 0) |
|
packed = ((ins->mask & 0x2) ? (0x8 | 0x4) : 0) |
|
||||||
((ins->mask & 0x1) ? (0x2 | 0x1) : 0);
|
((ins->mask & 0x1) ? (0x2 | 0x1) : 0);
|
||||||
} else if (sz == 16) {
|
} else if (sz < 32) {
|
||||||
|
unsigned comps_per_32b = 32 / sz;
|
||||||
|
|
||||||
packed = 0;
|
packed = 0;
|
||||||
|
|
||||||
for (unsigned i = 0; i < 4; ++i) {
|
for (unsigned i = 0; i < 4; ++i) {
|
||||||
/* Make sure we're duplicated */
|
unsigned submask = (ins->mask >> (i * comps_per_32b)) &
|
||||||
bool u = (ins->mask & (1 << (2*i + 0))) != 0;
|
BITFIELD_MASK(comps_per_32b);
|
||||||
ASSERTED bool v = (ins->mask & (1 << (2*i + 1))) != 0;
|
|
||||||
assert(u == v);
|
|
||||||
|
|
||||||
packed |= (u << i);
|
/* Make sure we're duplicated */
|
||||||
|
assert(submask == 0 || submask == BITFIELD_MASK(comps_per_32b));
|
||||||
|
packed |= (submask != 0) << i;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
assert(sz == 32);
|
assert(sz == 32);
|
||||||
|
|
|
||||||
|
|
@ -80,7 +80,7 @@ midgard_opt_dead_code_eliminate_block(compiler_context *ctx, midgard_block *bloc
|
||||||
unsigned oldmask = ins->mask;
|
unsigned oldmask = ins->mask;
|
||||||
|
|
||||||
/* Make sure we're packable */
|
/* Make sure we're packable */
|
||||||
if (type_size == 16 && ins->type == TAG_LOAD_STORE_4)
|
if (type_size < 32 && ins->type == TAG_LOAD_STORE_4)
|
||||||
round_size = 32;
|
round_size = 32;
|
||||||
|
|
||||||
unsigned rounded = mir_round_bytemask_up(live[ins->dest], round_size);
|
unsigned rounded = mir_round_bytemask_up(live[ins->dest], round_size);
|
||||||
|
|
|
||||||
|
|
@ -781,7 +781,7 @@ install_registers_instr(
|
||||||
struct phys_reg dst = index_to_reg(ctx, l, ins->dest, dest_shift);
|
struct phys_reg dst = index_to_reg(ctx, l, ins->dest, dest_shift);
|
||||||
|
|
||||||
ins->dest = SSA_FIXED_REGISTER(dst.reg);
|
ins->dest = SSA_FIXED_REGISTER(dst.reg);
|
||||||
offset_swizzle(ins->swizzle[0], 0, 2, 2, dst.offset);
|
offset_swizzle(ins->swizzle[0], 0, 2, dest_shift, dst.offset);
|
||||||
mir_set_bytemask(ins, mir_bytemask(ins) << dst.offset);
|
mir_set_bytemask(ins, mir_bytemask(ins) << dst.offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue