mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-30 01:20:17 +01:00
nir/builder: Add nir_pack/unpack/bitcast helpers
The new helpers can generate any pack/unpack operation including those for which we do not have specific opcodes and they express a bitcast in terms of these pack/unpack operations. In particular, the new helpers properly handle 8-bit types. Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
This commit is contained in:
parent
b77d68b78e
commit
f16bd8a9fe
2 changed files with 151 additions and 76 deletions
|
|
@ -565,6 +565,132 @@ nir_imul_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
|
|||
return nir_imul(build, x, nir_imm_intN_t(build, y, x->bit_size));
|
||||
}
|
||||
|
||||
static inline nir_ssa_def *
|
||||
nir_pack_bits(nir_builder *b, nir_ssa_def *src, unsigned dest_bit_size)
|
||||
{
|
||||
assert(src->num_components * src->bit_size == dest_bit_size);
|
||||
|
||||
switch (dest_bit_size) {
|
||||
case 64:
|
||||
switch (src->bit_size) {
|
||||
case 32: return nir_pack_64_2x32(b, src);
|
||||
case 16: return nir_pack_64_4x16(b, src);
|
||||
default: break;
|
||||
}
|
||||
break;
|
||||
|
||||
case 32:
|
||||
if (src->bit_size == 16)
|
||||
return nir_pack_32_2x16(b, src);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* If we got here, we have no dedicated unpack opcode. */
|
||||
nir_ssa_def *dest = nir_imm_intN_t(b, 0, dest_bit_size);
|
||||
for (unsigned i = 0; i < src->num_components; i++) {
|
||||
nir_ssa_def *val;
|
||||
switch (dest_bit_size) {
|
||||
case 64: val = nir_u2u64(b, nir_channel(b, src, i)); break;
|
||||
case 32: val = nir_u2u32(b, nir_channel(b, src, i)); break;
|
||||
case 16: val = nir_u2u16(b, nir_channel(b, src, i)); break;
|
||||
default: unreachable("Invalid bit size");
|
||||
}
|
||||
val = nir_ishl(b, val, nir_imm_int(b, i * src->bit_size));
|
||||
dest = nir_ior(b, dest, val);
|
||||
}
|
||||
return dest;
|
||||
}
|
||||
|
||||
static inline nir_ssa_def *
|
||||
nir_unpack_bits(nir_builder *b, nir_ssa_def *src, unsigned dest_bit_size)
|
||||
{
|
||||
assert(src->num_components == 1);
|
||||
assert(src->bit_size > dest_bit_size);
|
||||
const unsigned dest_num_components = src->bit_size / dest_bit_size;
|
||||
assert(dest_num_components <= NIR_MAX_VEC_COMPONENTS);
|
||||
|
||||
switch (src->bit_size) {
|
||||
case 64:
|
||||
switch (dest_bit_size) {
|
||||
case 32: return nir_unpack_64_2x32(b, src);
|
||||
case 16: return nir_unpack_64_4x16(b, src);
|
||||
default: break;
|
||||
}
|
||||
break;
|
||||
|
||||
case 32:
|
||||
if (dest_bit_size == 16)
|
||||
return nir_unpack_32_2x16(b, src);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* If we got here, we have no dedicated unpack opcode. */
|
||||
nir_ssa_def *dest_comps[NIR_MAX_VEC_COMPONENTS];
|
||||
for (unsigned i = 0; i < dest_num_components; i++) {
|
||||
nir_ssa_def *val = nir_ushr(b, src, nir_imm_int(b, i * dest_bit_size));
|
||||
switch (dest_bit_size) {
|
||||
case 32: dest_comps[i] = nir_u2u32(b, val); break;
|
||||
case 16: dest_comps[i] = nir_u2u16(b, val); break;
|
||||
case 8: dest_comps[i] = nir_u2u8(b, val); break;
|
||||
default: unreachable("Invalid bit size");
|
||||
}
|
||||
}
|
||||
return nir_vec(b, dest_comps, dest_num_components);
|
||||
}
|
||||
|
||||
static inline nir_ssa_def *
|
||||
nir_bitcast_vector(nir_builder *b, nir_ssa_def *src, unsigned dest_bit_size)
|
||||
{
|
||||
assert((src->bit_size * src->num_components) % dest_bit_size == 0);
|
||||
const unsigned dest_num_components =
|
||||
(src->bit_size * src->num_components) / dest_bit_size;
|
||||
assert(dest_num_components <= NIR_MAX_VEC_COMPONENTS);
|
||||
|
||||
if (src->bit_size > dest_bit_size) {
|
||||
assert(src->bit_size % dest_bit_size == 0);
|
||||
if (src->num_components == 1) {
|
||||
return nir_unpack_bits(b, src, dest_bit_size);
|
||||
} else {
|
||||
const unsigned divisor = src->bit_size / dest_bit_size;
|
||||
assert(src->num_components * divisor == dest_num_components);
|
||||
nir_ssa_def *dest[NIR_MAX_VEC_COMPONENTS];
|
||||
for (unsigned i = 0; i < src->num_components; i++) {
|
||||
nir_ssa_def *unpacked =
|
||||
nir_unpack_bits(b, nir_channel(b, src, i), dest_bit_size);
|
||||
assert(unpacked->num_components == divisor);
|
||||
for (unsigned j = 0; j < divisor; j++)
|
||||
dest[i * divisor + j] = nir_channel(b, unpacked, j);
|
||||
}
|
||||
return nir_vec(b, dest, dest_num_components);
|
||||
}
|
||||
} else if (src->bit_size < dest_bit_size) {
|
||||
assert(dest_bit_size % src->bit_size == 0);
|
||||
if (dest_num_components == 1) {
|
||||
return nir_pack_bits(b, src, dest_bit_size);
|
||||
} else {
|
||||
const unsigned divisor = dest_bit_size / src->bit_size;
|
||||
assert(src->num_components == dest_num_components * divisor);
|
||||
nir_ssa_def *dest[NIR_MAX_VEC_COMPONENTS];
|
||||
for (unsigned i = 0; i < dest_num_components; i++) {
|
||||
nir_component_mask_t src_mask =
|
||||
((1 << divisor) - 1) << (i * divisor);
|
||||
dest[i] = nir_pack_bits(b, nir_channels(b, src, src_mask),
|
||||
dest_bit_size);
|
||||
}
|
||||
return nir_vec(b, dest, dest_num_components);
|
||||
}
|
||||
} else {
|
||||
assert(src->bit_size == dest_bit_size);
|
||||
return src;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Turns a nir_src into a nir_ssa_def * so it can be passed to
|
||||
* nir_build_alu()-based builder calls.
|
||||
|
|
|
|||
|
|
@ -211,81 +211,6 @@ vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode,
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
vtn_handle_bitcast(struct vtn_builder *b, struct vtn_ssa_value *dest,
|
||||
struct nir_ssa_def *src)
|
||||
{
|
||||
if (glsl_get_vector_elements(dest->type) == src->num_components) {
|
||||
/* From the definition of OpBitcast in the SPIR-V 1.2 spec:
|
||||
*
|
||||
* "If Result Type has the same number of components as Operand, they
|
||||
* must also have the same component width, and results are computed per
|
||||
* component."
|
||||
*/
|
||||
dest->def = nir_imov(&b->nb, src);
|
||||
return;
|
||||
}
|
||||
|
||||
/* From the definition of OpBitcast in the SPIR-V 1.2 spec:
|
||||
*
|
||||
* "If Result Type has a different number of components than Operand, the
|
||||
* total number of bits in Result Type must equal the total number of bits
|
||||
* in Operand. Let L be the type, either Result Type or Operand’s type, that
|
||||
* has the larger number of components. Let S be the other type, with the
|
||||
* smaller number of components. The number of components in L must be an
|
||||
* integer multiple of the number of components in S. The first component
|
||||
* (that is, the only or lowest-numbered component) of S maps to the first
|
||||
* components of L, and so on, up to the last component of S mapping to the
|
||||
* last components of L. Within this mapping, any single component of S
|
||||
* (mapping to multiple components of L) maps its lower-ordered bits to the
|
||||
* lower-numbered components of L."
|
||||
*/
|
||||
unsigned src_bit_size = src->bit_size;
|
||||
unsigned dest_bit_size = glsl_get_bit_size(dest->type);
|
||||
unsigned src_components = src->num_components;
|
||||
unsigned dest_components = glsl_get_vector_elements(dest->type);
|
||||
vtn_assert(src_bit_size * src_components == dest_bit_size * dest_components);
|
||||
|
||||
nir_ssa_def *dest_chan[NIR_MAX_VEC_COMPONENTS];
|
||||
if (src_bit_size > dest_bit_size) {
|
||||
vtn_assert(src_bit_size % dest_bit_size == 0);
|
||||
unsigned divisor = src_bit_size / dest_bit_size;
|
||||
for (unsigned comp = 0; comp < src_components; comp++) {
|
||||
nir_ssa_def *split;
|
||||
if (src_bit_size == 64) {
|
||||
assert(dest_bit_size == 32 || dest_bit_size == 16);
|
||||
split = dest_bit_size == 32 ?
|
||||
nir_unpack_64_2x32(&b->nb, nir_channel(&b->nb, src, comp)) :
|
||||
nir_unpack_64_4x16(&b->nb, nir_channel(&b->nb, src, comp));
|
||||
} else {
|
||||
vtn_assert(src_bit_size == 32);
|
||||
vtn_assert(dest_bit_size == 16);
|
||||
split = nir_unpack_32_2x16(&b->nb, nir_channel(&b->nb, src, comp));
|
||||
}
|
||||
for (unsigned i = 0; i < divisor; i++)
|
||||
dest_chan[divisor * comp + i] = nir_channel(&b->nb, split, i);
|
||||
}
|
||||
} else {
|
||||
vtn_assert(dest_bit_size % src_bit_size == 0);
|
||||
unsigned divisor = dest_bit_size / src_bit_size;
|
||||
for (unsigned comp = 0; comp < dest_components; comp++) {
|
||||
unsigned channels = ((1 << divisor) - 1) << (comp * divisor);
|
||||
nir_ssa_def *src_chan = nir_channels(&b->nb, src, channels);
|
||||
if (dest_bit_size == 64) {
|
||||
assert(src_bit_size == 32 || src_bit_size == 16);
|
||||
dest_chan[comp] = src_bit_size == 32 ?
|
||||
nir_pack_64_2x32(&b->nb, src_chan) :
|
||||
nir_pack_64_4x16(&b->nb, src_chan);
|
||||
} else {
|
||||
vtn_assert(dest_bit_size == 32);
|
||||
vtn_assert(src_bit_size == 16);
|
||||
dest_chan[comp] = nir_pack_32_2x16(&b->nb, src_chan);
|
||||
}
|
||||
}
|
||||
}
|
||||
dest->def = nir_vec(&b->nb, dest_chan, dest_components);
|
||||
}
|
||||
|
||||
nir_op
|
||||
vtn_nir_alu_op_for_spirv_opcode(struct vtn_builder *b,
|
||||
SpvOp opcode, bool *swap,
|
||||
|
|
@ -633,7 +558,31 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
|
|||
}
|
||||
|
||||
case SpvOpBitcast:
|
||||
vtn_handle_bitcast(b, val->ssa, src[0]);
|
||||
/* From the definition of OpBitcast in the SPIR-V 1.2 spec:
|
||||
*
|
||||
* "If Result Type has the same number of components as Operand, they
|
||||
* must also have the same component width, and results are computed
|
||||
* per component.
|
||||
*
|
||||
* If Result Type has a different number of components than Operand,
|
||||
* the total number of bits in Result Type must equal the total
|
||||
* number of bits in Operand. Let L be the type, either Result Type
|
||||
* or Operand’s type, that has the larger number of components. Let S
|
||||
* be the other type, with the smaller number of components. The
|
||||
* number of components in L must be an integer multiple of the
|
||||
* number of components in S. The first component (that is, the only
|
||||
* or lowest-numbered component) of S maps to the first components of
|
||||
* L, and so on, up to the last component of S mapping to the last
|
||||
* components of L. Within this mapping, any single component of S
|
||||
* (mapping to multiple components of L) maps its lower-ordered bits
|
||||
* to the lower-numbered components of L."
|
||||
*/
|
||||
vtn_fail_if(src[0]->num_components * src[0]->bit_size !=
|
||||
glsl_get_vector_elements(type) * glsl_get_bit_size(type),
|
||||
"Source and destination of OpBitcast must have the same "
|
||||
"total number of bits");
|
||||
val->ssa->def = nir_bitcast_vector(&b->nb, src[0],
|
||||
glsl_get_bit_size(type));
|
||||
break;
|
||||
|
||||
case SpvOpFConvert: {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue