glsl, nir: Make ir_quadop_bitfield_insert a vectorized operation.

We would like to be able to combine

   result.x = bitfieldInsert(src0.x, src1.x, src2.x, src3.x);
   result.y = bitfieldInsert(src0.y, src1.y, src2.y, src3.y);
   result.z = bitfieldInsert(src0.z, src1.z, src2.z, src3.z);
   result.w = bitfieldInsert(src0.w, src1.w, src2.w, src3.w);

into a single ivec4 bitfieldInsert operation.  This should be possible
with most drivers.

This patch changes the offset and bits parameters from scalar ints
to ivecN or uvecN.  The type of all four operands will be the same,
for simplicity.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
This commit is contained in:
Kenneth Graunke 2016-01-05 04:01:11 -08:00 committed by Matt Turner
parent b85a229e1f
commit b4e198f47f
7 changed files with 24 additions and 18 deletions

View file

@ -4902,13 +4902,19 @@ builtin_builder::_bitfieldExtract(const glsl_type *type)
ir_function_signature * ir_function_signature *
builtin_builder::_bitfieldInsert(const glsl_type *type) builtin_builder::_bitfieldInsert(const glsl_type *type)
{ {
bool is_uint = type->base_type == GLSL_TYPE_UINT;
ir_variable *base = in_var(type, "base"); ir_variable *base = in_var(type, "base");
ir_variable *insert = in_var(type, "insert"); ir_variable *insert = in_var(type, "insert");
ir_variable *offset = in_var(glsl_type::int_type, "offset"); ir_variable *offset = in_var(glsl_type::int_type, "offset");
ir_variable *bits = in_var(glsl_type::int_type, "bits"); ir_variable *bits = in_var(glsl_type::int_type, "bits");
MAKE_SIG(type, gpu_shader5_or_es31, 4, base, insert, offset, bits); MAKE_SIG(type, gpu_shader5_or_es31, 4, base, insert, offset, bits);
body.emit(ret(bitfield_insert(base, insert, offset, bits))); operand cast_offset = is_uint ? i2u(offset) : operand(offset);
operand cast_bits = is_uint ? i2u(bits) : operand(bits);
body.emit(ret(bitfield_insert(base, insert,
swizzle(cast_offset, SWIZZLE_XXXX, type->vector_elements),
swizzle(cast_bits, SWIZZLE_XXXX, type->vector_elements))));
return sig; return sig;
} }

View file

@ -1710,7 +1710,6 @@ public:
operation == ir_triop_vector_insert || operation == ir_triop_vector_insert ||
operation == ir_quadop_vector || operation == ir_quadop_vector ||
/* TODO: these can't currently be vectorized */ /* TODO: these can't currently be vectorized */
operation == ir_quadop_bitfield_insert ||
operation == ir_triop_bitfield_extract; operation == ir_triop_bitfield_extract;
} }

View file

@ -1710,10 +1710,10 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
} }
case ir_quadop_bitfield_insert: { case ir_quadop_bitfield_insert: {
int offset = op[2]->value.i[0];
int bits = op[3]->value.i[0];
for (unsigned c = 0; c < components; c++) { for (unsigned c = 0; c < components; c++) {
int offset = op[2]->value.i[c];
int bits = op[3]->value.i[c];
if (bits == 0) if (bits == 0)
data.u[c] = op[0]->value.u[c]; data.u[c] = op[0]->value.u[c];
else if (offset < 0 || bits < 0) else if (offset < 0 || bits < 0)

View file

@ -647,10 +647,11 @@ ir_validate::visit_leave(ir_expression *ir)
break; break;
case ir_quadop_bitfield_insert: case ir_quadop_bitfield_insert:
assert(ir->type->is_integer());
assert(ir->operands[0]->type == ir->type); assert(ir->operands[0]->type == ir->type);
assert(ir->operands[1]->type == ir->type); assert(ir->operands[1]->type == ir->type);
assert(ir->operands[2]->type == glsl_type::int_type); assert(ir->operands[2]->type == ir->type);
assert(ir->operands[3]->type == glsl_type::int_type); assert(ir->operands[3]->type == ir->type);
break; break;
case ir_quadop_vector: case ir_quadop_vector:

View file

@ -381,8 +381,8 @@ lower_instructions_visitor::ldexp_to_arith(ir_expression *ir)
ir_constant *sign_mask = new(ir) ir_constant(0x80000000u, vec_elem); ir_constant *sign_mask = new(ir) ir_constant(0x80000000u, vec_elem);
ir_constant *exp_shift = new(ir) ir_constant(23); ir_constant *exp_shift = new(ir) ir_constant(23, vec_elem);
ir_constant *exp_width = new(ir) ir_constant(8); ir_constant *exp_width = new(ir) ir_constant(8, vec_elem);
/* Temporary variables */ /* Temporary variables */
ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary); ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary);
@ -470,8 +470,8 @@ lower_instructions_visitor::dldexp_to_arith(ir_expression *ir)
ir_constant *sign_mask = new(ir) ir_constant(0x80000000u); ir_constant *sign_mask = new(ir) ir_constant(0x80000000u);
ir_constant *exp_shift = new(ir) ir_constant(20); ir_constant *exp_shift = new(ir) ir_constant(20, vec_elem);
ir_constant *exp_width = new(ir) ir_constant(11); ir_constant *exp_width = new(ir) ir_constant(11, vec_elem);
ir_constant *exp_bias = new(ir) ir_constant(1022, vec_elem); ir_constant *exp_bias = new(ir) ir_constant(1022, vec_elem);
/* Temporary variables */ /* Temporary variables */

View file

@ -230,8 +230,8 @@ private:
if (op_mask & LOWER_PACK_USE_BFI) { if (op_mask & LOWER_PACK_USE_BFI) {
return bitfield_insert(bit_and(swizzle_x(u), constant(0xffffu)), return bitfield_insert(bit_and(swizzle_x(u), constant(0xffffu)),
swizzle_y(u), swizzle_y(u),
constant(16), constant(16u),
constant(16)); constant(16u));
} }
/* return (u.y << 16) | (u.x & 0xffff); */ /* return (u.y << 16) | (u.x & 0xffff); */
@ -261,9 +261,9 @@ private:
return bitfield_insert(bitfield_insert( return bitfield_insert(bitfield_insert(
bitfield_insert( bitfield_insert(
bit_and(swizzle_x(u), constant(0xffu)), bit_and(swizzle_x(u), constant(0xffu)),
swizzle_y(u), constant(8), constant(8)), swizzle_y(u), constant(8u), constant(8u)),
swizzle_z(u), constant(16), constant(8)), swizzle_z(u), constant(16u), constant(8u)),
swizzle_w(u), constant(24), constant(8)); swizzle_w(u), constant(24u), constant(8u));
} }
/* uvec4 u = UVEC4_RVAL & 0xff */ /* uvec4 u = UVEC4_RVAL & 0xff */

View file

@ -609,10 +609,10 @@ def quadop_horiz(name, output_size, src1_size, src2_size, src3_size,
[tuint, tuint, tuint, tuint], [tuint, tuint, tuint, tuint],
"", const_expr) "", const_expr)
opcode("bitfield_insert", 0, tuint, [0, 0, 1, 1], opcode("bitfield_insert", 0, tuint, [0, 0, 0, 0],
[tuint, tuint, tint, tint], "", """ [tuint, tuint, tint, tint], "", """
unsigned base = src0, insert = src1; unsigned base = src0, insert = src1;
int offset = src2.x, bits = src3.x; int offset = src2, bits = src3;
if (bits == 0) { if (bits == 0) {
dst = 0; dst = 0;
} else if (offset < 0 || bits < 0 || bits + offset > 32) { } else if (offset < 0 || bits < 0 || bits + offset > 32) {