gallivm: add PK2H/UP2H support

Add support for these opcodes. The conversion functions were already
there, albeit they need some new packing helpers.
Just like the tgsi version, piglit won't like it for all the same
reasons, so it's disabled. (UP2H passes the piglit arb_shader_language_packing
tests; since PK2H won't, due to those rounding differences, I don't
know whether that one works or not, as the piglit test is rather difficult
to deal with.)
This commit is contained in:
Roland Scheidegger 2016-01-29 02:49:22 +01:00
parent dc16086e3b
commit 5171ec9ca9
5 changed files with 119 additions and 2 deletions

View file

@ -130,6 +130,7 @@ lp_build_half_to_float(struct gallivm_state *gallivm,
*
* Convert float32 to half floats, preserving Infs and NaNs,
* with rounding towards zero (trunc).
* XXX: For GL, would prefer rounding towards nearest(-even).
*/
LLVMValueRef
lp_build_float_to_half(struct gallivm_state *gallivm,
@ -143,6 +144,15 @@ lp_build_float_to_half(struct gallivm_state *gallivm,
struct lp_type i16_type = lp_type_int_vec(16, 16 * length);
LLVMValueRef result;
/*
* Note: Newer llvm versions (3.6 or so) support fptrunc to 16 bits
* directly, without any (x86 or generic) intrinsics.
* Albeit the rounding mode cannot be specified (and is undefined,
* though in practice on x86 seems to do nearest-even but it may
* be dependent on instruction set support), so is essentially
* useless.
*/
if (util_cpu_caps.has_f16c &&
(length == 4 || length == 8)) {
struct lp_type i168_type = lp_type_int_vec(16, 16 * 8);
@ -187,7 +197,11 @@ lp_build_float_to_half(struct gallivm_state *gallivm,
LLVMValueRef index = LLVMConstInt(i32t, i, 0);
LLVMValueRef f32 = LLVMBuildExtractElement(builder, src, index, "");
#if 0
/* XXX: not really supported by backends */
/*
* XXX: not really supported by backends.
* Even if they would now, rounding mode cannot be specified and
* is undefined.
*/
LLVMValueRef f16 = lp_build_intrinsic_unary(builder, "llvm.convert.to.fp16", i16t, f32);
#else
LLVMValueRef f16 = LLVMBuildCall(builder, func, &f32, 1, "");

View file

@ -256,6 +256,32 @@ lp_build_concat_n(struct gallivm_state *gallivm,
}
/**
 * De-interleave a vector by keeping every other element.
 *
 * A shuffle of a with itself is built whose mask selects the source
 * elements at indices lo_hi, lo_hi + 2, lo_hi + 4, ...; lo_hi == 0 thus
 * starts at element 0 and lo_hi == 1 at element 1.
 *
 * @param gallivm    state supplying the LLVM builder
 * @param num_elems  element count of the source vector; must not exceed
 *                   LP_MAX_VECTOR_LENGTH
 * @param a          source vector
 * @param lo_hi      index of the first element to pick (0 or 1)
 * @return vector with num_elems / 2 elements, i.e. half the element
 *         count and half the total width of the source
 */
LLVMValueRef
lp_build_uninterleave1(struct gallivm_state *gallivm,
                       unsigned num_elems,
                       LLVMValueRef a,
                       unsigned lo_hi)
{
   LLVMValueRef indices[LP_MAX_VECTOR_LENGTH];
   LLVMValueRef mask;
   const unsigned half = num_elems / 2;
   unsigned dst, src;

   assert(num_elems <= LP_MAX_VECTOR_LENGTH);

   /* Walk the source with a stride of two, starting at lo_hi. */
   for (dst = 0, src = lo_hi; dst < half; dst++, src += 2) {
      indices[dst] = lp_build_const_int32(gallivm, src);
   }

   mask = LLVMConstVector(indices, half);

   return LLVMBuildShuffleVector(gallivm->builder, a, a, mask, "");
}
/**
* Interleave vector elements.
*

View file

@ -58,6 +58,11 @@ lp_build_interleave2(struct gallivm_state *gallivm,
LLVMValueRef b,
unsigned lo_hi);
LLVMValueRef
lp_build_uninterleave1(struct gallivm_state *gallivm,
unsigned num_elems,
LLVMValueRef a,
unsigned lo_hi);
void
lp_build_unpack2(struct gallivm_state *gallivm,

View file

@ -248,7 +248,6 @@ lp_build_tgsi_inst_llvm(
/* Ignore deprecated instructions */
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_UP2H:
case TGSI_OPCODE_UP2US:
case TGSI_OPCODE_UP4B:
case TGSI_OPCODE_UP4UB:

View file

@ -45,8 +45,10 @@
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_const.h"
#include "lp_bld_conv.h"
#include "lp_bld_gather.h"
#include "lp_bld_logic.h"
#include "lp_bld_pack.h"
#include "tgsi/tgsi_exec.h"
@ -530,6 +532,75 @@ static struct lp_build_tgsi_action log_action = {
log_emit /* emit */
};
/* TGSI_OPCODE_PK2H */
/*
 * Fetch the operands for PK2H: channels X and Y of source register 0
 * are stored in emit_data->args[0] and emit_data->args[1] respectively.
 */
static void
pk2h_fetch_args(
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   /* args[i] receives src0.<chans[i]> */
   const unsigned chans[2] = { TGSI_CHAN_X, TGSI_CHAN_Y };
   unsigned i;

   for (i = 0; i < 2; i++) {
      emit_data->args[i] = lp_build_emit_fetch(bld_base, emit_data->inst,
                                               0, chans[i]);
   }
}
static void
pk2h_emit(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct gallivm_state *gallivm = bld_base->base.gallivm;
struct lp_type f16i_t;
LLVMValueRef lo, hi, res;
f16i_t = lp_type_uint_vec(16, bld_base->base.type.length * 32);
lo = lp_build_float_to_half(gallivm, emit_data->args[0]);
hi = lp_build_float_to_half(gallivm, emit_data->args[1]);
/* maybe some interleave doubling vector width would be useful... */
lo = lp_build_pad_vector(gallivm, lo, bld_base->base.type.length * 2);
hi = lp_build_pad_vector(gallivm, hi, bld_base->base.type.length * 2);
res = lp_build_interleave2(gallivm, f16i_t, lo, hi, 0);
emit_data->output[emit_data->chan] = res;
}
/*
 * Action descriptor for TGSI_OPCODE_PK2H: pairs the PK2H-specific
 * argument fetch (src0.x and src0.y) with the PK2H emit callback.
 */
static struct lp_build_tgsi_action pk2h_action = {
pk2h_fetch_args, /* fetch_args */
pk2h_emit /* emit */
};
/* TGSI_OPCODE_UP2H */
/*
 * Emit code for UP2H.
 *
 * args[0] is reinterpreted as a vector of 16-bit integers with twice the
 * base element count. The elements at even indices and those at odd
 * indices are split into two vectors, each converted from half float to
 * float. The even-index result goes to output[0] and output[2], the
 * odd-index result to output[1] and output[3].
 *
 * The action parameter is unused.
 */
static void
up2h_emit(const struct lp_build_tgsi_action *action,
          struct lp_build_tgsi_context *bld_base,
          struct lp_build_emit_data *emit_data)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   const unsigned num_halves = bld_base->base.type.length * 2;
   LLVMTypeRef half_vec_type =
      LLVMVectorType(LLVMInt16TypeInContext(gallivm->context), num_halves);
   LLVMValueRef packed, even, odd, from_even, from_odd;

   packed = LLVMBuildBitCast(gallivm->builder, emit_data->args[0],
                             half_vec_type, "");

   even = lp_build_uninterleave1(gallivm, num_halves, packed, 0);
   odd = lp_build_uninterleave1(gallivm, num_halves, packed, 1);

   from_even = lp_build_half_to_float(gallivm, even);
   from_odd = lp_build_half_to_float(gallivm, odd);

   /* The two unpacked values are replicated across all four outputs. */
   emit_data->output[0] = from_even;
   emit_data->output[1] = from_odd;
   emit_data->output[2] = from_even;
   emit_data->output[3] = from_odd;
}
/*
 * Action descriptor for TGSI_OPCODE_UP2H: uses the shared
 * scalar_unary_fetch_args fetch together with the UP2H emit callback.
 */
static struct lp_build_tgsi_action up2h_action = {
scalar_unary_fetch_args, /* fetch_args */
up2h_emit /* emit */
};
/* TGSI_OPCODE_LRP */
static void
@ -1032,10 +1103,12 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action;
bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action;
bld_base->op_actions[TGSI_OPCODE_LOG] = log_action;
bld_base->op_actions[TGSI_OPCODE_PK2H] = pk2h_action;
bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action;
bld_base->op_actions[TGSI_OPCODE_SQRT] = sqrt_action;
bld_base->op_actions[TGSI_OPCODE_POW] = pow_action;
bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action;
bld_base->op_actions[TGSI_OPCODE_UP2H] = up2h_action;
bld_base->op_actions[TGSI_OPCODE_XPD] = xpd_action;
bld_base->op_actions[TGSI_OPCODE_BREAKC].fetch_args = scalar_unary_fetch_args;