mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 21:50:12 +01:00
gallivm: Basic AVX2 support.
v2: pblendb -> pblendvb
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
This commit is contained in:
parent
add01add1b
commit
e088390c7d
4 changed files with 98 additions and 28 deletions
|
|
@ -142,6 +142,20 @@ lp_build_min_simple(struct lp_build_context *bld,
|
|||
intrinsic = "llvm.ppc.altivec.vminfp";
|
||||
intr_size = 128;
|
||||
}
|
||||
} else if (HAVE_LLVM < 0x0309 &&
|
||||
util_cpu_caps.has_avx2 && type.length > 4) {
|
||||
intr_size = 256;
|
||||
switch (type.width) {
|
||||
case 8:
|
||||
intrinsic = type.sign ? "llvm.x86.avx2.pmins.b" : "llvm.x86.avx2.pminu.b";
|
||||
break;
|
||||
case 16:
|
||||
intrinsic = type.sign ? "llvm.x86.avx2.pmins.w" : "llvm.x86.avx2.pminu.w";
|
||||
break;
|
||||
case 32:
|
||||
intrinsic = type.sign ? "llvm.x86.avx2.pmins.d" : "llvm.x86.avx2.pminu.d";
|
||||
break;
|
||||
}
|
||||
} else if (HAVE_LLVM < 0x0309 &&
|
||||
util_cpu_caps.has_sse2 && type.length >= 2) {
|
||||
intr_size = 128;
|
||||
|
|
@ -346,6 +360,20 @@ lp_build_max_simple(struct lp_build_context *bld,
|
|||
intrinsic = "llvm.ppc.altivec.vmaxfp";
|
||||
intr_size = 128;
|
||||
}
|
||||
} else if (HAVE_LLVM < 0x0309 &&
|
||||
util_cpu_caps.has_avx2 && type.length > 4) {
|
||||
intr_size = 256;
|
||||
switch (type.width) {
|
||||
case 8:
|
||||
intrinsic = type.sign ? "llvm.x86.avx2.pmaxs.b" : "llvm.x86.avx2.pmaxu.b";
|
||||
break;
|
||||
case 16:
|
||||
intrinsic = type.sign ? "llvm.x86.avx2.pmaxs.w" : "llvm.x86.avx2.pmaxu.w";
|
||||
break;
|
||||
case 32:
|
||||
intrinsic = type.sign ? "llvm.x86.avx2.pmaxs.d" : "llvm.x86.avx2.pmaxu.d";
|
||||
break;
|
||||
}
|
||||
} else if (HAVE_LLVM < 0x0309 &&
|
||||
util_cpu_caps.has_sse2 && type.length >= 2) {
|
||||
intr_size = 128;
|
||||
|
|
@ -526,18 +554,27 @@ lp_build_add(struct lp_build_context *bld,
|
|||
if(a == bld->one || b == bld->one)
|
||||
return bld->one;
|
||||
|
||||
if (type.width * type.length == 128 &&
|
||||
!type.floating && !type.fixed) {
|
||||
if(util_cpu_caps.has_sse2) {
|
||||
if(type.width == 8)
|
||||
intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b";
|
||||
if(type.width == 16)
|
||||
intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w";
|
||||
} else if (util_cpu_caps.has_altivec) {
|
||||
if(type.width == 8)
|
||||
intrinsic = type.sign ? "llvm.ppc.altivec.vaddsbs" : "llvm.ppc.altivec.vaddubs";
|
||||
if(type.width == 16)
|
||||
intrinsic = type.sign ? "llvm.ppc.altivec.vaddshs" : "llvm.ppc.altivec.vadduhs";
|
||||
if (!type.floating && !type.fixed) {
|
||||
if (type.width * type.length == 128) {
|
||||
if(util_cpu_caps.has_sse2) {
|
||||
if(type.width == 8)
|
||||
intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b";
|
||||
if(type.width == 16)
|
||||
intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w";
|
||||
} else if (util_cpu_caps.has_altivec) {
|
||||
if(type.width == 8)
|
||||
intrinsic = type.sign ? "llvm.ppc.altivec.vaddsbs" : "llvm.ppc.altivec.vaddubs";
|
||||
if(type.width == 16)
|
||||
intrinsic = type.sign ? "llvm.ppc.altivec.vaddshs" : "llvm.ppc.altivec.vadduhs";
|
||||
}
|
||||
}
|
||||
if (type.width * type.length == 256) {
|
||||
if(util_cpu_caps.has_avx2) {
|
||||
if(type.width == 8)
|
||||
intrinsic = type.sign ? "llvm.x86.avx2.padds.b" : "llvm.x86.avx2.paddus.b";
|
||||
if(type.width == 16)
|
||||
intrinsic = type.sign ? "llvm.x86.avx2.padds.w" : "llvm.x86.avx2.paddus.w";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -818,18 +855,27 @@ lp_build_sub(struct lp_build_context *bld,
|
|||
if(b == bld->one)
|
||||
return bld->zero;
|
||||
|
||||
if (type.width * type.length == 128 &&
|
||||
!type.floating && !type.fixed) {
|
||||
if (util_cpu_caps.has_sse2) {
|
||||
if(type.width == 8)
|
||||
intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b";
|
||||
if(type.width == 16)
|
||||
intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w";
|
||||
} else if (util_cpu_caps.has_altivec) {
|
||||
if(type.width == 8)
|
||||
intrinsic = type.sign ? "llvm.ppc.altivec.vsubsbs" : "llvm.ppc.altivec.vsububs";
|
||||
if(type.width == 16)
|
||||
intrinsic = type.sign ? "llvm.ppc.altivec.vsubshs" : "llvm.ppc.altivec.vsubuhs";
|
||||
if (!type.floating && !type.fixed) {
|
||||
if (type.width * type.length == 128) {
|
||||
if (util_cpu_caps.has_sse2) {
|
||||
if(type.width == 8)
|
||||
intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b";
|
||||
if(type.width == 16)
|
||||
intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w";
|
||||
} else if (util_cpu_caps.has_altivec) {
|
||||
if(type.width == 8)
|
||||
intrinsic = type.sign ? "llvm.ppc.altivec.vsubsbs" : "llvm.ppc.altivec.vsububs";
|
||||
if(type.width == 16)
|
||||
intrinsic = type.sign ? "llvm.ppc.altivec.vsubshs" : "llvm.ppc.altivec.vsubuhs";
|
||||
}
|
||||
}
|
||||
if (type.width * type.length == 256) {
|
||||
if (util_cpu_caps.has_avx2) {
|
||||
if(type.width == 8)
|
||||
intrinsic = type.sign ? "llvm.x86.avx2.psubs.b" : "llvm.x86.avx2.psubus.b";
|
||||
if(type.width == 16)
|
||||
intrinsic = type.sign ? "llvm.x86.avx2.psubs.w" : "llvm.x86.avx2.psubus.w";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1587,6 +1633,16 @@ lp_build_abs(struct lp_build_context *bld,
|
|||
return lp_build_intrinsic_unary(builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a);
|
||||
}
|
||||
}
|
||||
else if (type.width*type.length == 256 && util_cpu_caps.has_avx2) {
|
||||
switch(type.width) {
|
||||
case 8:
|
||||
return lp_build_intrinsic_unary(builder, "llvm.x86.avx2.pabs.b", vec_type, a);
|
||||
case 16:
|
||||
return lp_build_intrinsic_unary(builder, "llvm.x86.avx2.pabs.w", vec_type, a);
|
||||
case 32:
|
||||
return lp_build_intrinsic_unary(builder, "llvm.x86.avx2.pabs.d", vec_type, a);
|
||||
}
|
||||
}
|
||||
else if (type.width*type.length == 256 && util_cpu_caps.has_ssse3 &&
|
||||
(gallivm_debug & GALLIVM_DEBUG_PERF) &&
|
||||
(type.width == 8 || type.width == 16 || type.width == 32)) {
|
||||
|
|
|
|||
|
|
@ -457,6 +457,11 @@ lp_build_init(void)
|
|||
util_cpu_caps.has_f16c = 0;
|
||||
util_cpu_caps.has_fma = 0;
|
||||
}
|
||||
if (HAVE_LLVM < 0x0304 || !USE_MCJIT) {
|
||||
/* AVX2 support has only been tested with LLVM 3.4, and it requires
|
||||
* MCJIT. */
|
||||
util_cpu_caps.has_avx2 = 0;
|
||||
}
|
||||
|
||||
#ifdef PIPE_ARCH_PPC_64
|
||||
/* Set the NJ bit in VSCR to 0 so denormalized values are handled as
|
||||
|
|
|
|||
|
|
@ -348,7 +348,9 @@ lp_build_select(struct lp_build_context *bld,
|
|||
else if (((util_cpu_caps.has_sse4_1 &&
|
||||
type.width * type.length == 128) ||
|
||||
(util_cpu_caps.has_avx &&
|
||||
type.width * type.length == 256 && type.width >= 32)) &&
|
||||
type.width * type.length == 256 && type.width >= 32) ||
|
||||
(util_cpu_caps.has_avx2 &&
|
||||
type.width * type.length == 256)) &&
|
||||
!LLVMIsConstant(a) &&
|
||||
!LLVMIsConstant(b) &&
|
||||
!LLVMIsConstant(mask)) {
|
||||
|
|
@ -365,9 +367,13 @@ lp_build_select(struct lp_build_context *bld,
|
|||
intrinsic = "llvm.x86.avx.blendv.pd.256";
|
||||
arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 4);
|
||||
}
|
||||
else {
|
||||
else if (type.width == 32) {
|
||||
intrinsic = "llvm.x86.avx.blendv.ps.256";
|
||||
arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8);
|
||||
} else {
|
||||
assert(util_cpu_caps.has_avx2);
|
||||
intrinsic = "llvm.x86.avx2.pblendvb";
|
||||
arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 32);
|
||||
}
|
||||
}
|
||||
else if (type.floating &&
|
||||
|
|
|
|||
|
|
@ -1409,6 +1409,9 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
|
|||
LLVMValueRef mipoff1 = NULL;
|
||||
LLVMValueRef colors0;
|
||||
LLVMValueRef colors1;
|
||||
boolean use_floats = util_cpu_caps.has_avx &&
|
||||
!util_cpu_caps.has_avx2 &&
|
||||
bld->coord_type.length > 4;
|
||||
|
||||
/* sample the first mipmap level */
|
||||
lp_build_mipmap_level_sizes(bld, ilevel0,
|
||||
|
|
@ -1423,7 +1426,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
|
|||
mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
|
||||
}
|
||||
|
||||
if (util_cpu_caps.has_avx && bld->coord_type.length > 4) {
|
||||
if (use_floats) {
|
||||
if (img_filter == PIPE_TEX_FILTER_NEAREST) {
|
||||
lp_build_sample_image_nearest_afloat(bld,
|
||||
size0,
|
||||
|
|
@ -1514,7 +1517,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
|
|||
mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
|
||||
}
|
||||
|
||||
if (util_cpu_caps.has_avx && bld->coord_type.length > 4) {
|
||||
if (use_floats) {
|
||||
if (img_filter == PIPE_TEX_FILTER_NEAREST) {
|
||||
lp_build_sample_image_nearest_afloat(bld,
|
||||
size1,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue