diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 9dda7f76c17..4105afc7eeb 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -314,9 +314,9 @@ lp_apply_ellipse_transform(struct lp_build_context *bld, static const unsigned char swizzle01[] = { 0, 1 }; static const unsigned char swizzle23[] = { 2, 3 }; LLVMValueRef ds2dx_ds2dy = lp_build_swizzle_aos_n(gallivm, ds2dx_ds2dy_dt2dx_dt2dy, - swizzle01, ARRAY_SIZE(swizzle01), half_length_vec.length); + swizzle01, ARRAY_SIZE(swizzle01), 4, half_length_vec.length); LLVMValueRef dt2dx_dt2dy = lp_build_swizzle_aos_n(gallivm, ds2dx_ds2dy_dt2dx_dt2dy, - swizzle23, ARRAY_SIZE(swizzle23), half_length_vec.length); + swizzle23, ARRAY_SIZE(swizzle23), 4, half_length_vec.length); LLVMValueRef square_length_dx_dy = lp_build_add(&half_length_bld, ds2dx_ds2dy, dt2dx_dt2dy); LLVMValueRef zero_length_dx_dy = lp_build_cmp(&half_length_bld, PIPE_FUNC_LESS, square_length_dx_dy, lp_build_const_vec(gallivm, half_length_vec, epsilon2)); @@ -334,17 +334,17 @@ lp_apply_ellipse_transform(struct lp_build_context *bld, */ static const unsigned char swizzle32[] = { 3, 2 }; LLVMValueRef dsdx_dsdy = lp_build_swizzle_aos_n(gallivm, dsdx_dsdy_dtdx_dtdy, - swizzle01, ARRAY_SIZE(swizzle01), half_length_vec.length); + swizzle01, ARRAY_SIZE(swizzle01), 4, half_length_vec.length); LLVMValueRef dtdy_dtdx = lp_build_swizzle_aos_n(gallivm, dsdx_dsdy_dtdx_dtdy, - swizzle32, ARRAY_SIZE(swizzle32), half_length_vec.length); + swizzle32, ARRAY_SIZE(swizzle32), 4, half_length_vec.length); LLVMValueRef dsdxdtdy_dsdydtdx = lp_build_mul(&half_length_bld, dsdx_dsdy, dtdy_dtdx); static const unsigned char swizzle0[] = { 0 }; static const unsigned char swizzle1[] = { 1 }; LLVMValueRef determinant = lp_build_sub(&quarter_length_bld, lp_build_swizzle_aos_n(gallivm, dsdxdtdy_dsdydtdx, - swizzle0, ARRAY_SIZE(swizzle0), quarter_length_vec.length), + 
swizzle0, ARRAY_SIZE(swizzle0), 2, quarter_length_vec.length), lp_build_swizzle_aos_n(gallivm, dsdxdtdy_dsdydtdx, - swizzle1, ARRAY_SIZE(swizzle1), quarter_length_vec.length)); + swizzle1, ARRAY_SIZE(swizzle1), 2, quarter_length_vec.length)); LLVMValueRef determinant2 = lp_build_mul(&quarter_length_bld, determinant, determinant); LLVMValueRef zero_determinant = lp_build_cmp(&quarter_length_bld, PIPE_FUNC_LESS, determinant2, lp_build_const_vec(gallivm, quarter_length_vec, epsilon2)); @@ -355,15 +355,15 @@ lp_apply_ellipse_transform(struct lp_build_context *bld, static const unsigned char swizzle02[] = { 0, 2 }; static const unsigned char swizzle13[] = { 1, 3 }; LLVMValueRef dsdx_dtdx = lp_build_swizzle_aos_n(gallivm, dsdx_dsdy_dtdx_dtdy, - swizzle02, ARRAY_SIZE(swizzle02), half_length_vec.length); + swizzle02, ARRAY_SIZE(swizzle02), 4, half_length_vec.length); LLVMValueRef dsdy_dtdy = lp_build_swizzle_aos_n(gallivm, dsdx_dsdy_dtdx_dtdy, - swizzle13, ARRAY_SIZE(swizzle13), half_length_vec.length); + swizzle13, ARRAY_SIZE(swizzle13), 4, half_length_vec.length); LLVMValueRef ds2dxdy_dt2dxdy = lp_build_mul(&half_length_bld, dsdx_dtdx, dsdy_dtdy); LLVMValueRef dot_product = lp_build_add(&quarter_length_bld, lp_build_swizzle_aos_n(gallivm, ds2dxdy_dt2dxdy, - swizzle0, ARRAY_SIZE(swizzle0), quarter_length_vec.length), + swizzle0, ARRAY_SIZE(swizzle0), 2, quarter_length_vec.length), lp_build_swizzle_aos_n(gallivm, ds2dxdy_dt2dxdy, - swizzle1, ARRAY_SIZE(swizzle1), quarter_length_vec.length)); + swizzle1, ARRAY_SIZE(swizzle1), 2, quarter_length_vec.length)); LLVMValueRef abs_dot_product = lp_build_abs(&quarter_length_bld, dot_product); LLVMValueRef zero_dot_product = lp_build_cmp(&quarter_length_bld, PIPE_FUNC_LESS, abs_dot_product, lp_build_const_vec(gallivm, quarter_length_vec, epsilon)); @@ -395,27 +395,27 @@ lp_apply_ellipse_transform(struct lp_build_context *bld, static const unsigned char swizzle20[] = { 2, 0 }; static const unsigned char swizzle31[] = { 3, 1 }; 
LLVMValueRef dt2dx_ds2dx = lp_build_swizzle_aos_n(gallivm, ds2dx_ds2dy_dt2dx_dt2dy, - swizzle20, ARRAY_SIZE(swizzle20), half_length_vec.length); + swizzle20, ARRAY_SIZE(swizzle20), 4, half_length_vec.length); LLVMValueRef dt2dy_ds2dy = lp_build_swizzle_aos_n(gallivm, ds2dx_ds2dy_dt2dx_dt2dy, - swizzle31, ARRAY_SIZE(swizzle31), half_length_vec.length); + swizzle31, ARRAY_SIZE(swizzle31), 4, half_length_vec.length); LLVMValueRef A_C = lp_build_add(&half_length_bld, dt2dx_ds2dx, dt2dy_ds2dy); LLVMValueRef A = lp_build_swizzle_aos_n(gallivm, A_C, - swizzle0, ARRAY_SIZE(swizzle0), quarter_length_vec.length); + swizzle0, ARRAY_SIZE(swizzle0), 2, quarter_length_vec.length); LLVMValueRef C = lp_build_swizzle_aos_n(gallivm, A_C, - swizzle1, ARRAY_SIZE(swizzle1), quarter_length_vec.length); + swizzle1, ARRAY_SIZE(swizzle1), 2, quarter_length_vec.length); /* * float B = -2.0 * (dx.s * dx.t + dy.s * dy.t) */ LLVMValueRef dtdx_dtdy = lp_build_swizzle_aos_n(gallivm, dsdx_dsdy_dtdx_dtdy, - swizzle23, ARRAY_SIZE(swizzle23), half_length_vec.length); + swizzle23, ARRAY_SIZE(swizzle23), 4, half_length_vec.length); LLVMValueRef dstdx_dstdy = lp_build_mul(&half_length_bld, dsdx_dsdy, dtdx_dtdy); LLVMValueRef B = lp_build_mul(&quarter_length_bld, lp_build_const_vec(gallivm, quarter_length_vec, -2), lp_build_add(&quarter_length_bld, lp_build_swizzle_aos_n(gallivm, dstdx_dstdy, - swizzle0, ARRAY_SIZE(swizzle0), quarter_length_vec.length), + swizzle0, ARRAY_SIZE(swizzle0), 2, quarter_length_vec.length), lp_build_swizzle_aos_n(gallivm, dstdx_dstdy, - swizzle1, ARRAY_SIZE(swizzle1), quarter_length_vec.length))); + swizzle1, ARRAY_SIZE(swizzle1), 2, quarter_length_vec.length))); /* * float F = (dx.s * dy.t - dy.s * dx.t) * (dx.s * dy.t - dy.s * dx.t) @@ -534,9 +534,9 @@ lp_build_rho_aniso(struct lp_build_sample_context *bld, LLVMValueRef int_size = lp_build_minify(int_size_bld, bld->int_size, first_level, true); LLVMValueRef float_size = lp_build_int_to_float(float_size_bld, int_size); - 
static const unsigned char swizzle01[] = { 0, 1 }; - LLVMValueRef width_height = lp_build_swizzle_aos_n(gallivm, float_size, - swizzle01, ARRAY_SIZE(swizzle01), full_length_vec.length); + static const unsigned char swizzle0011[] = { 0, 0, 1,1 }; + LLVMValueRef w_w_h_h = lp_build_swizzle_aos_n(gallivm, float_size, + swizzle0011, ARRAY_SIZE(swizzle0011), 0, full_length_vec.length); LLVMValueRef dsdx_dsdy_dtdx_dtdy; if (derivs) { @@ -569,27 +569,28 @@ lp_build_rho_aniso(struct lp_build_sample_context *bld, 0, 1, 2,3, }; dsdx_dsdy_dtdx_dtdy = lp_build_swizzle_aos_n(gallivm, dsdx_dsdy_dtdx_dtdy, - broadcast4, ARRAY_SIZE(broadcast4), full_length_vec.length); + broadcast4, ARRAY_SIZE(broadcast4), 4, full_length_vec.length); } } - dsdx_dsdy_dtdx_dtdy = lp_build_mul(&full_length_bld, dsdx_dsdy_dtdx_dtdy, width_height); + dsdx_dsdy_dtdx_dtdy = lp_build_mul(&full_length_bld, dsdx_dsdy_dtdx_dtdy, w_w_h_h); LLVMValueRef ds2dx_ds2dy_dt2dx_dt2dy = lp_apply_ellipse_transform(&full_length_bld, dsdx_dsdy_dtdx_dtdy); + static const unsigned char swizzle01[] = { 0, 1 }; static const unsigned char swizzle23[] = { 2, 3 }; LLVMValueRef ds2dx_ds2dy = lp_build_swizzle_aos_n(gallivm, ds2dx_ds2dy_dt2dx_dt2dy, - swizzle01, ARRAY_SIZE(swizzle01), half_length_vec.length); + swizzle01, ARRAY_SIZE(swizzle01), 4, half_length_vec.length); LLVMValueRef dt2dx_dt2dy = lp_build_swizzle_aos_n(gallivm, ds2dx_ds2dy_dt2dx_dt2dy, - swizzle23, ARRAY_SIZE(swizzle23), half_length_vec.length); + swizzle23, ARRAY_SIZE(swizzle23), 4, half_length_vec.length); LLVMValueRef rho_x2_rho_y2 = lp_build_add(&half_length_bld, ds2dx_ds2dy, dt2dx_dt2dy); static const unsigned char swizzle0[] = { 0 }; static const unsigned char swizzle1[] = { 1 }; LLVMValueRef rho_x2 = lp_build_swizzle_aos_n(gallivm, rho_x2_rho_y2, - swizzle0, ARRAY_SIZE(swizzle0), quarter_length_vec.length); + swizzle0, ARRAY_SIZE(swizzle0), 2, quarter_length_vec.length); LLVMValueRef rho_y2 = lp_build_swizzle_aos_n(gallivm, rho_x2_rho_y2, - swizzle1, 
ARRAY_SIZE(swizzle1), quarter_length_vec.length); + swizzle1, ARRAY_SIZE(swizzle1), 2, quarter_length_vec.length); LLVMValueRef rho_max2 = lp_build_max(&quarter_length_bld, rho_x2, rho_y2); LLVMValueRef rho_min2 = lp_build_min(&quarter_length_bld, rho_x2, rho_y2); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c index ae466e90e6d..6f0e25adff1 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c @@ -304,9 +304,16 @@ lp_build_swizzle_scalar_aos(struct lp_build_context *bld, * Swizzle a vector consisting of an array of XYZW structs. * * This fills a vector of dst_len length with the swizzled channels from src. + * Swizzle values are repeated as many times as necessary to fill the dst_len. + * The "stride" value is added to a base index every time the list of + * swizzles repeats. Any resulting index that equals or exceeds the source + * length yields "undef" for that element. * - * e.g. with swizzles = { 2, 1, 0 } and swizzle_count = 6 results in - * RGBA RGBA = BGR BGR BG + * e.g. with swizzles = { 2, 1, 0 }, stride = 0, dst_len = 8 results in + * R1G1B1A1 R2G2B2A2 = B1G1R1 B1G1R1 B1G1 + * + * e.g. 
with swizzles = { 2, 1, 0 }, stride = 4, dst_len = 8 results in + * R1G1B1A1 R2G2B2A2 = B1G1R1 B2G2R2 undef undef * * @param swizzles the swizzle array * @param num_swizzles the number of elements in swizzles @@ -317,6 +324,7 @@ lp_build_swizzle_aos_n(struct gallivm_state* gallivm, LLVMValueRef src, const unsigned char* swizzles, unsigned num_swizzles, + unsigned stride, unsigned dst_len) { LLVMBuilderRef builder = gallivm->builder; @@ -329,8 +337,20 @@ lp_build_swizzle_aos_n(struct gallivm_state* gallivm, lp_build_const_int32(gallivm, swizzles[0]), ""); } + const unsigned src_len = LLVMGetVectorSize(LLVMTypeOf(src)); + + unsigned base_index = 0; for (unsigned i = 0; i < dst_len; ++i) { - int swizzle = swizzles[i % num_swizzles]; + const unsigned local_index = i % num_swizzles; + unsigned swizzle = swizzles[local_index]; + swizzle += base_index; + if (local_index + 1 == num_swizzles) { + base_index += stride; + } + + if (swizzle >= src_len) { + swizzle = LP_BLD_SWIZZLE_DONTCARE; + } if (swizzle == LP_BLD_SWIZZLE_DONTCARE) { shuffles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h index 76a3d257bde..4153c6a7365 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h @@ -96,6 +96,7 @@ lp_build_swizzle_aos_n(struct gallivm_state* gallivm, LLVMValueRef src, const unsigned char* swizzles, unsigned num_swizzles, + unsigned stride, unsigned dst_len); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index df9db51babc..3b422ab2788 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -1589,7 +1589,7 @@ generate_fs_twiddle(struct gallivm_state *gallivm, for (unsigned i = 0; i < src_count; ++i) { dst[i] = lp_build_swizzle_aos_n(gallivm, dst[i], swizzles, - type.length, type.length); + 
type.length, 0, type.length); } } @@ -2877,11 +2877,11 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, if (pad_inline) { /* Use all 4 channels e.g. from RGBA RGBA to RGxx RGxx */ blend_color = lp_build_swizzle_aos_n(gallivm, blend_color, swizzle, - TGSI_NUM_CHANNELS, row_type.length); + TGSI_NUM_CHANNELS, 0, row_type.length); } else { /* Only use dst_channels e.g. RGBA RGBA to RG RG xxxx */ blend_color = lp_build_swizzle_aos_n(gallivm, blend_color, swizzle, - dst_channels, row_type.length); + dst_channels, 0, row_type.length); } /*