diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 7a01528e36a..f7f89ca2ed9 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -467,7 +467,6 @@ static void fs_fb_fetch(const struct lp_build_fs_iface *iface, LLVMValueRef color_ptr = LLVMBuildLoad(builder, LLVMBuildGEP(builder, fs_iface->color_ptr_ptr, &index, 1, ""), ""); LLVMValueRef stride = LLVMBuildLoad(builder, LLVMBuildGEP(builder, fs_iface->color_stride_ptr, &index, 1, ""), ""); - LLVMValueRef dst[4 * 4]; enum pipe_format cbuf_format = key->cbuf_format[cbuf]; const struct util_format_description* out_format_desc = util_format_description(cbuf_format); if (out_format_desc->format == PIPE_FORMAT_NONE) { @@ -475,46 +474,10 @@ static void fs_fb_fetch(const struct lp_build_fs_iface *iface, return; } - struct lp_type dst_type; unsigned block_size = bld->type.length; unsigned block_height = key->resource_1d ? 1 : 2; unsigned block_width = block_size / block_height; - lp_mem_type_from_format_desc(out_format_desc, &dst_type); - - struct lp_type blend_type; - memset(&blend_type, 0, sizeof blend_type); - blend_type.floating = FALSE; /* values are integers */ - blend_type.sign = FALSE; /* values are unsigned */ - blend_type.norm = TRUE; /* values are in [0,1] or [-1,1] */ - blend_type.width = 8; /* 8-bit ubyte values */ - blend_type.length = 16; /* 16 elements per vector */ - - uint32_t dst_alignment; - /* - * Compute the alignment of the destination pointer in bytes - * We fetch 1-4 pixels, if the format has pot alignment then those fetches - * are always aligned by MIN2(16, fetch_width) except for buffers (not - * 1d tex but can't distinguish here) so need to stick with per-pixel - * alignment in this case. - */ - if (key->resource_1d) { - dst_alignment = (out_format_desc->block.bits + 7)/(out_format_desc->block.width * 8); - } - else { - dst_alignment = dst_type.length * dst_type.width / 8; - } - /* Force power-of-two alignment by extracting only the least-significant-bit */ - dst_alignment = 1 << (ffs(dst_alignment) - 1); - /* - * Resource base and stride pointers are aligned to 16 bytes, so that's - * the maximum alignment we can guarantee - */ - dst_alignment = MIN2(16, dst_alignment); - - LLVMTypeRef blend_vec_type = lp_build_vec_type(gallivm, blend_type); - color_ptr = LLVMBuildBitCast(builder, color_ptr, LLVMPointerType(blend_vec_type, 0), ""); - if (key->multisample) { LLVMValueRef sample_stride = LLVMBuildLoad(builder, LLVMBuildGEP(builder, fs_iface->color_sample_stride_ptr, @@ -536,12 +499,36 @@ static void fs_fb_fetch(const struct lp_build_fs_iface *iface, } y_offset = LLVMBuildMul(builder, counter, lp_build_const_int32(gallivm, 2), ""); } - load_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height, dst, dst_type, block_size, dst_alignment, x_offset, y_offset, true); + LLVMValueRef offsets[4 * 4]; for (unsigned i = 0; i < block_size; i++) { - dst[i] = LLVMBuildBitCast(builder, dst[i], LLVMInt32TypeInContext(gallivm->context), ""); + unsigned x = i % block_width; + unsigned y = i / block_width; + + if (block_size == 8) { + /* remap the raw slots into the fragment shader execution mode. */ + /* this math took me way too long to work out, I'm sure it's overkill. */ + x = (i & 1) + ((i >> 2) << 1); + if (!key->resource_1d) + y = (i & 2) >> 1; + } + + LLVMValueRef x_val; + if (x_offset) { + x_val = LLVMBuildAdd(builder, lp_build_const_int32(gallivm, x), x_offset, ""); + x_val = LLVMBuildMul(builder, x_val, lp_build_const_int32(gallivm, out_format_desc->block.bits / 8), ""); + } else { + x_val = lp_build_const_int32(gallivm, x * (out_format_desc->block.bits / 8)); + } + + LLVMValueRef y_val = lp_build_const_int32(gallivm, y); + if (y_offset) + y_val = LLVMBuildAdd(builder, y_val, y_offset, ""); + y_val = LLVMBuildMul(builder, y_val, stride, ""); + + offsets[i] = LLVMBuildAdd(builder, x_val, y_val, ""); } - LLVMValueRef packed = lp_build_gather_values(gallivm, dst, block_size); + LLVMValueRef offset = lp_build_gather_values(gallivm, offsets, block_size); struct lp_type texel_type = bld->type; if (out_format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB && @@ -553,9 +540,10 @@ static void fs_fb_fetch(const struct lp_build_fs_iface *iface, texel_type = lp_type_uint_vec(bld->type.width, bld->type.width * bld->type.length); } } - lp_build_unpack_rgba_soa(gallivm, out_format_desc, - texel_type, - packed, result); + + lp_build_fetch_rgba_soa(gallivm, out_format_desc, texel_type, + true, color_ptr, offset, + NULL, NULL, NULL, result); } /**